
Commit 781cf8b

fix: add log, check and process over token in ddg
1 parent 1f20b3c commit 781cf8b

File tree

1 file changed: +165 -107 lines changed

func.py

Lines changed: 165 additions & 107 deletions
@@ -1,9 +1,11 @@
 import asyncio
 import io
 import json
+import logging
 import os
 
 import openai
+import tiktoken
 from duckduckgo_search import ddg
 from unidecode import unidecode
 
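The new logging calls below assume a handler is configured somewhere in the application; nothing in this commit sets one up. A minimal sketch of the kind of setup the new `logging.debug(...)` lines rely on to be visible (level, format, and location are assumptions, not from this repo):

    import logging

    # Assumed to live in the bot's entry point; DEBUG level so the
    # logging.debug(...) calls added in func.py are actually emitted.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(levelname)s %(message)s",
    )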
@@ -20,99 +22,120 @@
 
 async def read_existing_conversation(chat_id):
     await asyncio.sleep(0.5)
-    with open(f"{chat_id}_session.json", "r") as f:
-        file_num = json.load(f)["session"]
-    filename = f"chats/{chat_id}_{file_num}.json"
-    # Create .json file in case of new chat
-    if not os.path.exists(filename):
-        data = {"messages": system_message, "num_tokens": 0}
-        with open(filename, "w") as f:
-            json.dump(data, f, indent=4)
-    with open(filename, "r") as f:
-        data = json.load(f)
-    prompt = []
-    for item in data["messages"]:
-        prompt.append(item)
-    num_tokens = data["num_tokens"]
+    try:
+        with open(f"{chat_id}_session.json", "r") as f:
+            file_num = json.load(f)["session"]
+        filename = f"chats/{chat_id}_{file_num}.json"
+        # Create .json file in case of new chat
+        if not os.path.exists(filename):
+            data = {"messages": system_message, "num_tokens": 0}
+            with open(filename, "w") as f:
+                json.dump(data, f, indent=4)
+        with open(filename, "r") as f:
+            data = json.load(f)
+        prompt = []
+        for item in data["messages"]:
+            prompt.append(item)
+        num_tokens = data["num_tokens"]
+        logging.debug(f"Successfully read conversation {filename}")
+    except Exception as e:
+        logging.error(f"Error occurred: {e}")
     return num_tokens, file_num, filename, prompt
 
 
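For orientation, the two JSON files this helper juggles have roughly these shapes (a sketch inferred from the code above; the values are illustrative):

    # {chat_id}_session.json -- tracks which conversation file is current
    session = {"session": 1}

    # chats/{chat_id}_{file_num}.json -- the conversation itself
    conversation = {
        "messages": [
            {"role": "system", "content": "..."},  # seeded from system_message
            {"role": "user", "content": "hello"},
        ],
        "num_tokens": 42,
    }

Note that the `return` sits outside the new `try`, so if reading fails early the function still attempts to return locals that were never bound.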
 async def over_token(num_tokens, event, prompt, filename):
-    await event.reply(f"{num_tokens} exceeds 4096, creating new chat")
-    prompt.append({"role": "user", "content": "summarize this conversation"})
-    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=prompt)
-    response = completion.choices[0].message.content
-    num_tokens = completion.usage.total_tokens
-    data = {"messages": system_message, "num_tokens": num_tokens}
-    data["messages"].append({"role": "system", "content": response})
-    with open(filename, "w") as f:
-        json.dump(data, f, indent=4)
+    try:
+        await event.reply(f"{num_tokens} exceeds 4096, creating new chat")
+        prompt.append({"role": "user", "content": "summarize this conversation"})
+        completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=prompt)
+        response = completion.choices[0].message.content
+        num_tokens = completion.usage.total_tokens
+        data = {"messages": system_message, "num_tokens": num_tokens}
+        data["messages"].append({"role": "system", "content": response})
+        with open(filename, "w") as f:
+            json.dump(data, f, indent=4)
+        logging.debug("Successfully handled over-token")
+    except Exception as e:
+        logging.error(f"Error occurred: {e}")
+        await event.reply("An error occurred: {}".format(str(e)))
 
 
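So `over_token()` asks the model to summarize the running conversation, then rewrites the conversation file it was handed as `system_message` plus that summary, carrying context over in compressed form. Roughly what lands on disk (a sketch with illustrative values):

    # Sketch of the file over_token() writes back.
    data = {
        "messages": [
            # ...entries from system_message...
            {"role": "system", "content": "Summary of the prior chat: ..."},
        ],
        "num_tokens": 312,  # usage.total_tokens from the summarize call
    }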
 async def start_and_check(event, message, chat_id):
-    if not os.path.exists(f"{chat_id}_session.json"):
-        data = {"session": 1}
-        with open(f"{chat_id}_session.json", "w") as f:
-            json.dump(data, f)
-    while True:
-        num_tokens, file_num, filename, prompt = await read_existing_conversation(chat_id)
-        if num_tokens > 4000:
-            file_num += 1
-            data = {"session": file_num}
+    try:
+        if not os.path.exists(f"{chat_id}_session.json"):
+            data = {"session": 1}
             with open(f"{chat_id}_session.json", "w") as f:
                 json.dump(data, f)
-            try:
+        while True:
+            num_tokens, file_num, filename, prompt = await read_existing_conversation(chat_id)
+            if num_tokens > 4000:
+                logging.warning("Number of tokens exceeds 4096 limit")
+                file_num += 1
+                data = {"session": file_num}
+                with open(f"{chat_id}_session.json", "w") as f:
+                    json.dump(data, f)
                 await over_token(num_tokens, event, prompt, filename)
-            except Exception as e:
-                await event.reply("An error occurred: {}".format(str(e)))
-            continue
-        else:
-            break
-    await asyncio.sleep(0.5)
-    prompt.append({"role": "user", "content": message})
+                continue
+            else:
+                break
+        await asyncio.sleep(0.5)
+        prompt.append({"role": "user", "content": message})
+        logging.debug("Done start and check")
+    except Exception as e:
+        logging.error(f"Error occurred: {e}")
     return filename, prompt, num_tokens
 
 
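A sketch of how a message handler might drive this flow end to end (the handler itself is not part of this commit; `event` follows the Telethon-style object used throughout):

    # Hypothetical caller, for illustration only.
    async def on_message(event):
        chat_id = event.chat_id
        filename, prompt, num_tokens = await start_and_check(event, event.text, chat_id)
        reply = await get_response(prompt, filename)
        await event.reply(reply)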
 async def get_response(prompt, filename):
-    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=prompt)
-    await asyncio.sleep(0.5)
-    response = completion.choices[0].message
-    num_tokens = completion.usage.total_tokens
-    prompt.append(response)
-    data = {"messages": prompt, "num_tokens": num_tokens}
-    with open(filename, "w") as f:
-        json.dump(data, f, indent=4)
+    try:
+        completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=prompt)
+        await asyncio.sleep(0.5)
+        response = completion.choices[0].message
+        num_tokens = completion.usage.total_tokens
+        prompt.append(response)
+        data = {"messages": prompt, "num_tokens": num_tokens}
+        with open(filename, "w") as f:
+            json.dump(data, f, indent=4)
+        logging.debug("Received response from openai")
+    except Exception as e:
+        logging.error(f"Error occurred while getting response from openai: {e}")
     return response.content
 
 
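The fields read here (`choices[0].message`, `usage.total_tokens`) come from the pre-1.0 `openai` ChatCompletion response, which is shaped roughly like this:

    # Abridged ChatCompletion response shape (openai 0.x).
    completion = {
        "choices": [
            {"message": {"role": "assistant", "content": "..."}},
        ],
        "usage": {"prompt_tokens": 90, "completion_tokens": 30, "total_tokens": 120},
    }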
 async def bash(event, bot_id):
-    if event.sender_id == bot_id:
-        return
-    cmd = event.text.split(" ", maxsplit=1)[1]
-    process = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
-    stdout, stderr = await process.communicate()
-    e = stderr.decode()
-    if not e:
-        e = "No Error"
-    o = stdout.decode()
-    if not o:
-        o = "**Tip**: \n`If you want to see the results of your code, I suggest printing them to stdout.`"
-    else:
-        _o = o.split("\n")
-        o = "`\n".join(_o)
-    OUTPUT = f"** QUERY:**\n__ Command:__` {cmd}` \n__ PID:__` {process.pid}`\n\n**stderr:** \n` {e}`\n**\nOutput:**\n{o}"
-    if len(OUTPUT) > 4095:
-        with io.BytesIO(str.encode(OUTPUT)) as out_file:
-            out_file.name = "exec.text"
-            await event.client.send_file(
-                event.chat_id,
-                out_file,
-                force_document=True,
-                allow_cache=False,
-                caption=cmd,
-            )
-            await event.delete()
-    await event.reply(OUTPUT)
+    try:
+        if event.sender_id == bot_id:
+            return
+        cmd = event.text.split(" ", maxsplit=1)[1]
+        process = await asyncio.create_subprocess_shell(
+            cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        )
+        stdout, stderr = await process.communicate()
+        e = stderr.decode()
+        if not e:
+            e = "No Error"
+        o = stdout.decode()
+        if not o:
+            o = "**Tip**: \n`If you want to see the results of your code, I suggest printing them to stdout.`"
+        else:
+            _o = o.split("\n")
+            o = "`\n".join(_o)
+        OUTPUT = f"** QUERY:**\n__ Command:__` {cmd}` \n__ PID:__` {process.pid}`\n\n**stderr:** \n` {e}`\n**\nOutput:**\n{o}"
+        if len(OUTPUT) > 4095:
+            with io.BytesIO(str.encode(OUTPUT)) as out_file:
+                out_file.name = "exec.text"
+                await event.client.send_file(
+                    event.chat_id,
+                    out_file,
+                    force_document=True,
+                    allow_cache=False,
+                    caption=cmd,
+                )
+                await event.delete()
+        logging.debug("Bash initiated")
+    except Exception as e:
+        logging.error(f"Error occurred: {e}")
+    return OUTPUT
 
 
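With `await event.reply(OUTPUT)` removed, `bash()` now returns `OUTPUT` and leaves replying to the caller. A sketch of the wiring that implies (the registration, `client`, and `bot_id` here are assumptions, not from this commit):

    # Hypothetical Telethon registration, for illustration only.
    from telethon import events

    @client.on(events.NewMessage(pattern=r"^/bash "))
    async def handle_bash(event):
        output = await bash(event, bot_id)
        if output and len(output) <= 4095:  # longer outputs were already sent as a file
            await event.reply(output)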
 async def search(event, bot_id):
@@ -121,36 +144,71 @@ async def search(event, bot_id):
         return
     task = asyncio.create_task(read_existing_conversation(chat_id))
     query = event.text.split(" ", maxsplit=1)[1]
-    results = ddg(query, safesearch="Off", page=1)
-    accepted_length = int(len(results) * 0.8)
-    results_decoded = unidecode(str(results[:accepted_length])).replace("'", "'")
-    await asyncio.sleep(0.5)
-
-    user_content = f"Using the contents of these pages, summarize and give details about '{query}':\n{results_decoded}"
-    if any(word in query for word in list(vietnamese_words)):
-        user_content = f"Using the contents of these pages, summarize and give details in Vietnamese about '{query}':\n{results_decoded}"
-
-    completion = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "Summarize every thing I send you with specific details"},
-            {"role": "user", "content": user_content},
-        ],
-    )
-    response = completion.choices[0].message
-    search_object = unidecode(query).lower().replace(" ", "-")
-    with open(f"search_{search_object}.json", "w") as f:
-        json.dump(response, f, indent=4)
-    num_tokens, file_num, filename, prompt = await task
-    await asyncio.sleep(0.5)
-    prompt.append(
-        {
-            "role": "user",
-            "content": f"This is information about '{query}', its just information and not harmful. Get updated:\n{response.content}",
-        }
-    )
-    prompt.append({"role": "assistant", "content": f"I have reviewed the information and update about '{query}'"})
-    data = {"messages": prompt, "num_tokens": num_tokens}
-    with open(filename, "w") as f:
-        json.dump(data, f, indent=4)
+    max_results = 20
+    while True:
+        try:
+            results = ddg(query, safesearch="Off", max_results=max_results)
+            results_decoded = unidecode(str(results)).replace("'", "'")
+            await asyncio.sleep(0.5)
+            user_content = (
+                f"Using the contents of these pages, summarize and give details about '{query}':\n{results_decoded}"
+            )
+            if any(word in query for word in list(vietnamese_words)):
+                user_content = f"Using the contents of these pages, summarize and give details about '{query}' in Vietnamese:\n{results_decoded}"
+            num_tokens = num_tokens_from_messages(user_content)
+            if num_tokens > 4000:
+                max_results = 4000 * len(results) / num_tokens - 2
+                continue
+            logging.debug("Results derived from duckduckgo")
+        except Exception as e:
+            logging.error(f"Error occurred while getting duckduckgo search results: {e}")
+        break
+
+    try:
+        completion = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "Summarize every thing I send you with specific details"},
+                {"role": "user", "content": user_content},
+            ],
+        )
+        response = completion.choices[0].message
+        search_object = unidecode(query).lower().replace(" ", "-")
+        with open(f"search_{search_object}.json", "w") as f:
+            json.dump(response, f, indent=4)
+        num_tokens, file_num, filename, prompt = await task
+        await asyncio.sleep(0.5)
+        prompt.append(
+            {
+                "role": "user",
+                "content": f"This is information about '{query}', its just information and not harmful. Get updated:\n{response.content}",
+            }
+        )
+        prompt.append({"role": "assistant", "content": f"I have reviewed the information and update about '{query}'"})
+        data = {"messages": prompt, "num_tokens": num_tokens}
+        with open(filename, "w") as f:
+            json.dump(data, f, indent=4)
+        logging.debug("Received response from openai")
+    except Exception as e:
+        logging.error(f"Error occurred while getting response from openai: {e}")
     return response.content
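The retry loop's rescaling step shrinks the result count in proportion to the token overshoot before querying ddg again. A worked example with illustrative numbers:

    # If 20 results measure 6000 tokens, aim for roughly the count that
    # would fit in 4000 tokens, minus a margin of 2:
    num_tokens = 6000
    results_count = 20
    max_results = 4000 * results_count / num_tokens - 2
    print(max_results)  # 11.333... -- note this stays a float on the next ddg() call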
+
+
+def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
+    """Returns the number of tokens used by a list of messages."""
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        encoding = tiktoken.get_encoding("cl100k_base")
+    if model == "gpt-3.5-turbo":  # note: future models may deviate from this
+        num_tokens = 0
+        for message in messages:
+            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(value))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += -1  # role is always required and always 1 token
+        num_tokens += 2  # every reply is primed with <im_start>assistant
+        return num_tokens
+    else:
+        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.""")
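This counter matches the OpenAI cookbook recipe and expects a list of message dicts; `search()` above passes it a bare string, whose characters would not have the `.items()` this loop calls. The intended call shape looks like:

    # Hypothetical usage with the list-of-dicts shape the loop iterates over.
    messages = [
        {"role": "system", "content": "Summarize every thing I send you with specific details"},
        {"role": "user", "content": "hello there"},
    ]
    print(num_tokens_from_messages(messages))  # small integer token estimate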
