failed_discord_bot/bot.py

581 lines
27 KiB
Python
Raw Normal View History

2023-10-29 13:48:48 +01:00
import os
import requests
import json
import yaml
import asyncio
import nextcord
from nextcord.ext import commands
import re
2023-10-29 13:48:48 +01:00
import threading
import logging
import traceback
2023-10-29 13:48:48 +01:00
from datetime import timedelta
import tempfile
import filetype
import aiohttp
import random
import unicodedata
import string
from html import escape
2023-10-29 13:52:46 +01:00
from difflib import SequenceMatcher
2024-01-12 14:27:27 +01:00
# self
from llama_tokenizer_lite import LlamaTokenizerLite
from run import failed_discord_bot
from get_messages import MessageProcessor
2023-10-29 13:48:48 +01:00
def similar(a, b):
return SequenceMatcher(None, a, b).ratio()
logging.basicConfig(level=logging.INFO, format='%(message)s')
Mancer_Key = os.getenv("MANCER_KEY")
Mancer_Model = "mytholite"
Mancer_URL = "https://neuro.mancer.tech/webui/"+Mancer_Model+"/api"
2024-01-12 14:27:27 +01:00
run = failed_discord_bot()
intents = run.get_intents()
bot = run.get_bot()
tokenizer = LlamaTokenizerLite()
2023-10-29 13:48:48 +01:00
oldest_message_timestamp = None
last_bot_message_id = None
image_cache = {}
current_task = None
# Create a lock
lock = asyncio.Lock()
def get_mancer_body(prompt, stop_string):
data = read_yaml_file(os.path.join(config_directory, 'config.yaml'))
mancer_settings_directory = os.path.join(config_directory, 'mancer_settings')
ensure_directory_exists(mancer_settings_directory)
mancer_settings = get_first_filename(data, 'mancer_settings')
mancer_settings = read_file_contents(mancer_settings_directory, mancer_settings)
data = json.loads(mancer_settings)
data['prompt'] = prompt
data['stopping_strings'] = stop_string
return data
def read_yaml_file(filename):
with open(filename, 'r') as file:
data = yaml.safe_load(file)
return data
def get_first_filename(data, field):
return data[field][0]
def read_file_contents(directory, filename):
with open(os.path.join(directory, filename), 'r') as file:
contents = file.read()
return contents
def ensure_directory_exists(directory):
if not os.path.exists(directory):
os.makedirs(directory)
config_directory = 'config'
class MessageQueue():
def __init__(self):
self.queue = asyncio.Queue()
async def add_message(self, message):
await self.queue.put(message)
async def process_messages(self):
while True:
message = await self.queue.get()
try:
await process_images_and_generate_responses(message)
except Exception as e:
logging.exception("Failed to process message", exc_info=e)
finally:
self.queue.task_done()
message_queue = MessageQueue()
@bot.event
async def on_ready():
2023-10-29 13:48:48 +01:00
for guild in bot.guilds:
for channel in guild.text_channels:
if channel.permissions_for(guild.me).send_messages:
data = read_yaml_file(os.path.join(config_directory, 'config.yaml'))
boot_prompt_directory = os.path.join(config_directory, 'boot_prompt')
initial_prompt_directory = os.path.join(config_directory, 'initial_prompt')
ensure_directory_exists(boot_prompt_directory)
ensure_directory_exists(initial_prompt_directory)
boot_prompt = get_first_filename(data, 'boot_prompt')
initial_prompt = get_first_filename(data, 'initial_prompt')
boot_prompt = read_file_contents(boot_prompt_directory, boot_prompt)
initial_prompt = read_file_contents(initial_prompt_directory, initial_prompt)
await channel.send(boot_prompt)
sent_message = await channel.send(initial_prompt)
global oldest_message_timestamp, last_bot_message_id
oldest_message_timestamp = sent_message.created_at - timedelta(microseconds=1)
last_bot_message_id = sent_message.id
await message_queue.add_message(sent_message)
break
logging.info('Bot is ready')
@bot.event
2023-10-29 13:48:48 +01:00
async def on_message(message):
await message_queue.add_message(message)
logging.info('New message received')
2023-10-29 13:48:48 +01:00
@bot.event
2023-10-29 13:48:48 +01:00
async def process_images_and_generate_responses(message):
logging.info("process function initialised")
2023-10-29 13:48:48 +01:00
global oldest_message_timestamp
global last_bot_message_id
global image_cache
reply_punctuation = False
try:
for _ in range(7): # Limit to 7 attempts
2024-01-12 14:27:27 +01:00
logging.info(f"Number of _: {_}")
2023-10-29 13:48:48 +01:00
if not reply_punctuation:
channel = message.channel
messages = []
if oldest_message_timestamp is None:
oldest_message_timestamp = message.created_at
messages.append(message)
else:
async for m in channel.history(limit=200, after=oldest_message_timestamp):
2023-10-29 13:48:48 +01:00
messages.append(m)
logging.info(f"Number of messages it's reading: {len(messages)}")
2023-10-29 13:48:48 +01:00
try:
latest_message = messages[-1]
logging.info(f"latest message: {latest_message}")
2023-10-29 13:48:48 +01:00
latest_author_id = latest_message.author.id
except:
return
if latest_author_id == bot.user.id:
logging.info("latest author is bot")
2023-10-29 13:48:48 +01:00
return
while True:
2024-01-12 14:27:27 +01:00
# Process the messages
global discord_msgs, formatted_users, user_names
2023-10-29 13:48:48 +01:00
discord_msgs = []
2024-01-12 14:27:27 +01:00
if 'prompt' not in globals():
message_processor = MessageProcessor(bot=bot)
else:
message_processor = MessageProcessor(
bot=bot,
formatted_users=formatted_users,
image_cache=image_cache,
discord_msgs=discord_msgs,
user_names=user_names
)
logging.info("0 image stuff")
await message_processor.process_messages(messages)
# Access the processed messages and image cache
discord_msgs = message_processor.get_discord_msgs()
image_cache = message_processor.get_image_cache()
formatted_users = message_processor.get_formatted_users()
user_names = message_processor.get_user_names()
2023-10-29 13:48:48 +01:00
global system_prompt
global input_segment
global output_segment
global end_loop_prompt
global prompt
config_directory = 'config'
ensure_directory_exists(config_directory)
data = read_yaml_file(os.path.join(config_directory, 'config.yaml'))
system_prompt_directory = os.path.join(config_directory, 'system_prompt')
input_segment_directory = os.path.join(config_directory, 'input_segment')
output_segment_directory = os.path.join(config_directory, 'output_segment')
2024-01-12 14:27:27 +01:00
input_sequence_directory = os.path.join(config_directory, 'input_sequence')
output_sequence_directory = os.path.join(config_directory, 'output_sequence')
2023-10-29 13:48:48 +01:00
end_loop_prompt_directory = os.path.join(config_directory, 'end_loop_prompt')
ensure_directory_exists(system_prompt_directory)
ensure_directory_exists(input_segment_directory)
ensure_directory_exists(output_segment_directory)
2024-01-12 14:27:27 +01:00
ensure_directory_exists(input_sequence_directory)
ensure_directory_exists(output_sequence_directory)
2023-10-29 13:48:48 +01:00
ensure_directory_exists(end_loop_prompt_directory)
system_prompt = get_first_filename(data, 'system_prompt')
input_segment = get_first_filename(data, 'input_segment')
output_segment = get_first_filename(data, 'output_segment')
2024-01-12 14:27:27 +01:00
input_sequence = get_first_filename(data, 'input_sequence')
output_sequence = get_first_filename(data, 'output_sequence')
2023-10-29 13:48:48 +01:00
end_loop_prompt = get_first_filename(data, 'end_loop_prompt')
system_prompt = read_file_contents(system_prompt_directory, system_prompt)
input_segment = read_file_contents(input_segment_directory, input_segment)
output_segment = read_file_contents(output_segment_directory, output_segment)
2024-01-12 14:27:27 +01:00
input_sequence = read_file_contents(input_sequence_directory, input_sequence)
output_sequence = read_file_contents(output_sequence_directory, output_sequence)
2023-10-29 13:48:48 +01:00
end_loop_prompt = read_file_contents(end_loop_prompt_directory, end_loop_prompt)
end_loop_list = end_loop_prompt.split("\n")
end_loop_list = [item for item in end_loop_list if item != '']
end_loop_prompt = random.choice(end_loop_list)
logging.info("2 prompt stuff")
prev_author = None
i = 0
while i < len(discord_msgs):
m = discord_msgs[i]
2024-01-12 14:27:27 +01:00
logging.info(f"discord_msgs: {discord_msgs}")
2023-10-29 13:48:48 +01:00
try:
last_char = m['content'][-1]
except:
last_char = '.'
if m['author'] == prev_author:
if unicodedata.category(m['content'][-1])[0] not in ('L', 'N'):
discord_msgs[i-1]['content'] += f" {m['content']}"
else:
# Lowercase first letter before concatenating
lowercase_content = m['content'][0].lower() + m['content'][1:]
discord_msgs[i-1]['content'] += f", {lowercase_content}"
del discord_msgs[i]
else:
prev_author = m['author']
i += 1
2024-01-12 14:27:27 +01:00
message_prompt = ""
append_input_seq = True
for m in discord_msgs:
if m['author'] == bot.user.name:
if not append_input_seq:
append_input_seq = True
message_prompt += output_sequence
else:
if append_input_seq:
message_prompt += input_sequence
append_input_seq = False
message_prompt += f"{m['author']}: {m['content']}\n"
2023-10-29 13:48:48 +01:00
last_bot_index = None
for i, line in enumerate(message_prompt.split('\n')):
if line.startswith(bot.user.name + ":"):
last_bot_index = i
logging.info(f"last bot index: {last_bot_index}")
prompt = (f"{system_prompt}{input_segment}{message_prompt}{output_segment}")
prompt = prompt.replace('{{users}}', formatted_users)
prompt = prompt.replace('{{bot}}', bot.user.name)
prompt = prompt.replace('{{endloop}}', end_loop_prompt)
# Removing the newline character
prompt = prompt.rstrip("\n")
logging.info("3 more prompt stuff")
stop_string = [f"\n{name}" for name in set(user_names)]
stop_string += [f"{bot.user.name}:"]
2024-01-12 14:27:27 +01:00
stop_string += ["</s>", "<|", "\n#", "\n*", "\n\n\n", "<START>", ",", "?", "!", ";", "."]
2023-10-29 13:48:48 +01:00
headers = {
"X-API-KEY": Mancer_Key,
"Content-Type": "application/json",
}
new_body = get_mancer_body(prompt, stop_string)
token_count = await asyncio.to_thread(len, tokenizer.encode(prompt))
logging.info(f"token count (local): {token_count}")
if token_count <= 2410:
break
else:
2024-01-12 14:27:27 +01:00
logging.info(f"The too many messages: {messages}")
2023-10-29 13:48:48 +01:00
if len(messages) != 0:
messages.pop(0)
oldest_message_timestamp = messages[0].created_at - timedelta(microseconds=1)
old_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
2023-10-29 13:48:48 +01:00
old_messages = "\n".join([f"{m.author}: \n{m.content}" for m in old_messages_object]) + "\n"
if len(old_messages) == 0:
return
logging.info("4 before generating stuff")
if _ != 0:
await asyncio.sleep(0.5)
2023-10-29 13:48:48 +01:00
async with channel.typing():
async with aiohttp.ClientSession() as session:
async with session.post(Mancer_URL + "/v1/generate", json=new_body, headers=headers) as response:
logging.info(f"Request Body: {json.dumps(new_body, indent=2)}")
api_response = await response.json()
x_input_tokens = api_response.get('x-input-tokens')
x_output_tokens = api_response.get('x-output-tokens')
logging.info(f"Response: {json.dumps(api_response, indent=2)}")
reply_content = api_response.get('results')[0].get('text')
reply_content = str(reply_content)
# Remove stop_string if it's at the end of reply_content
stop_string_python_sucks = [s if s.endswith('\n') else s+'\n' for s in stop_string]
stop_string_python_sucks.extend(stop_string)
if reply_content:
if reply_content.endswith((",", "!", "?", ";")):
reply_punctuation = True
else:
reply_punctuation = False
for stop_string in stop_string_python_sucks:
if reply_content.endswith(stop_string):
reply_content = reply_content[:-len(stop_string)]
break
if not reply_punctuation:
# Check for new messages after the bot's last message
new_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
2023-10-29 13:48:48 +01:00
new_messages = "\n".join([f"{m.author}: \n{m.content}" for m in new_messages_object]) + "\n"
try:
latest_message = new_messages_object[-1]
latest_author_id = latest_message.author.id
except:
return
else:
new_messages = old_messages
# If new messages are found, regenerate the response
if old_messages == new_messages or not reply_punctuation and not re.fullmatch(r'[!?]+', reply_content):
2023-10-29 13:48:48 +01:00
logging.info('old_messages == new_messages')
# Check if the response consists of max amount of max_new_tokens
if x_output_tokens == 150:
logging.info("Generated message contained too many tokens, regenerating...")
else:
bot_messages = [msg['content'] for msg in discord_msgs if msg['is_bot']]
try:
last_punctuation = reply_content[-1]
if reply_punctuation and last_punctuation == ',':
reply_content = reply_content[:-1]
sent_message = await channel.send(reply_content) # Store the sent message
if old_messages != new_messages and not reply_punctuation:
break
if not reply_punctuation:
last_bot_message_id = sent_message.id # Update the ID of the last message sent by the bot
else:
reply_content = reply_content + last_punctuation
prompt = prompt + reply_content
while True:
new_body = get_mancer_body(prompt, stop_string)
token_count = await asyncio.to_thread(len, tokenizer.encode(prompt))
logging.info(f"token count (local): {token_count}")
if token_count <= 2410:
break
else:
discord_msgs = []
if len(messages) != 0:
messages.pop(0)
logging.info(f"messages variable: {messages}")
prev_author = None
i = 0
# messages initialise
for m in messages:
if m.attachments:
msg_content = m.content
2024-01-12 14:27:27 +01:00
logging.info(f"please tell me there is any image cache in here {image_cache}")
2023-10-29 13:48:48 +01:00
if m.id in image_cache:
for image in image_cache[m.id]:
if msg_content:
msg_content += f' *sends an image of {image["output"]}*'
else:
msg_content += f'*sends an image of {image["output"]}*'
else:
2024-01-12 14:27:27 +01:00
logging.info(f"no image for {m.content}")
2023-10-29 13:48:48 +01:00
msg_content = m.content
discord_msgs.append({
"content": msg_content,
"author": m.author.name,
"is_bot": m.author.bot
})
# discord_msgs initialise
while i < len(discord_msgs):
m = discord_msgs[i]
try:
last_char = m['content'][-1]
except:
last_char = '.'
if m['author'] == prev_author:
if unicodedata.category(m['content'][-1])[0] not in ('L', 'N'):
discord_msgs[i-1]['content'] += f" {m['content']}"
else:
# Lowercase first letter before concatenating
lowercase_content = m['content'][0].lower() + m['content'][1:]
discord_msgs[i-1]['content'] += f", {lowercase_content}"
del discord_msgs[i]
else:
prev_author = m['author']
i += 1
message_prompt = "\n".join([f"{m['author']}: {m['content']}" for m in discord_msgs]) + '\n'
last_bot_index = None
for i, line in enumerate(message_prompt.split('\n')):
if line.startswith(bot.user.name + ":"):
last_bot_index = i
2024-01-12 14:27:27 +01:00
logging.info(f"message_prompt: {message_prompt}")
logging.info(f"bot.user.name: {bot.user.name}")
2023-10-29 13:48:48 +01:00
logging.info(f"last bot index: {last_bot_index}")
prompt = (f"{system_prompt}{input_segment}{message_prompt}{output_segment}")
prompt = prompt.replace('{{users}}', formatted_users)
prompt = prompt.replace('{{bot}}', bot.user.name)
prompt = prompt.replace('{{endloop}}', end_loop_prompt)
# Removing the newline character
prompt = prompt.rstrip("\n")
prompt = prompt + reply_content
new_body["prompt"] = prompt
if reply_content.endswith(("!", "?")) and _ >= 2:
return
except nextcord.errors.HTTPException:
2023-10-29 13:48:48 +01:00
pass
else:
logging.info('old_messages != new_messages')
try:
last_bot_message_id = sent_message.id
except:
pass
if not reply_punctuation:
old_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
2023-10-29 13:48:48 +01:00
old_messages = "\n".join([f"{m.author}: \n{m.content}" for m in old_messages_object]) + "\n"
except Exception as error:
logging.info(str(error))
logging.info(str(traceback.format_exc()))
2023-10-29 13:48:48 +01:00
@bot.slash_command(name="ping", description="Check if I'm alive!")
async def ping(interaction:nextcord.Interaction):
2023-10-29 13:48:48 +01:00
try:
latency = round(bot.latency * 1000)
except Exception as e:
latency = "Error getting bot latency: " + str(e)
try:
r = requests.get(Mancer_URL)
mancer_ping = round(r.elapsed.total_seconds() * 1000)
except Exception as e:
mancer_ping = "Error pinging LLM API: " + str(e)
response = f"Bot Latency: {latency}ms\nLLM Latency: {mancer_ping}ms"
await interaction.response.send_message(response, ephemeral=True)
@bot.slash_command(name="showprompt", description="Check my current prompt in full!")
async def showprompt(interaction:nextcord.Interaction):
2023-10-29 13:48:48 +01:00
global prompt
if 'prompt' not in globals():
await interaction.response.send_message("Send a message to me first to see the prompt!", ephemeral=True)
return
html = f"<!DOCTYPE html><html><head><style>"
# CSS for dark mode
html += """
body {
background: white;
color: black;
}
@media (prefers-color-scheme: dark) {
body {
background: black;
color: white;
}
}
"""
# CSS for formatting
html += """
body {
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
}
p {
font-size: 18px;
line-height: 1.3;
}
"""
# CSS for line numbers
html += """
p {
counter-increment: line;
padding-left: 3em;
border-left: 2px solid transparent;
position: relative;
}
p:before {
content: counter(line);
position: absolute;
left: 0;
}
"""
html += "</style></head><body>"
# Add prompt with line number spans
for i, line in enumerate(prompt.split("\n")):
if not line.strip(): # If line is empty
line = "&nbsp;"
else:
line = escape(line)
html += f"<p>{line}</p>"
html += "</body></html>"
# Generate a random filename
filename = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)) + '.html'
with open(filename, 'w+') as f:
f.write(html)
async with aiohttp.ClientSession() as session:
async with session.post("https://0x0.st",
data={"file": open(filename, 'rb'),
"expires": "1",
"secret": ""
}) as resp:
url = await resp.text()
os.remove(filename) # Delete the file after uploading
# Wrap url in markdown with text "View Prompt Here!"
markdown_url = f"[View Prompt Here!]({url.strip()})"
# Add line about link expiration
message = f"{markdown_url}\n\n`Link expires after an hour`"
await interaction.response.send_message(message, ephemeral=True)
@bot.event
async def on_resume():
# Re-register intents
intents = nextcord.Intents.default()
intents.message_content = True
bot.intents = intents
logging.info('Bot is resumed')
2023-10-29 13:48:48 +01:00
def main():
bot.loop.create_task(message_queue.process_messages())
# Run bot
token = os.environ['DISCORD_BOT_TOKEN']
bot.run(token)
if __name__ == "__main__":
main()