Uses custom tokenizer, nextcord, alpine, and more

This commit is contained in:
cyi1341 2023-11-03 22:09:32 +08:00
parent 94a7d75a8a
commit a82cbe2b01
5 changed files with 291 additions and 42 deletions

View File

@ -1,17 +1,18 @@
# Base image
FROM python:3.11-slim-buster
# Use the Alpine variant of the Python 3.11 image as base image
FROM python:3.11-alpine
# Create app directory
WORKDIR /app
# Install Python dependencies
# Install dependencies
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install -r requirements.txt
# Set working directory
WORKDIR /app
# Bundle app source
COPY . .
# Expose ports
EXPOSE 8080
# Expose port
EXPOSE 8080
CMD python bot.py
# Run bot
CMD ["python", "bot.py"]

View File

@ -2,4 +2,5 @@
A Discord bot that tries to use free and unreliable APIs of various AI models to simulate a Discord user.
Tokenizer modified from: https://github.com/belladoreai/llama-tokenizer-js
To be determined.

81
bot.py
View File

@ -1,13 +1,15 @@
import os
import discord
import requests
import json
import yaml
import asyncio
from discord.ext import commands
import nextcord
from nextcord.ext import commands
import re
import threading
from transformers import LlamaTokenizer
from llama_tokenizer_lite import LlamaTokenizerLite
import logging
import traceback
from datetime import timedelta
import tempfile
import filetype
@ -23,14 +25,15 @@ def similar(a, b):
logging.basicConfig(level=logging.INFO, format='%(message)s')
tokenizer = LlamaTokenizer.from_pretrained('hf-internal-testing/llama-tokenizer')
Mancer_Key = os.getenv("MANCER_KEY")
Mancer_Model = "mytholite"
Mancer_URL = "https://neuro.mancer.tech/webui/"+Mancer_Model+"/api"
intents = discord.Intents.all()
bot = commands.Bot(command_prefix="i dont know if i should hate discord or discord.py sssaaa", intents=intents)
intents = nextcord.Intents.default()
intents.message_content = True
bot = nextcord.Client()
tokenizer = LlamaTokenizerLite()
oldest_message_timestamp = None
last_bot_message_id = None
@ -96,7 +99,7 @@ class MessageQueue():
message_queue = MessageQueue()
@bot.event
async def on_ready():
async def on_ready():
for guild in bot.guilds:
for channel in guild.text_channels:
if channel.permissions_for(guild.me).send_messages:
@ -120,15 +123,16 @@ async def on_ready():
last_bot_message_id = sent_message.id
await message_queue.add_message(sent_message)
break
await bot.tree.sync()
logging.info('Bot is ready')
@bot.listen('on_message')
@bot.event
async def on_message(message):
await message_queue.add_message(message)
logging.info('New message received')
@bot.listen('on_message')
@bot.event
async def process_images_and_generate_responses(message):
logging.info("process function initialised")
global oldest_message_timestamp
global last_bot_message_id
global image_cache
@ -143,15 +147,18 @@ async def process_images_and_generate_responses(message):
oldest_message_timestamp = message.created_at
messages.append(message)
else:
async for m in channel.history(after=oldest_message_timestamp):
async for m in channel.history(limit=200, after=oldest_message_timestamp):
messages.append(m)
logging.info(f"Number of messages it's reading: {len(messages)}")
try:
latest_message = messages[-1]
logging.info(f"latest message: {latest_message}")
latest_author_id = latest_message.author.id
except:
return
if latest_author_id == bot.user.id:
logging.info("latest author is bot")
return
while True:
@ -337,14 +344,14 @@ async def process_images_and_generate_responses(message):
for id in list(image_cache.keys()):
if id not in message_ids:
del image_cache[id]
old_messages_object = [msg async for msg in channel.history(after=discord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages = "\n".join([f"{m.author}: \n{m.content}" for m in old_messages_object]) + "\n"
if len(old_messages) == 0:
return
logging.info("4 before generating stuff")
if _ != 0:
await asyncio.sleep(0.3)
await asyncio.sleep(0.5)
async with channel.typing():
async with aiohttp.ClientSession() as session:
async with session.post(Mancer_URL + "/v1/generate", json=new_body, headers=headers) as response:
@ -370,7 +377,7 @@ async def process_images_and_generate_responses(message):
break
if not reply_punctuation:
# Check for new messages after the bot's last message
new_messages_object = [msg async for msg in channel.history(after=discord.Object(id=last_bot_message_id)) if last_bot_message_id]
new_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
new_messages = "\n".join([f"{m.author}: \n{m.content}" for m in new_messages_object]) + "\n"
try:
latest_message = new_messages_object[-1]
@ -381,7 +388,7 @@ async def process_images_and_generate_responses(message):
new_messages = old_messages
# If new messages are found, regenerate the response
if old_messages == new_messages or not reply_punctuation:
if old_messages == new_messages or not reply_punctuation and not re.fullmatch(r'[!?]+', reply_content):
logging.info('old_messages == new_messages')
# Check if the response consists of max amount of max_new_tokens
if x_output_tokens == 150:
@ -467,7 +474,7 @@ async def process_images_and_generate_responses(message):
new_body["prompt"] = prompt
if reply_content.endswith(("!", "?")) and _ >= 2:
return
except discord.errors.HTTPException:
except nextcord.errors.HTTPException:
pass
else:
logging.info('old_messages != new_messages')
@ -476,13 +483,14 @@ async def process_images_and_generate_responses(message):
except:
pass
if not reply_punctuation:
old_messages_object = [msg async for msg in channel.history(after=discord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages = "\n".join([f"{m.author}: \n{m.content}" for m in old_messages_object]) + "\n"
except requests.exceptions.RequestException as error:
logging.info(error)
except Exception as error:
logging.info(str(error))
logging.info(str(traceback.format_exc()))
@bot.tree.command(name="ping", description="Check if I'm alive!")
async def ping(interaction:discord.Interaction):
@bot.slash_command(name="ping", description="Check if I'm alive!")
async def ping(interaction:nextcord.Interaction):
try:
latency = round(bot.latency * 1000)
@ -500,8 +508,8 @@ async def ping(interaction:discord.Interaction):
await interaction.response.send_message(response, ephemeral=True)
@bot.tree.command(name="showprompt", description="Check my current prompt in full!")
async def showprompt(interaction:discord.Interaction):
@bot.slash_command(name="showprompt", description="Check my current prompt in full!")
async def showprompt(interaction:nextcord.Interaction):
global prompt
if 'prompt' not in globals():
await interaction.response.send_message("Send a message to me first to see the prompt!", ephemeral=True)
@ -586,9 +594,24 @@ async def showprompt(interaction:discord.Interaction):
await interaction.response.send_message(message, ephemeral=True)
def main():
token = os.environ['DISCORD_BOT_TOKEN']
bot.run(token)
@bot.event
async def on_resume():
if __name__ == '__main__':
main()
# Re-register intents
intents = nextcord.Intents.default()
intents.message_content = True
bot.intents = intents
logging.info('Bot is resumed')
def main():
bot.loop.create_task(message_queue.process_messages())
# Run bot
token = os.environ['DISCORD_BOT_TOKEN']
bot.run(token)
if __name__ == "__main__":
main()

224
llama_tokenizer_lite.py Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
discord.py
requests
PyYAML
nextcord
filetype
transformers[LlamaTokenizer]
sentencepiece # dependency of LlamaTokenizer
aiohttp