Uses custom tokenizer, nextcord, alpine, and more

This commit is contained in:
cyi1341 2023-11-03 22:09:32 +08:00
parent 94a7d75a8a
commit a82cbe2b01
5 changed files with 291 additions and 42 deletions

View File

@ -1,17 +1,18 @@
# Base image
FROM python:3.11-slim-buster
# Use the Alpine variant of the Python 3.11 image as base image
FROM python:3.11-alpine
# Create app directory
WORKDIR /app
# Install Python dependencies
# Install dependencies
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install -r requirements.txt
# Set working directory
WORKDIR /app
# Bundle app source
COPY . .
# Expose ports
EXPOSE 8080
# Expose port
EXPOSE 8080
CMD python bot.py
# Run bot
CMD ["python", "bot.py"]

View File

@ -2,4 +2,5 @@
A Discord bot that tries to use free and unreliable APIs of various AI models to simulate a Discord user.
Tokenizer modified from: https://github.com/belladoreai/llama-tokenizer-js
To be determined.

81
bot.py
View File

@ -1,13 +1,15 @@
import os
import discord
import requests
import json
import yaml
import asyncio
from discord.ext import commands
import nextcord
from nextcord.ext import commands
import re
import threading
from transformers import LlamaTokenizer
from llama_tokenizer_lite import LlamaTokenizerLite
import logging
import traceback
from datetime import timedelta
import tempfile
import filetype
@ -23,14 +25,15 @@ def similar(a, b):
logging.basicConfig(level=logging.INFO, format='%(message)s')
tokenizer = LlamaTokenizer.from_pretrained('hf-internal-testing/llama-tokenizer')
Mancer_Key = os.getenv("MANCER_KEY")
Mancer_Model = "mytholite"
Mancer_URL = "https://neuro.mancer.tech/webui/"+Mancer_Model+"/api"
intents = discord.Intents.all()
bot = commands.Bot(command_prefix="i dont know if i should hate discord or discord.py sssaaa", intents=intents)
intents = nextcord.Intents.default()
intents.message_content = True
bot = nextcord.Client()
tokenizer = LlamaTokenizerLite()
oldest_message_timestamp = None
last_bot_message_id = None
@ -96,7 +99,7 @@ class MessageQueue():
message_queue = MessageQueue()
@bot.event
async def on_ready():
async def on_ready():
for guild in bot.guilds:
for channel in guild.text_channels:
if channel.permissions_for(guild.me).send_messages:
@ -120,15 +123,16 @@ async def on_ready():
last_bot_message_id = sent_message.id
await message_queue.add_message(sent_message)
break
await bot.tree.sync()
logging.info('Bot is ready')
@bot.listen('on_message')
@bot.event
async def on_message(message):
await message_queue.add_message(message)
logging.info('New message received')
@bot.listen('on_message')
@bot.event
async def process_images_and_generate_responses(message):
logging.info("process function initialised")
global oldest_message_timestamp
global last_bot_message_id
global image_cache
@ -143,15 +147,18 @@ async def process_images_and_generate_responses(message):
oldest_message_timestamp = message.created_at
messages.append(message)
else:
async for m in channel.history(after=oldest_message_timestamp):
async for m in channel.history(limit=200, after=oldest_message_timestamp):
messages.append(m)
logging.info(f"Number of messages it's reading: {len(messages)}")
try:
latest_message = messages[-1]
logging.info(f"latest message: {latest_message}")
latest_author_id = latest_message.author.id
except:
return
if latest_author_id == bot.user.id:
logging.info("latest author is bot")
return
while True:
@ -337,14 +344,14 @@ async def process_images_and_generate_responses(message):
for id in list(image_cache.keys()):
if id not in message_ids:
del image_cache[id]
old_messages_object = [msg async for msg in channel.history(after=discord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages = "\n".join([f"{m.author}: \n{m.content}" for m in old_messages_object]) + "\n"
if len(old_messages) == 0:
return
logging.info("4 before generating stuff")
if _ != 0:
await asyncio.sleep(0.3)
await asyncio.sleep(0.5)
async with channel.typing():
async with aiohttp.ClientSession() as session:
async with session.post(Mancer_URL + "/v1/generate", json=new_body, headers=headers) as response:
@ -370,7 +377,7 @@ async def process_images_and_generate_responses(message):
break
if not reply_punctuation:
# Check for new messages after the bot's last message
new_messages_object = [msg async for msg in channel.history(after=discord.Object(id=last_bot_message_id)) if last_bot_message_id]
new_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
new_messages = "\n".join([f"{m.author}: \n{m.content}" for m in new_messages_object]) + "\n"
try:
latest_message = new_messages_object[-1]
@ -381,7 +388,7 @@ async def process_images_and_generate_responses(message):
new_messages = old_messages
# If new messages are found, regenerate the response
if old_messages == new_messages or not reply_punctuation:
if old_messages == new_messages or not reply_punctuation and not re.fullmatch(r'[!?]+', reply_content):
logging.info('old_messages == new_messages')
# Check if the response consists of max amount of max_new_tokens
if x_output_tokens == 150:
@ -467,7 +474,7 @@ async def process_images_and_generate_responses(message):
new_body["prompt"] = prompt
if reply_content.endswith(("!", "?")) and _ >= 2:
return
except discord.errors.HTTPException:
except nextcord.errors.HTTPException:
pass
else:
logging.info('old_messages != new_messages')
@ -476,13 +483,14 @@ async def process_images_and_generate_responses(message):
except:
pass
if not reply_punctuation:
old_messages_object = [msg async for msg in channel.history(after=discord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages_object = [msg async for msg in channel.history(after=nextcord.Object(id=last_bot_message_id)) if last_bot_message_id]
old_messages = "\n".join([f"{m.author}: \n{m.content}" for m in old_messages_object]) + "\n"
except requests.exceptions.RequestException as error:
logging.info(error)
except Exception as error:
logging.info(str(error))
logging.info(str(traceback.format_exc()))
@bot.tree.command(name="ping", description="Check if I'm alive!")
async def ping(interaction:discord.Interaction):
@bot.slash_command(name="ping", description="Check if I'm alive!")
async def ping(interaction:nextcord.Interaction):
try:
latency = round(bot.latency * 1000)
@ -500,8 +508,8 @@ async def ping(interaction:discord.Interaction):
await interaction.response.send_message(response, ephemeral=True)
@bot.tree.command(name="showprompt", description="Check my current prompt in full!")
async def showprompt(interaction:discord.Interaction):
@bot.slash_command(name="showprompt", description="Check my current prompt in full!")
async def showprompt(interaction:nextcord.Interaction):
global prompt
if 'prompt' not in globals():
await interaction.response.send_message("Send a message to me first to see the prompt!", ephemeral=True)
@ -586,9 +594,24 @@ async def showprompt(interaction:discord.Interaction):
await interaction.response.send_message(message, ephemeral=True)
def main():
token = os.environ['DISCORD_BOT_TOKEN']
bot.run(token)
@bot.event
async def on_resume():
if __name__ == '__main__':
main()
# Re-register intents
intents = nextcord.Intents.default()
intents.message_content = True
bot.intents = intents
logging.info('Bot is resumed')
def main():
bot.loop.create_task(message_queue.process_messages())
# Run bot
token = os.environ['DISCORD_BOT_TOKEN']
bot.run(token)
if __name__ == "__main__":
main()

224
llama_tokenizer_lite.py Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
discord.py
requests
PyYAML
nextcord
filetype
transformers[LlamaTokenizer]
sentencepiece # dependency of LlamaTokenizer
aiohttp