From 727d717b602a39c853e59e86c935feb32bcdc082 Mon Sep 17 00:00:00 2001 From: diamante0018 Date: Mon, 14 Apr 2025 17:53:21 +0200 Subject: [PATCH] feat: basic image upload to AI model (still no context window) --- Dockerfile | 1 + bot/ai/handle_request.py | 32 ++++++++++++++++++++++++--- bot/events_handlers/message_events.py | 14 +++++++++++- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index fd832c0..b65846c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,7 @@ COPY aw.py . COPY LICENSE . ENV BOT_TOKEN="" +ENV GOOGLE_API_KEY="" # Where the database will be stored ENV BOT_DATA_DIR="" diff --git a/bot/ai/handle_request.py b/bot/ai/handle_request.py index 504baf5..acdfe32 100644 --- a/bot/ai/handle_request.py +++ b/bot/ai/handle_request.py @@ -1,20 +1,46 @@ import os +import requests +from google.genai import types from google import genai API_KEY = os.getenv("GOOGLE_API_KEY") -async def forward_to_google_api(prompt): +async def forward_to_google_api(prompt, image_object=None): """ - Forwards the message content and optional image URL to a Google API. + Forwards the message content and optional image object to a Google API. Args: prompt (discord.Message): The message object to forward. + image_object (tuple, optional): A tuple containing the image URL and its MIME type (e.g., ("url", "image/jpeg")). """ + if not API_KEY: + await prompt.reply( + "Google API key is not set. Please contact the administrator.", + mention_author=True, + ) + return + client = genai.Client(api_key=API_KEY) + input = [prompt.content] + if image_object: + try: + image_url, mime_type = image_object + image = requests.get(image_url) + image.raise_for_status() + input.append(types.Part.from_bytes(data=image.content, mime_type=mime_type)) + except requests.RequestException: + await prompt.reply(f"Failed to fetch the image", mention_author=True) + return + response = client.models.generate_content( - model="gemini-2.0-flash", contents=prompt.content + model="gemini-2.0-flash", + contents=input, + config=types.GenerateContentConfig( + max_output_tokens=400, + system_instruction="You are a Discord chat bot named 'AlterWare' who helps users. You should limit your answers to be less than 2000 characters.", + ), ) await prompt.reply( diff --git a/bot/events_handlers/message_events.py b/bot/events_handlers/message_events.py index c9463d9..6fbc13b 100644 --- a/bot/events_handlers/message_events.py +++ b/bot/events_handlers/message_events.py @@ -317,7 +317,19 @@ async def handle_message(message, bot): staff_role = message.guild.get_role(STAFF_ROLE_ID) member = message.guild.get_member(message.author.id) if staff_role in member.roles: - await forward_to_google_api(message) + image_object = None + + for attachment in message.attachments: + if attachment.filename.lower().endswith( + ".jpg" + ) or attachment.filename.lower().endswith(".jpeg"): + image_object = (attachment.url, "image/jpeg") + break + elif attachment.filename.lower().endswith(".png"): + image_object = (attachment.url, "image/png") + break + + await forward_to_google_api(message, image_object) return # Too many mentions