mirror of
https://github.com/alterware/aw-bot.git
synced 2025-10-26 14:15:54 +00:00
135 lines
4.5 KiB
Python
135 lines
4.5 KiB
Python
import os
|
|
import aiohttp
|
|
import asyncio
|
|
from bs4 import BeautifulSoup
|
|
|
|
from bot.log import logger
|
|
|
|
# Discourse connection settings, read from the process environment at import
# time. Each value is None when its variable is not set.
DISCOURSE_BASE_URL = os.environ.get("DISCOURSE_BASE_URL")
API_KEY = os.environ.get("DISCOURSE_API_KEY")
API_USERNAME = os.environ.get("DISCOURSE_API_USERNAME")

# Header mapping sent along with the HTTP requests made by this module.
headers = {
    "Api-Key": API_KEY,
    "Api-Username": API_USERNAME,
}
|
|
|
|
|
|
async def get_topics_by_id(topic_id):
    """
    Async: Retrieve one topic by ID and return its decoded JSON payload.

    Sends a GET request for ``/t/<topic_id>.json`` under DISCOURSE_BASE_URL
    with the module-level API headers and a 5 second total timeout.

    Args:
        topic_id (int): The ID of the topic to fetch.

    Returns:
        dict or None: The decoded JSON body on HTTP 200. None on any other
        status, on timeout, or on an aiohttp client error; each failure is
        logged before returning.
    """
    request_timeout = aiohttp.ClientTimeout(total=5)
    endpoint = f"{DISCOURSE_BASE_URL}/t/{topic_id}.json"

    try:
        async with aiohttp.ClientSession() as client:
            async with client.get(endpoint, headers=headers, timeout=request_timeout) as response:
                # Success is the only path that returns data.
                if response.status == 200:
                    return await response.json()

                if response.status == 403:
                    logger.error(
                        f"Access forbidden for topic {topic_id}: {response.status}"
                    )
                else:
                    # Include the response body to help diagnose other statuses.
                    text = await response.text()
                    logger.error(
                        f"Error fetching topic {topic_id}: {response.status} - {text}"
                    )
    except asyncio.TimeoutError:
        logger.error(f"Timeout while fetching topic {topic_id}")
    except aiohttp.ClientError as e:
        logger.error(f"Request failed for topic {topic_id}: {e}")

    # Every non-success path ends up here.
    return None
|
|
|
|
|
|
async def get_topics_by_tag(tag_name):
    """
    Async: Fetches all topics with a specific tag and retrieves the cooked string from each post.

    The tag listing is requested from ``/tag/<tag_name>.json`` under
    DISCOURSE_BASE_URL with a 5 second total timeout. Each listed topic is
    then fetched one at a time through get_topics_by_id, and the "cooked"
    value of every post in its post stream is collected.

    Args:
        tag_name (str): The name of the tag to filter topics.

    Returns:
        list: A list of cooked strings from all posts in the topics. An empty
        list is returned on every failure path (HTTP 403, any other non-200
        status, timeout, or aiohttp client error); each failure is logged.
    """
    url = f"{DISCOURSE_BASE_URL}/tag/{tag_name}.json"
    timeout = aiohttp.ClientTimeout(total=5)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers, timeout=timeout) as response:
                if response.status == 403:
                    logger.error(
                        f"Access forbidden for tag '{tag_name}': {response.status}"
                    )
                    # Return a list here as well (this branch used to return
                    # None), so callers can always iterate the result.
                    return []

                if response.status != 200:
                    text = await response.text()
                    logger.error(
                        f"Error fetching topics with tag '{tag_name}': {response.status} - {text}"
                    )
                    return []

                data = await response.json()
    except asyncio.TimeoutError:
        logger.error(f"Timeout while fetching topics with tag '{tag_name}'")
        return []
    except aiohttp.ClientError as e:
        logger.error(f"Request failed for topics with tag {tag_name}: {e}")
        return []

    # The listing is fully read at this point; the per-topic requests below
    # open their own sessions and handle their own errors in get_topics_by_id.
    topics = data.get("topic_list", {}).get("topics", [])
    cooked_strings = []
    for topic in topics:
        topic_data = await get_topics_by_id(topic["id"])
        if not topic_data:
            # The topic could not be fetched (already logged); skip it.
            continue
        posts = topic_data.get("post_stream", {}).get("posts", [])
        cooked_strings.extend(post.get("cooked", "") for post in posts)
    return cooked_strings
|
|
|
|
|
|
async def fetch_cooked_posts(tag_name):
    """
    Async: Collect the cooked strings of posts in topics carrying a tag.

    This is a thin wrapper that delegates to get_topics_by_tag and hands back
    its result unchanged.

    Args:
        tag_name (str): The name of the tag to filter topics.

    Returns:
        Whatever get_topics_by_tag returns for ``tag_name`` (documented there
        as a list of cooked strings).
    """
    cooked_posts = await get_topics_by_tag(tag_name)
    return cooked_posts
|
|
|
|
|
|
def html_to_text(html_content):
    """
    Cleans the provided HTML content and converts it to plain text.

    The markup is parsed with BeautifulSoup's "html.parser" backend. The
    extracted text pieces are joined with newlines, and leading and trailing
    whitespace is stripped from the result.

    Args:
        html_content (str or None): The HTML content to clean. None or an
            empty string yields an empty string.

    Returns:
        str: The cleaned plain text.
    """
    # Guard against a missing body (None or empty) so callers get "" back
    # rather than an error raised while constructing the parser.
    if not html_content:
        return ""

    soup = BeautifulSoup(html_content, "html.parser")
    return soup.get_text(separator="\n").strip()
|
|
|
|
|
|
def combine_posts_text(posts):
    """
    Merge the cooked HTML of several posts into one plain text block.

    Each post's "cooked" value is converted with html_to_text, and the
    resulting pieces are joined with a blank line between them.

    Args:
        posts (list): A list of posts, each containing a "cooked" HTML string.

    Returns:
        str: The combined plain text of all posts (empty when ``posts`` is
        empty).
    """
    plain_blocks = []
    for entry in posts:
        plain_blocks.append(html_to_text(entry["cooked"]))
    return "\n\n".join(plain_blocks)
|