"""Download the .torrent file for every gallery id listed in output.csv.

Credentials (cookie and user agent) are read from ``set.yaml``; a template
is written on first run. Torrent files are saved under ``torrents/``.
"""
import csv
import json
import locale
import os
import sys

from bs4 import BeautifulSoup
from progress.spinner import PixelSpinner
import requests
import yaml

SETTINGS_FILE = "set.yaml"


def _load_settings(path=SETTINGS_FILE):
    """Read ``(cookie, useragent)`` from the YAML settings file.

    Prints a hint and exits the process when the cookie is missing or
    empty, because every request below needs it.
    """
    with open(path, 'r') as f:
        # safe_load is enough for a mapping of plain strings and, unlike
        # yaml.CLoader, does not require PyYAML to be built with libyaml.
        settings = yaml.safe_load(f) or {}
    cookie_value = settings.get("cookid", "")
    useragent_value = settings.get("useragent", "")
    if not cookie_value:
        print("Please edit set.yaml")
        sys.exit()
    return cookie_value, useragent_value


# First run: write an empty template for the user to fill in, then stop.
if not os.path.isfile(SETTINGS_FILE):
    with open(SETTINGS_FILE, 'w') as f:
        yaml.dump({"cookid": "", "useragent": ""}, f)
    print("Please edit set.yaml")
    sys.exit()

cookie, useragent = _load_settings()

# setting
URL = "https://nhentai.net/favorites/"
APIURL = "https://nhentai.net/api/gallery/"
table = [
    ["id", "name", "tags"]
]
now = 1
allnumbers = []
allnames = []
alltags = []

# NOTE(review): locale.getdefaultlocale() is documented as deprecated in
# recent Python versions; kept because alternatives can report a
# differently formatted locale name on some platforms.
locate = locale.getdefaultlocale()[0]
if locate == "zh_TW":
    language = {
        "nodata": "沒有發現離線資料 抓取中請稍後...",
        "nodata2": "抓取完畢",
        "usedata": "使用離線資料",
        "getdata": "抓取資料中...",
        "403": "403 錯誤,可能被 cloudflare 阻擋,請檢查 cookie 是否正確",
        "nologin": "未登入,請先登入",
        "done": "完成"
    }
else:
    language = {
        "nodata": "No offline data found, please wait a moment...",
        "nodata2": "Done",
        "usedata": "Use offline data",
        "getdata": "Getting data...",
        "403": "403 error, maby block by cloudflare , please check if the cookie is correct",
        "nologin": "Not login, please login first",
        "done": "Done"
    }


def banner():
    """Print the ASCII-art program banner."""
    # The literal is kept character-for-character; adjacent raw strings are
    # concatenated only to keep source lines short.
    # NOTE(review): the art contains no line breaks in this literal —
    # confirm against the intended layout.
    data = (
        r" _ _ _ ___ _ \ "
        r"_ __ ___| |__ _ __ | |_ __ _(_) / __\/_\/\ /\ \ "
        r"| '_ \ / _ \ '_ \| '_ \| __/ _` | |_____ / _\ //_\\ \ / / \ "
        r"| | | | __/ | | | | | | || (_| | |_____/ / / _ \ V / \ "
        r"|_| |_|\___|_| |_|_| |_|\__\__,_|_| \/ \_/ \_/\_/ \ "
    )
    print(data)


def _build_headers(useragent_value, cookie_value):
    """Return the request headers shared by both HTTP helpers."""
    return {
        'Referer': "https://nhentai.net/login/",
        'User-Agent': useragent_value,
        'Cookie': cookie_value,
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
        'Accept-Encoding': 'gzip, deflate',
    }


def wtfcloudflare(url, method="get", data=None):
    """Send a GET or POST using the module-level cookie and user agent.

    Args:
        url: Address to request.
        method: Either ``"get"`` or ``"post"``.
        data: Form payload, used for POST only.

    Returns:
        The ``requests.Response`` with its encoding forced to UTF-8.

    Raises:
        ValueError: If ``method`` is neither ``"get"`` nor ``"post"``.
    """
    if method not in ("get", "post"):
        # Previously this fell through and failed on an unbound local.
        raise ValueError(f"Unsupported HTTP method: {method!r}")
    # The body is fully downloaded before the session closes (no streaming),
    # so the returned response stays usable.
    with requests.Session() as session:
        # Assigning (not updating) replaces the session's default headers.
        session.headers = _build_headers(useragent, cookie)
        if method == "get":
            r = session.get(url)
        else:
            r = session.post(url, data=data)
    r.encoding = 'utf-8'
    return r


def wtfcloudflare_t(url, method="get", data=None, useragent=None, cookie=None):
    """Send a streamed GET or POST with explicitly supplied credentials.

    Args:
        url: Address to request.
        method: Either ``"get"`` or ``"post"``.
        data: Form payload, used for POST only.
        useragent: Value for the User-Agent header.
        cookie: Value for the Cookie header.

    Returns:
        A streamed ``requests.Response``; the caller reads the body (for
        example with ``iter_content``) and should close it afterwards.

    Raises:
        ValueError: If ``method`` is neither ``"get"`` nor ``"post"``.
        requests.HTTPError: If the server answers with an error status.
    """
    if method not in ("get", "post"):
        raise ValueError(f"Unsupported HTTP method: {method!r}")
    # The session is intentionally left open: the body has not been read
    # yet when this function returns.
    session = requests.Session()
    session.headers = _build_headers(useragent, cookie)
    if method == "get":
        r = session.get(url, stream=True)  # stream large/binary payloads
    else:
        r = session.post(url, data=data, stream=True)
    r.raise_for_status()  # surface HTTP errors to the caller
    return r


def check_pass():
    """Exit with a localized hint when the site front page returns 403."""
    res = wtfcloudflare("https://nhentai.net/")
    if res.status_code == 403:
        print(language["403"])
        sys.exit()


url_list = []


def build_id_list():
    """Append one download URL to ``url_list`` per id found in output.csv.

    Rows whose ``id`` column is absent or empty are printed and skipped.
    """
    # utf-8-sig strips a leading byte-order mark so the first header
    # name is read cleanly.
    with open('output.csv', 'r', encoding='utf-8-sig') as file:
        reader = csv.DictReader(file)
        # Show the exact header names to help diagnose a malformed CSV.
        print(reader.fieldnames)
        for row in reader:
            # DictReader fills short rows with None, so testing key
            # presence alone would yield ".../g/None/download".
            if row.get('id'):
                formatted_url = f"https://nhentai.net/g/{row['id']}/download"
                url_list.append(formatted_url)
            else:
                print(f"Row without 'id': {row}")


def get_torrents():
    """Download every URL in ``url_list`` to ``torrents/<id>.torrent``.

    Credentials are re-read from set.yaml; the process exits if the cookie
    is empty. An HTTP error status on any download propagates as
    ``requests.HTTPError``.
    """
    cookie, useragent = _load_settings()

    output_dir = "torrents"
    os.makedirs(output_dir, exist_ok=True)  # once, not per download

    for url in url_list:
        response = wtfcloudflare_t(url, useragent=useragent, cookie=cookie)
        # Close the streamed response once the body has been written.
        with response:
            # "https://nhentai.net/g/<id>/download".split('/') puts the
            # id at index 4.
            torrent_id = url.split('/')[4]
            torrent_path = os.path.join(output_dir, f"{torrent_id}.torrent")

            with open(torrent_path, 'wb') as torrent_file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        torrent_file.write(chunk)

        print(f"Downloaded torrent: {torrent_path}")


# NOTE: these calls run at import time as well as when executed as a script.
banner()
check_pass()
build_id_list()

if __name__ == '__main__':
    get_torrents()