Reviewer-amended patch: drop fake_useragent, take the User-Agent from set.yaml.

What the patch does:
  * gettags.py: wtfcloudflare() receives useragent/cookie as arguments, and
    get_tags() reads them from set.yaml and exits if no tags were collected.
  * nfavorites.py: removes the fake_useragent and gevent imports, writes and
    reads a "useragent" key in set.yaml, and adds check_pass(), which exits
    with a message when the site answers HTTP 403.

Reviewer changes relative to the submitted patch:
  * gettags.py get_tags(): set.yaml is parsed once and both keys are read
    from that mapping. The submitted version called yaml.load() twice on the
    same file handle; the second call reads an already-consumed stream and
    returns None, so indexing it with ["useragent"] raises TypeError.
  * nfavorites.py: fixed the typo in the English "403" message.
  * The blob ids on the "index" lines are those of the submitted patch and
    no longer describe the amended post-image.

diff --git a/gettags.py b/gettags.py
index c7b459a..e5ec250 100644
--- a/gettags.py
+++ b/gettags.py
@@ -1,24 +1,17 @@
 from bs4 import BeautifulSoup
 import requests
-import fake_useragent
 import json
 import yaml
 
 
 URL = "https://nhentai.net/tags/"
-ua = fake_useragent.UserAgent()
-useragent = ua.random
-with open('set.yaml', 'r') as f:
-    cookie = yaml.load(f, Loader=yaml.CLoader)["cookid"]
-    if cookie == "":
-        print("Please edit set.yaml")
-        exit()
 
-def wtfcloudflare(url,method="get",data=None):
+
+def wtfcloudflare(url, method="get", useragent=None, cookie=None, data=None):
     session = requests.Session()
     session.headers = {
         'Referer': "https://nhentai.net/login/",
-        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
+        'User-Agent': useragent,
         'Cookie': cookie,
         'Accept-Language': 'en-US,en;q=0.9',
         'Accept-Encoding': 'gzip, deflate, br',
@@ -26,15 +19,23 @@ def wtfcloudflare(url,method="get",data=None):
     if method == "get":
         r = session.get(url)
     elif method == "post":
-        r = session.post(url,data=data)
+        r = session.post(url, data=data)
     return r
 
+
 def get_tags():
+    with open('set.yaml', 'r') as f:
+        settings = yaml.load(f, Loader=yaml.CLoader)
+    cookie, useragent = settings["cookid"], settings["useragent"]
+    if cookie == "":
+        print("Please edit set.yaml")
+        exit()
     now = 1
     tagjson = {}
     while True:
-        data = wtfcloudflare(f"{URL}?page={now}")
+        data = wtfcloudflare(f"{URL}?page={now}",
+                             useragent=useragent, cookie=cookie)
         soup = BeautifulSoup(data.text, 'html.parser')
         print(data.text)
         tags = soup.find_all("a", class_='tag')
@@ -49,6 +50,9 @@ def get_tags():
         for i in enumerate(tagnumber):
             tagjson[i[1]] = tagnames[i[0]]
         now += 1
+    if tagjson == {}:
+        print("something wrong with your cookie or useragent")
+        exit()
     with open('tag.json', 'w') as f:
         json.dump(tagjson, f)
     return
diff --git a/nfavorites.py b/nfavorites.py
index f194c1d..feb67dc 100644
--- a/nfavorites.py
+++ b/nfavorites.py
@@ -3,22 +3,22 @@ from progress.spinner import PixelSpinner
 from bs4 import BeautifulSoup
 import yaml
 import requests
-import fake_useragent
 import locale
 import os
 import json
 import csv
-import gevent.monkey
-gevent.monkey.patch_all()
+
 
 if not os.path.isfile("set.yaml"):
     with open('set.yaml', 'w') as f:
-        yaml.dump({"cookid": ""}, f)
+        yaml.dump({"cookid": "", "useragent": ""}, f)
     print("Please edit set.yaml")
     exit()
 
 with open('set.yaml', 'r') as f:
-    cookie = yaml.load(f, Loader=yaml.CLoader)["cookid"]
+    data = yaml.load(f, Loader=yaml.CLoader)
+    cookie = data["cookid"]
+    useragent = data["useragent"]
     if cookie == "":
         print("Please edit set.yaml")
         exit()
@@ -32,8 +32,6 @@ now = 1
 allnumbers = []
 allnames = []
 alltags = []
-ua = fake_useragent.UserAgent()
-useragent = ua.random
 locate = locale.getdefaultlocale()[0]
 if locate == "zh_TW":
     language = {
@@ -41,6 +39,7 @@ if locate == "zh_TW":
         "nodata2": "抓取完畢",
         "usedata": "使用離線資料",
         "getdata": "抓取資料中...",
+        "403": "403 錯誤,可能被 cloudflare 阻擋,請檢查 cookie 是否正確",
     }
 else:
     language = {
@@ -48,8 +47,10 @@ else:
         "nodata2": "Done",
         "usedata": "Use offline data",
         "getdata": "Getting data...",
+        "403": "403 error, maybe blocked by cloudflare, please check if the cookie is correct",
     }
 
+
 def banner():
     data = """
  _ _ _ ___ _ _ __ ___| |__ _ __ | |_ __ _(_) / __\/_\/\ /\
@@ -60,11 +61,13 @@ def banner():
     print(data)
 
 # request
-def wtfcloudflare(url,method="get",data=None):
+
+
+def wtfcloudflare(url, method="get", data=None):
     session = requests.Session()
     session.headers = {
         'Referer': "https://nhentai.net/login/",
-        'User-Agent': "",
+        'User-Agent': useragent,
         'Cookie': cookie,
         'Accept-Language': 'en-US,en;q=0.9',
         'Accept-Encoding': 'gzip, deflate, br',
@@ -72,9 +75,20 @@ def wtfcloudflare(url,method="get",data=None):
     if method == "get":
         r = session.get(url)
     elif method == "post":
-        r = session.post(url,data=data)
+        r = session.post(url, data=data)
     return r
 
+
+def check_pass():
+    res = wtfcloudflare("https://nhentai.net/")
+    if res.status_code == 403:
+        print(language["403"])
+        exit()
+
+
+# --- main ---
+banner()
+check_pass()
 if not os.path.isfile("tag.json"):
     print(language["nodata"])
     get_tags()
@@ -102,7 +116,6 @@ while True:
     spinner.next()
 
 
-
 with open('tag.json', 'r') as f:
     tagjson = json.load(f)
 for i in enumerate(allnumbers):
@@ -114,7 +127,6 @@ for i in enumerate(allnumbers):
     table.append([i[1], allnames[i[0]], tagstr])
 
 
-
 with open('output.csv', 'w', newline='', encoding="utf_8_sig") as csvfile:
     writer = csv.writer(csvfile)
     writer.writerows(table)