phillychi3 2024-02-22 18:55:21 +08:00
parent 55d27d0402
commit bbfab783b5
3 changed files with 197 additions and 195 deletions

File diff suppressed because one or more lines are too long

gettags.py

@@ -1,62 +1,64 @@
 from bs4 import BeautifulSoup
 import requests
 import json
 import yaml
 
 URL = "https://nhentai.net/tags/"
 
 
 def wtfcloudflare(url, method="get", useragent=None, cookie=None, data=None):
     session = requests.Session()
     session.headers = {
         'Referer': "https://nhentai.net/login/",
         'User-Agent': useragent,
         'Cookie': cookie,
         'Accept-Language': 'en-US,en;q=0.9',
         'Accept-Encoding': 'gzip, deflate, br',
     }
     if method == "get":
         r = session.get(url)
     elif method == "post":
         r = session.post(url, data=data)
     return r
 
 
 def get_tags():
     with open('set.yaml', 'r') as f:
-        cookie = yaml.load(f, Loader=yaml.CLoader)["cookid"]
-        useragent = yaml.load(f, Loader=yaml.CLoader)["useragent"]
+        data = yaml.load(f, Loader=yaml.CLoader)
+        cookie = data["cookid"]
+        useragent = data["useragent"]
     if cookie == "":
         print("Please edit set.yaml")
         exit()
     now = 1
     tagjson = {}
 
     while True:
         data = wtfcloudflare(f"{URL}?page={now}",
                              useragent=useragent, cookie=cookie)
         soup = BeautifulSoup(data.text, 'html.parser')
-        print(data.text)
         tags = soup.find_all("a", class_='tag')
         if tags == []:
             break
         tagnumbers = [t.get('class') for t in tags]
         tagnames = [t.find('span', class_='name').get_text() for t in tags]
         tagnumber = []
         for i in tagnumbers:
             fixnum = i[1].replace('tag-', '')
             tagnumber.append(fixnum)
         for i in enumerate(tagnumber):
             tagjson[i[1]] = tagnames[i[0]]
+        print(f"page {now} done")
         now += 1
     if tagjson == {}:
         print("something wrong with your cookie or useragent")
         exit()
     with open('tag.json', 'w') as f:
         json.dump(tagjson, f)
+    print("tag.json saved")
     return
 
 
 if __name__ == '__main__':
     get_tags()
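To make the scraping loop above easier to follow, here is a small self-contained sketch of the id/name extraction it performs. The HTML fragment is an assumption modelled on the selectors the script uses (an anchor with classes "tag" and "tag-<id>" wrapping a span with class "name"), not a captured page:

from bs4 import BeautifulSoup

# Hypothetical markup mirroring the selectors in get_tags(); the id and name
# are invented for illustration.
html = '<a class="tag tag-12345" href="#"><span class="name">example tag</span></a>'

soup = BeautifulSoup(html, 'html.parser')
tagjson = {}
for t in soup.find_all("a", class_="tag"):
    tag_id = t.get("class")[1].replace("tag-", "")       # second class is "tag-<id>"
    tag_name = t.find("span", class_="name").get_text()  # display name
    tagjson[tag_id] = tag_name

print(tagjson)  # {'12345': 'example tag'}

The other file shown in this commit is the favourites exporter below, which imports get_tags from this module.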


@@ -1,132 +1,132 @@
from gettags import get_tags
from progress.spinner import PixelSpinner
from bs4 import BeautifulSoup
import yaml
import requests
import locale
import os
import json
import csv

if not os.path.isfile("set.yaml"):
    with open('set.yaml', 'w') as f:
        yaml.dump({"cookid": "", "useragent": ""}, f)
    print("Please edit set.yaml")
    exit()

with open('set.yaml', 'r') as f:
    data = yaml.load(f, Loader=yaml.CLoader)
    cookie = data["cookid"]
    useragent = data["useragent"]

if cookie == "":
    print("Please edit set.yaml")
    exit()

# setting
URL = "https://nhentai.net/favorites/"
APIURL = "https://nhentai.net/api/gallery/"
table = [
    ["id", "name", "tags"]
]
now = 1
allnumbers = []
allnames = []
alltags = []

locate = locale.getdefaultlocale()[0]
if locate == "zh_TW":
    language = {
        "nodata": "沒有發現離線資料 抓取中請稍後...",
        "nodata2": "抓取完畢",
        "usedata": "使用離線資料",
        "getdata": "抓取資料中...",
        "403": "403 錯誤,可能被 cloudflare 阻擋,請檢查 cookie 是否正確",
    }
else:
    language = {
        "nodata": "No offline data found, please wait a moment...",
        "nodata2": "Done",
        "usedata": "Use offline data",
        "getdata": "Getting data...",
        "403": "403 error, maby block by cloudflare , please check if the cookie is correct",
    }


def banner():
    data = """ _ _ _ ___ _
 _ __ ___| |__ _ __ | |_ __ _(_) / __\/_\/\ /\
| '_ \ / _ \ '_ \| '_ \| __/ _` | |_____ / _\ //_\\ \ / /
| | | | __/ | | | | | | || (_| | |_____/ / / _ \ V /
|_| |_|\___|_| |_|_| |_|\__\__,_|_| \/ \_/ \_/\_/
"""
    print(data)


# request
def wtfcloudflare(url, method="get", data=None):
    session = requests.Session()
    session.headers = {
        'Referer': "https://nhentai.net/login/",
        'User-Agent': useragent,
        'Cookie': cookie,
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
    }
    if method == "get":
        r = session.get(url)
    elif method == "post":
        r = session.post(url, data=data)
    return r


def check_pass():
    res = wtfcloudflare("https://nhentai.net/")
    if res.status_code == 403:
        print(language["403"])
        exit()


# --- main ---
banner()
check_pass()
if not os.path.isfile("tag.json"):
    print(language["nodata"])
    get_tags()
    print(language["nodata2"])
print(language["usedata"])

spinner = PixelSpinner(language["getdata"])
while True:
    data = wtfcloudflare(f"{URL}?page={now}")
    soup = BeautifulSoup(data.text, 'html.parser')
    book = soup.find_all("div", class_='gallery-favorite')
    if book == []:
        break
    numbers = [t.get('data-id') for t in book]
    names = [t.find('div', class_="caption").get_text() for t in book]
    tags_ = [t.find('div', class_="gallery").get('data-tags') for t in book]
    tags = []
    for i in tags_:
        tags__ = i.split(' ')
        tags.append(tags__)
    allnumbers.extend(numbers)
    allnames.extend(names)
    alltags.extend(tags)
    now += 1
    spinner.next()

with open('tag.json', 'r') as f:
    tagjson = json.load(f)

for i in enumerate(allnumbers):
    tagstr = ""
    for j in alltags[i[0]]:
        if j in tagjson:
            tagstr += tagjson[j] + ", "
    table.append([i[1], allnames[i[0]], tagstr])

with open('output.csv', 'w', newline='', encoding="utf_8_sig") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(table)
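As a usage illustration of the mapping loop above, here is a minimal sketch of how a favourite's data-tags attribute becomes the tags column of output.csv. The tag ids and names are invented for the example; in a real run they come from the tag.json file that gettags.py writes:

# Invented stand-in for tag.json (tag id -> tag name).
tagjson = {"12345": "example tag", "67890": "another tag"}

# Each gallery-favorite div carries a space-separated data-tags attribute.
data_tags = "12345 67890 99999"   # 99999 has no entry in tag.json and is skipped

tagstr = ""
for tag_id in data_tags.split(" "):
    if tag_id in tagjson:
        tagstr += tagjson[tag_id] + ", "   # same ", "-terminated join as the script

print(tagstr)  # "example tag, another tag, " (trailing separator kept, as in the CSV)

The rows are then written with the utf_8_sig encoding, which prepends a UTF-8 BOM so spreadsheet tools such as Excel recognise the encoding of the non-ASCII tag names.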