fix #5

2024-02-22 18:55:21 +08:00 · 2024-02-22 18:55:21 +08:00 · bbfab783b5
commit bbfab783b5
parent 55d27d0402
3 changed files with 197 additions and 195 deletions
--- a/example_tag.json
+++ b/example_tag.json
--- a/gettags.py
+++ b/gettags.py
@@ -1,62 +1,64 @@
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup
-import requests
+import requests
-import json
+import json
-import yaml
+import yaml
-
+
-
+
-URL = "https://nhentai.net/tags/"
+URL = "https://nhentai.net/tags/"
-
+
-
+
-def wtfcloudflare(url, method="get", useragent=None, cookie=None, data=None):
+def wtfcloudflare(url, method="get", useragent=None, cookie=None, data=None):
-    session = requests.Session()
+    session = requests.Session()
-    session.headers = {
+    session.headers = {
-        'Referer': "https://nhentai.net/login/",
+        'Referer': "https://nhentai.net/login/",
-        'User-Agent': useragent,
+        'User-Agent': useragent,
-        'Cookie': cookie,
+        'Cookie': cookie,
-        'Accept-Language': 'en-US,en;q=0.9',
+        'Accept-Language': 'en-US,en;q=0.9',
-        'Accept-Encoding': 'gzip, deflate, br',
+        'Accept-Encoding': 'gzip, deflate, br',
-    }
+    }
-    if method == "get":
+    if method == "get":
-        r = session.get(url)
+        r = session.get(url)
-    elif method == "post":
+    elif method == "post":
-        r = session.post(url, data=data)
+        r = session.post(url, data=data)
-    return r
+    return r
-
+
-
+
-def get_tags():
+def get_tags():
-    with open('set.yaml', 'r') as f:
+    with open('set.yaml', 'r') as f:
-        cookie = yaml.load(f, Loader=yaml.CLoader)["cookid"]
+        data = yaml.load(f, Loader=yaml.CLoader)
-        useragent = yaml.load(f, Loader=yaml.CLoader)["useragent"]
+        cookie = data["cookid"]
-        if cookie == "":
+        useragent = data["useragent"]
-            print("Please edit set.yaml")
+        if cookie == "":
-            exit()
+            print("Please edit set.yaml")
-    now = 1
+            exit()
-    tagjson = {}
+    now = 1
-
+    tagjson = {}
-    while True:
+
-        data = wtfcloudflare(f"{URL}?page={now}",
+    while True:
-                             useragent=useragent, cookie=cookie)
+        data = wtfcloudflare(f"{URL}?page={now}",
-        soup = BeautifulSoup(data.text, 'html.parser')
+                             useragent=useragent, cookie=cookie)
-        print(data.text)
+        soup = BeautifulSoup(data.text, 'html.parser')
-        tags = soup.find_all("a", class_='tag')
+        tags = soup.find_all("a", class_='tag')
-        if tags == []:
+        if tags == []:
-            break
+            break
-        tagnumbers = [t.get('class') for t in tags]
+        tagnumbers = [t.get('class') for t in tags]
-        tagnames = [t.find('span', class_='name').get_text() for t in tags]
+        tagnames = [t.find('span', class_='name').get_text() for t in tags]
-        tagnumber = []
+        tagnumber = []
-        for i in tagnumbers:
+        for i in tagnumbers:
-            fixnum = i[1].replace('tag-', '')
+            fixnum = i[1].replace('tag-', '')
-            tagnumber.append(fixnum)
+            tagnumber.append(fixnum)
-        for i in enumerate(tagnumber):
+        for i in enumerate(tagnumber):
-            tagjson[i[1]] = tagnames[i[0]]
+            tagjson[i[1]] = tagnames[i[0]]
-        now += 1
+        print(f"page {now} done")
-    if tagjson == {}:
+        now += 1
-        print("something wrong with your cookie or useragent")
+    if tagjson == {}:
-        exit()
+        print("something wrong with your cookie or useragent")
-    with open('tag.json', 'w') as f:
+        exit()
-        json.dump(tagjson, f)
+    with open('tag.json', 'w') as f:
-    return
+        json.dump(tagjson, f)
-
+    print("tag.json saved")
-
+    return
-if __name__ == '__main__':
+
-    get_tags()
+
+if __name__ == '__main__':
+    get_tags()
--- a/nfavorites.py
+++ b/nfavorites.py
@@ -1,132 +1,132 @@
-from gettags import get_tags
+from gettags import get_tags
-from progress.spinner import PixelSpinner
+from progress.spinner import PixelSpinner
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup
-import yaml
+import yaml
-import requests
+import requests
-import locale
+import locale
-import os
+import os
-import json
+import json
-import csv
+import csv
-
+
-
+
-if not os.path.isfile("set.yaml"):
+if not os.path.isfile("set.yaml"):
-    with open('set.yaml', 'w') as f:
+    with open('set.yaml', 'w') as f:
-        yaml.dump({"cookid": "", "useragent": ""}, f)
+        yaml.dump({"cookid": "", "useragent": ""}, f)
-    print("Please edit set.yaml")
+    print("Please edit set.yaml")
-    exit()
+    exit()
-
+
-with open('set.yaml', 'r') as f:
+with open('set.yaml', 'r') as f:
-    data = yaml.load(f, Loader=yaml.CLoader)
+    data = yaml.load(f, Loader=yaml.CLoader)
-    cookie = data["cookid"]
+    cookie = data["cookid"]
-    useragent = data["useragent"]
+    useragent = data["useragent"]
-    if cookie == "":
+    if cookie == "":
-        print("Please edit set.yaml")
+        print("Please edit set.yaml")
-        exit()
+        exit()
-# setting
+# setting
-URL = "https://nhentai.net/favorites/"
+URL = "https://nhentai.net/favorites/"
-APIURL = "https://nhentai.net/api/gallery/"
+APIURL = "https://nhentai.net/api/gallery/"
-table = [
+table = [
-    ["id", "name", "tags"]
+    ["id", "name", "tags"]
-]
+]
-now = 1
+now = 1
-allnumbers = []
+allnumbers = []
-allnames = []
+allnames = []
-alltags = []
+alltags = []
-locate = locale.getdefaultlocale()[0]
+locate = locale.getdefaultlocale()[0]
-if locate == "zh_TW":
+if locate == "zh_TW":
-    language = {
+    language = {
-        "nodata": "沒有發現離線資料 抓取中請稍後...",
+        "nodata": "沒有發現離線資料 抓取中請稍後...",
-        "nodata2": "抓取完畢",
+        "nodata2": "抓取完畢",
-        "usedata": "使用離線資料",
+        "usedata": "使用離線資料",
-        "getdata": "抓取資料中...",
+        "getdata": "抓取資料中...",
-        "403": "403 錯誤，可能被 cloudflare 阻擋，請檢查 cookie 是否正確",
+        "403": "403 錯誤，可能被 cloudflare 阻擋，請檢查 cookie 是否正確",
-    }
+    }
-else:
+else:
-    language = {
+    language = {
-        "nodata": "No offline data found, please wait a moment...",
+        "nodata": "No offline data found, please wait a moment...",
-        "nodata2": "Done",
+        "nodata2": "Done",
-        "usedata": "Use offline data",
+        "usedata": "Use offline data",
-        "getdata": "Getting data...",
+        "getdata": "Getting data...",
-        "403": "403 error, maby block by cloudflare , please check if the cookie is correct",
+        "403": "403 error, maby block by cloudflare , please check if the cookie is correct",
-    }
+    }
-
+
-
+
-def banner():
+def banner():
-    data = """            _           _        _         ___  _         
+    data = """            _           _        _         ___  _
- _ __   ___| |__  _ __ | |_ __ _(_)       / __\/_\/\   /\ 
+ _ __   ___| |__  _ __ | |_ __ _(_)       / __\/_\/\   /\
-| '_ \ / _ \ '_ \| '_ \| __/ _` | |_____ / _\ //_\\ \ / / 
+| '_ \ / _ \ '_ \| '_ \| __/ _` | |_____ / _\ //_\\ \ / /
-| | | |  __/ | | | | | | || (_| | |_____/ /  /  _  \ V /  
+| | | |  __/ | | | | | | || (_| | |_____/ /  /  _  \ V /
-|_| |_|\___|_| |_|_| |_|\__\__,_|_|     \/   \_/ \_/\_/   
+|_| |_|\___|_| |_|_| |_|\__\__,_|_|     \/   \_/ \_/\_/
-                                                          """
+                                                          """
-    print(data)
+    print(data)
-
+
-# request
+# request
-
+
-
+
-def wtfcloudflare(url, method="get", data=None):
+def wtfcloudflare(url, method="get", data=None):
-    session = requests.Session()
+    session = requests.Session()
-    session.headers = {
+    session.headers = {
-        'Referer': "https://nhentai.net/login/",
+        'Referer': "https://nhentai.net/login/",
-        'User-Agent': useragent,
+        'User-Agent': useragent,
-        'Cookie': cookie,
+        'Cookie': cookie,
-        'Accept-Language': 'en-US,en;q=0.9',
+        'Accept-Language': 'en-US,en;q=0.9',
-        'Accept-Encoding': 'gzip, deflate, br',
+        'Accept-Encoding': 'gzip, deflate, br',
-    }
+    }
-    if method == "get":
+    if method == "get":
-        r = session.get(url)
+        r = session.get(url)
-    elif method == "post":
+    elif method == "post":
-        r = session.post(url, data=data)
+        r = session.post(url, data=data)
-    return r
+    return r
-
+
-
+
-def check_pass():
+def check_pass():
-    res = wtfcloudflare("https://nhentai.net/")
+    res = wtfcloudflare("https://nhentai.net/")
-    if res.status_code == 403:
+    if res.status_code == 403:
-        print(language["403"])
+        print(language["403"])
-        exit()
+        exit()
-
+
-
+
-# --- main ---
+# --- main ---
-banner()
+banner()
-check_pass()
+check_pass()
-if not os.path.isfile("tag.json"):
+if not os.path.isfile("tag.json"):
-    print(language["nodata"])
+    print(language["nodata"])
-    get_tags()
+    get_tags()
-    print(language["nodata2"])
+    print(language["nodata2"])
-print(language["usedata"])
+print(language["usedata"])
-
+
-spinner = PixelSpinner(language["getdata"])
+spinner = PixelSpinner(language["getdata"])
-while True:
+while True:
-    data = wtfcloudflare(f"{URL}?page={now}")
+    data = wtfcloudflare(f"{URL}?page={now}")
-    soup = BeautifulSoup(data.text, 'html.parser')
+    soup = BeautifulSoup(data.text, 'html.parser')
-    book = soup.find_all("div", class_='gallery-favorite')
+    book = soup.find_all("div", class_='gallery-favorite')
-    if book == []:
+    if book == []:
-        break
+        break
-    numbers = [t.get('data-id') for t in book]
+    numbers = [t.get('data-id') for t in book]
-    names = [t.find('div', class_="caption").get_text() for t in book]
+    names = [t.find('div', class_="caption").get_text() for t in book]
-    tags_ = [t.find('div', class_="gallery").get('data-tags') for t in book]
+    tags_ = [t.find('div', class_="gallery").get('data-tags') for t in book]
-    tags = []
+    tags = []
-    for i in tags_:
+    for i in tags_:
-        tags__ = i.split(' ')
+        tags__ = i.split(' ')
-        tags.append(tags__)
+        tags.append(tags__)
-    allnumbers.extend(numbers)
+    allnumbers.extend(numbers)
-    allnames.extend(names)
+    allnames.extend(names)
-    alltags.extend(tags)
+    alltags.extend(tags)
-    now += 1
+    now += 1
-    spinner.next()
+    spinner.next()
-
+
-
+
-with open('tag.json', 'r') as f:
+with open('tag.json', 'r') as f:
-    tagjson = json.load(f)
+    tagjson = json.load(f)
-for i in enumerate(allnumbers):
+for i in enumerate(allnumbers):
-    tagstr = ""
+    tagstr = ""
-    for j in alltags[i[0]]:
+    for j in alltags[i[0]]:
-        if j in tagjson:
+        if j in tagjson:
-            tagstr += tagjson[j] + ", "
+            tagstr += tagjson[j] + ", "
-
+
-    table.append([i[1], allnames[i[0]], tagstr])
+    table.append([i[1], allnames[i[0]], tagstr])
-
+
-
+
-with open('output.csv', 'w', newline='', encoding="utf_8_sig") as csvfile:
+with open('output.csv', 'w', newline='', encoding="utf_8_sig") as csvfile:
-    writer = csv.writer(csvfile)
+    writer = csv.writer(csvfile)
-    writer.writerows(table)
+    writer.writerows(table)