This commit is contained in:
philly 2022-04-03 03:15:42 +08:00
parent 1cd42d2f5e
commit d5b237d06b
7 changed files with 210 additions and 1 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
*.part
*.pyc
*.log
set.yaml
tag.json
output.csv

View File

@ -1 +1,20 @@
# nhentai-favorites
# nhentai-favorites
### how to use?
`pip install -r ".\requirements.txt"`
rename `Rename_me_set.yaml` to `set.yaml`
enter your cookie
open nfavorites.py
### how to get my cookie?
open https://nhentai.net/favorites/
press F12
switch to network menu
find favorites/
copy cookie to set.yaml
![alt text](https://github.com/phillychi3/nhentai-favorites/blob/main/image/csv.png?raw=true)

1
Rename_me_set.yaml Normal file
View File

@ -0,0 +1 @@
cookid: ""

40
gettags.py Normal file
View File

@ -0,0 +1,40 @@
import gevent.monkey
gevent.monkey.patch_all()
import json
import fake_useragent
import requests
from bs4 import BeautifulSoup
url = "https://nhentai.net/tags/"


def get_tags():
    """Scrape every tag id -> name pair from nhentai's tag index.

    Walks ``/tags/?page=N`` until a page yields no tag anchors, then
    writes the accumulated ``{tag_id: tag_name}`` mapping to tag.json.

    Returns:
        None. The result is persisted to ``tag.json`` as a side effect.
    """
    page = 1
    tagjson = {}
    # Build the UserAgent pool once instead of once per page (it parses a
    # local database on construction); .random still varies per request.
    ua = fake_useragent.UserAgent()
    while True:
        headers = {'user-agent': ua.random}
        resp = requests.get(f"{url}?page={page}", headers=headers, timeout=30)
        soup = BeautifulSoup(resp.text, 'html.parser')
        tags = soup.find_all("a", class_='tag')
        if not tags:
            # Past the last page: nhentai serves a page with no tag anchors.
            break
        for tag in tags:
            # The anchor's class list looks like ['tag', 'tag-12345'];
            # the numeric tag id is the second entry minus its prefix.
            tag_id = tag.get('class')[1].replace('tag-', '')
            name = tag.find('span', class_='name').get_text()
            tagjson[tag_id] = name
        page += 1
    with open('tag.json', 'w', encoding='utf-8') as f:
        json.dump(tagjson, f, ensure_ascii=False)
    return


if __name__ == '__main__':
    get_tags()

BIN
image/csv.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

137
nfavorites.py Normal file
View File

@ -0,0 +1,137 @@
import gevent.monkey
gevent.monkey.patch_all()
import csv
import json
import os
import queue
import random
import threading
import time
import fake_useragent
import requests
import yaml
from bs4 import BeautifulSoup
from progress.spinner import PixelSpinner
from gettags import get_tags
# --- configuration & shared state ---------------------------------------
with open('set.yaml', 'r', encoding='utf-8') as f:
    # safe_load instead of load(Loader=yaml.CLoader): the config only holds
    # a cookie string, and CLoader raises AttributeError on PyYAML builds
    # without the libyaml C extension. safe_load also refuses arbitrary
    # object construction from the file.
    cookie = yaml.safe_load(f)["cookid"]  # NOTE: key is spelled "cookid" in set.yaml
# setting
url = "https://nhentai.net/favorites/"
apiurl = "https://nhentai.net/api/gallery/"
# CSV rows; first row is the header.
table = [
    ["id", "name", "tags"]
]
now = 1          # favorites page cursor
allnumbers = []  # gallery ids, in page order
allnames = []    # gallery captions, parallel to allnumbers
alltags = []     # per-gallery tag lists, parallel to allnumbers
class gettagonline(threading.Thread):
    """Worker thread that resolves gallery ids to tag-name lists.

    Drains gallery ids from the shared *queue*, fetches each gallery from
    the nhentai JSON API, and appends the gallery's list of tag names to
    the module-level ``alltags`` list.

    NOTE(review): workers append in completion order, not queue order, so
    ``alltags`` is not guaranteed to line up index-for-index with
    ``allnumbers`` — confirm against the consumer before trusting rows.
    """

    def __init__(self, queue, number):
        threading.Thread.__init__(self)
        self.number = number  # worker index, kept for debugging output
        self.queue = queue    # shared queue.Queue of gallery id strings

    def run(self):
        # One UserAgent pool per worker; .random varies per request.
        ua = fake_useragent.UserAgent()
        while True:
            # get_nowait() + Empty fixes the qsize()/get() check-then-act
            # race: with blocking get(), another worker could drain the
            # queue between the size check and the get, hanging this
            # thread forever.
            try:
                num = self.queue.get_nowait()
            except queue.Empty:
                break
            headers = {'user-agent': ua.random}
            r = requests.get(apiurl + num, headers=headers)
            data = r.json()
            ctag = [t['name'] for t in data['tags']]
            alltags.append(ctag)
            # Jittered delay to avoid hammering the API.
            time.sleep(random.uniform(0.5, 1))
# --- interactive options -------------------------------------------------
set1 = input("請問要使用離線資料嗎?(y/n)(默認為否)")
# Bug fix: the original compared against '"y".lower()', which lower-cases
# the *literal*, not the user's input — so "Y"/"Yes" were never accepted.
# Normalize the input instead.
use_offline = set1.strip().lower() in ("y", "yes")
if use_offline:
    if not os.path.isfile("tag.json"):
        print("沒有發現離線資料 抓取中請稍後...")
        get_tags()
        print("抓取完畢")
    print("使用離線資料")
else:
    print("使用線上資料")

threadscount = input("請輸入要使用幾個線程(默認為5 不可超過10)")
try:
    threadscount = int(threadscount)
except ValueError:  # covers "" (the default) and any non-numeric input
    threadscount = 5
# Clamp to 1..10: the original only capped the top, so 0 or a negative
# number silently started no workers at all.
threadscount = max(1, min(threadscount, 10))

# --- scrape the favorites pages ------------------------------------------
spinner = PixelSpinner('抓取資料中...')
while True:
    ua = fake_useragent.UserAgent()
    headers = {
        'user-agent': ua.random,
        'cookie': cookie,
    }
    data = requests.get(f"{url}?page={now}", headers=headers)
    soup = BeautifulSoup(data.text, 'html.parser')
    book = soup.find_all("div", class_='gallery-favorite')
    if not book:
        break  # past the last favorites page
    numbers = [t.get('data-id') for t in book]
    names = [t.find('div', class_="caption").get_text() for t in book]
    # data-tags is a space-separated list of numeric tag ids.
    tags = [t.find('div', class_="gallery").get('data-tags').split(' ')
            for t in book]
    allnumbers.extend(numbers)
    allnames.extend(names)
    alltags.extend(tags)
    now += 1
    spinner.next()

# --- resolve tag ids to names and build the CSV rows ----------------------
if use_offline:
    with open('tag.json', 'r', encoding='utf-8') as f:
        tagjson = json.load(f)
    for idx, gid in enumerate(allnumbers):
        # Join the readable names of the tag ids we know about; ids missing
        # from tag.json are skipped, as before. join() also drops the
        # trailing ", " the original string-concatenation left behind.
        tagstr = ", ".join(tagjson[t] for t in alltags[idx] if t in tagjson)
        table.append([gid, allnames[idx], tagstr])
else:
    alltags = []  # discard page-order tag-id lists; workers refill with API names
    get_tags_queue = queue.Queue()
    for gid in allnumbers:
        get_tags_queue.put(gid)
    threads = [gettagonline(get_tags_queue, i) for i in range(threadscount)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # NOTE(review): workers append in completion order, so alltags may not
    # align index-for-index with allnumbers — verify before trusting rows.
    for idx, gid in enumerate(allnumbers):
        table.append([gid, allnames[idx], alltags[idx]])

# utf_8_sig adds a BOM so Excel opens the CJK text correctly.
with open('output.csv', 'w', newline='', encoding="utf_8_sig") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(table)

6
requirements.txt Normal file
View File

@ -0,0 +1,6 @@
PyYAML == 5.4.1
bs4
fake_useragent == 0.1.11
gevent == 21.1.2
progress == 1.6
requests == 2.27.1