diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..12444fc --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.part +*.pyc +*.log +set.yaml +tag.json +output.csv \ No newline at end of file diff --git a/README.md b/README.md index e7ce25f..3dffd8f 100644 --- a/README.md +++ b/README.md @@ -1 +1,20 @@ -# nhentai-favorites \ No newline at end of file +# nhentai-favorites + +### how to use? +`pip install -r requirements.txt` + + +Rename Rename_me_set.yaml to set.yaml +enter your cookie +open nfavorites.py + +### how to get my cookie? + +open https://nhentai.net/favorites/ +press F12 +switch to the Network tab +find favorites/ +copy cookie to set.yaml + + +![alt text](https://github.com/phillychi3/nhentai-favorites/blob/main/image/csv.png?raw=true) diff --git a/Rename_me_set.yaml b/Rename_me_set.yaml new file mode 100644 index 0000000..f656f48 --- /dev/null +++ b/Rename_me_set.yaml @@ -0,0 +1 @@ +cookid: "" \ No newline at end of file diff --git a/gettags.py b/gettags.py new file mode 100644 index 0000000..7d9a8ce --- /dev/null +++ b/gettags.py @@ -0,0 +1,40 @@ +import gevent.monkey +gevent.monkey.patch_all() +import json + +import fake_useragent +import requests +from bs4 import BeautifulSoup + +url = "https://nhentai.net/tags/" +def get_tags(): + now = 1 + tagjson = {} + + while True: + ua = fake_useragent.UserAgent() + useragent = ua.random + headers = { + 'user-agent': useragent + } + data = requests.get(f"{url}?page={now}", headers=headers) + soup = BeautifulSoup(data.text, 'html.parser') + tags = soup.find_all("a", class_='tag') + if tags == []: + break + tagnumbers = [t.get('class') for t in tags] + tagnames = [t.find('span', class_='name').get_text() for t in tags] + tagnumber = [] + for i in tagnumbers: + fixnum = i[1].replace('tag-', '') + tagnumber.append(fixnum) + for i in enumerate(tagnumber): + tagjson[i[1]] = tagnames[i[0]] + now += 1 + with open('tag.json', 'w') as f: + json.dump(tagjson, f) + return + + +if __name__ == 
'__main__': + get_tags() diff --git a/image/csv.png b/image/csv.png new file mode 100644 index 0000000..667d558 Binary files /dev/null and b/image/csv.png differ diff --git a/nfavorites.py b/nfavorites.py new file mode 100644 index 0000000..a4ce012 --- /dev/null +++ b/nfavorites.py @@ -0,0 +1,137 @@ +import gevent.monkey + +gevent.monkey.patch_all() +import csv +import json +import os +import queue +import random +import threading +import time + +import fake_useragent +import requests +import yaml +from bs4 import BeautifulSoup +from progress.spinner import PixelSpinner + +from gettags import get_tags + + +with open('set.yaml', 'r') as f: + cookie = yaml.load(f, Loader=yaml.CLoader)["cookid"] +# setting +url = "https://nhentai.net/favorites/" +apiurl = "https://nhentai.net/api/gallery/" +table = [ + ["id", "name", "tags"] +] +now = 1 +allnumbers = [] +allnames = [] +alltags = [] + +class gettagonline(threading.Thread): + def __init__(self, queue,number): + threading.Thread.__init__(self) + self.number = number + self.queue = queue + + def run(self): + while self.queue.qsize() > 0: + num = self.queue.get() + #print("get %d: %s" % (self.number, num)) + ua = fake_useragent.UserAgent() + useragent = ua.random + headers = { + 'user-agent': useragent + } + r = requests.get(apiurl + num, headers=headers) + data = r.json() + ctag = [] + for i in enumerate(data['tags']): + ctag.append(i[1]['name']) + alltags.append(ctag) + time.sleep(random.uniform(0.5,1)) + + + +set1 = input("請問要使用離線資料嗎?(y/n)(默認為否)") +if set1 == "y".lower() or set1 == "yes".lower() : + if not os.path.isfile("tag.json"): + print("沒有發現離線資料 抓取中請稍後...") + get_tags() + print("抓取完畢") + print("使用離線資料") +else: + print("使用線上資料") + threadscount = input("請輸入要使用幾個線程(默認為5 不可超過10)") + if threadscount == "": + threadscount = 5 + else: + try: + threadscount = int(threadscount) + if threadscount > 10: + threadscount = 10 + except: + threadscount = 5 + +spinner = PixelSpinner('抓取資料中...') +while True: + ua = 
fake_useragent.UserAgent() + useragent = ua.random + headers = { + 'user-agent': useragent, + 'cookie': cookie + } + data = requests.get(f"{url}?page={now}", headers=headers) + soup = BeautifulSoup(data.text, 'html.parser') + book = soup.find_all("div", class_='gallery-favorite') + if book == []: + break + numbers = [t.get('data-id') for t in book] + names = [t.find('div',class_="caption").get_text() for t in book] + tags_ = [t.find('div',class_="gallery").get('data-tags') for t in book] + tags = [] + for i in tags_: + tags__ = i.split(' ') + tags.append(tags__) + allnumbers.extend(numbers) + allnames.extend(names) + alltags.extend(tags) + now += 1 + spinner.next() + + + +if set1 == "y".lower() or set1 == "yes".lower() : + with open('tag.json', 'r') as f: + tagjson = json.load(f) + for i in enumerate(allnumbers): + tagstr = "" + for j in alltags[i[0]]: + if j in tagjson: + tagstr += tagjson[j] + ", " + + table.append([i[1], allnames[i[0]], tagstr]) +else: + alltags=[] # 清空 + get_tags_queue = queue.Queue() + threads = [] + for i in allnumbers: + get_tags_queue.put(i) + for i in range(threadscount): + t = gettagonline(get_tags_queue,i) + t.start() + threads.append(t) + for t in threads: + t.join() + + + for i in enumerate(allnumbers): + table.append([i[1], allnames[i[0]], alltags[i[0]]]) + + +with open('output.csv', 'w', newline='',encoding="utf_8_sig") as csvfile: + writer = csv.writer(csvfile) + writer.writerows(table) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e822f0a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +PyYAML == 5.4.1 +bs4 +fake_useragent == 0.1.11 +gevent == 21.1.2 +progress == 1.6 +requests == 2.27.1