diff --git a/.gitignore b/.gitignore index 12444fc..af27279 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ *.part *.pyc *.log + +# output set.yaml tag.json -output.csv \ No newline at end of file +output.csv +torrents diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. 
+ +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/README.md b/README.md index 82e2988..dc7b058 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # nhentai-favorites +Zǎoshang hǎo zhōngguó xiànzài wǒ yǒu BING CHILLING 🥶🍦 wǒ hěn xǐhuān BING CHILLING 🥶🍦 dànshì sùdù yǔ jīqíng 9 bǐ BING CHILLING 🥶🍦 sùdù yǔ jīqíng sùdù yǔ jīqíng 9 wǒ zuì xǐhuān suǒyǐ…xiànzài shì yīnyuè shíjiān zhǔnbèi 1 2 3 liǎng gè lǐbài yǐhòu sùdù yǔ jīqíng 9 ×3 bùyào wàngjì bùyào cu òguò jìdé qù diànyǐngyuàn kàn sùdù yǔ jīqíng 9 yīn wéi fēicháng hǎo diànyǐng dòngzuò fēicháng hǎo chàbùduō yīyàng BING CHILLING 🥶🍦zàijiàn 🥶🍦 + +This project is a meme but it works until you have too many favorites to scrape and you get rate limited, or so I was told by a friend, not that I would know. + ## how to use? 
# Setup (from the README): pip install -r ".\requirements.txt"
"""Download the .torrent file of every gallery listed in ``output.csv``.

The cookie and user agent sent with each request are read from ``set.yaml``;
an empty template is written (and the script exits) when that file is
missing. Torrents are saved as ``torrents/<gallery id>.torrent``.
"""
import csv
import json
import locale
import os
import sys

import requests
import yaml
from bs4 import BeautifulSoup
from progress.spinner import PixelSpinner

SETTINGS_FILE = "set.yaml"


def _load_settings():
    """Return ``(cookie, useragent)`` from set.yaml, exiting if unusable.

    Writes an empty template and exits when the file does not exist, and
    exits when the cookie has not been filled in.
    """
    if not os.path.isfile(SETTINGS_FILE):
        with open(SETTINGS_FILE, 'w') as f:
            yaml.dump({"cookid": "", "useragent": ""}, f)
        print("Please edit set.yaml")
        sys.exit()

    with open(SETTINGS_FILE, 'r') as f:
        # safe_load only builds plain Python objects and, unlike
        # yaml.CLoader, does not require the optional libyaml bindings.
        # An empty file parses to None, hence the fallback.
        settings = yaml.safe_load(f) or {}

    cookie_value = settings.get("cookid", "")
    user_agent = settings.get("useragent", "")
    if not cookie_value:
        print("Please edit set.yaml")
        sys.exit()
    return cookie_value, user_agent


cookie, useragent = _load_settings()

# setting
URL = "https://nhentai.net/favorites/"
APIURL = "https://nhentai.net/api/gallery/"
table = [
    ["id", "name", "tags"]
]
now = 1
allnumbers = []
allnames = []
alltags = []
# NOTE: locale.getdefaultlocale() is deprecated since Python 3.11; kept
# because its replacements report locale names in a different format.
locate = locale.getdefaultlocale()[0]
if locate == "zh_TW":
    language = {
        "nodata": "沒有發現離線資料 抓取中請稍後...",
        "nodata2": "抓取完畢",
        "usedata": "使用離線資料",
        "getdata": "抓取資料中...",
        "403": "403 錯誤,可能被 cloudflare 阻擋,請檢查 cookie 是否正確",
        "nologin": "未登入,請先登入",
        "done": "完成"
    }
else:
    language = {
        "nodata": "No offline data found, please wait a moment...",
        "nodata2": "Done",
        "usedata": "Use offline data",
        "getdata": "Getting data...",
        "403": "403 error, maby block by cloudflare , please check if the cookie is correct",
        "nologin": "Not login, please login first",
        "done": "Done"
    }

# Request URLs produced by build_id_list() and consumed by get_torrents().
url_list = []


def banner():
    """Print the ASCII-art banner."""
    # A single-quoted raw string can only span lines through the trailing
    # backslashes; being raw, each backslash and newline is printed too.
    data = r"                _           _        _         ___  _         \
     _ __   ___| |__  _ __ | |_ __ _(_)       / __\/_\/\   /\ \
    | '_ \ / _ \ '_ \| '_ \| __/ _` | |_____ / _\ //_\\ \ / / \
    | | | |  __/ | | | | | | || (_| | |_____/ /  /  _  \ V /  \
    |_| |_|\___|_| |_|_| |_|\__\__,_|_|     \/   \_/ \_/\_/   \
    "
    print(data)


def _build_session(user_agent, cookie_value):
    """Create a requests session carrying the browser-like header set."""
    session = requests.Session()
    # Assigned (not updated) on purpose: this replaces requests' default
    # headers, so exactly these headers are sent.
    session.headers = {
        'Referer': "https://nhentai.net/login/",
        'User-Agent': user_agent,
        'Cookie': cookie_value,
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
        'Accept-Encoding': 'gzip, deflate',
    }
    return session


def _send(session, url, method, data, **kwargs):
    """Issue a GET or POST on *session*; reject any other method."""
    if method == "get":
        return session.get(url, **kwargs)
    if method == "post":
        return session.post(url, data=data, **kwargs)
    raise ValueError(f"unsupported HTTP method: {method!r}")


def wtfcloudflare(url, method="get", data=None):
    """Fetch *url* with the configured cookie and user agent.

    Args:
        url: Address to request.
        method: ``"get"`` or ``"post"``.
        data: Form payload, used for POST only.

    Returns:
        The ``requests.Response`` with its encoding forced to UTF-8. The
        status code is not checked here.

    Raises:
        ValueError: If *method* is neither ``"get"`` nor ``"post"``.
    """
    # The body is fully read before the session closes (no streaming), so
    # the returned response stays usable.
    with _build_session(useragent, cookie) as session:
        r = _send(session, url, method, data)
    r.encoding = 'utf-8'
    return r


def wtfcloudflare_t(url, method="get", data=None, useragent=None, cookie=None):
    """Open a streamed request for a large or binary payload.

    Args:
        url: Address to request.
        method: ``"get"`` or ``"post"``.
        data: Form payload, used for POST only.
        useragent: Value for the User-Agent header.
        cookie: Value for the Cookie header.

    Returns:
        A streamed ``requests.Response``. The caller must close it (for
        example with a ``with`` block) once the body has been consumed.

    Raises:
        ValueError: If *method* is neither ``"get"`` nor ``"post"``.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    session = _build_session(useragent, cookie)
    r = _send(session, url, method, data, stream=True)
    try:
        r.raise_for_status()
    except requests.HTTPError:
        # Release the connection before propagating the error.
        r.close()
        raise
    return r


def check_pass():
    """Exit with a hint when the site answers the front page with 403."""
    res = wtfcloudflare("https://nhentai.net/")
    if res.status_code == 403:
        print(language["403"])
        sys.exit()


def build_id_list():
    """Append one download URL per ``id`` found in output.csv to url_list."""
    # utf-8-sig strips a leading byte-order mark, which would otherwise
    # become part of the first header name.
    with open('output.csv', 'r', encoding='utf-8-sig') as file:
        reader = csv.DictReader(file)

        # Show the parsed header names to help diagnose malformed files.
        print(reader.fieldnames)

        for row in reader:
            gallery_id = row.get('id')
            if gallery_id:
                url_list.append(f"https://nhentai.net/g/{gallery_id}/download")
            else:
                # Missing column, short row or empty cell: no usable URL.
                print(f"Row without 'id': {row}")


def get_torrents():
    """Download every URL in url_list into the ``torrents`` directory.

    Raises:
        requests.HTTPError: On the first download the server rejects; the
            remaining URLs are not attempted.
    """
    cookie, useragent = _load_settings()

    output_dir = "torrents"
    os.makedirs(output_dir, exist_ok=True)

    for url in url_list:
        # "https://nhentai.net/g/<id>/download".split('/') holds the id at
        # index 4.
        torrent_id = url.split('/')[4]
        torrent_path = os.path.join(output_dir, f"{torrent_id}.torrent")

        with wtfcloudflare_t(url, useragent=useragent, cookie=cookie) as response, \
                open(torrent_path, 'wb') as torrent_file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # Filter out keep-alive chunks
                    torrent_file.write(chunk)

        print(f"Downloaded torrent: {torrent_path}")


banner()
check_pass()
build_id_list()


if __name__ == '__main__':
    get_torrents()