fix format

This commit is contained in:
phillychi3 2023-03-12 13:18:25 +08:00
parent 2f3f03bcb1
commit 1ca169aef8
2 changed files with 45 additions and 34 deletions

16
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,16 @@
{
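// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387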
"version": "0.2.0",
"configurations": [
{
"name": "Python: 目前的檔案",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

View File

@ -1,21 +1,18 @@
import gevent.monkey
gevent.monkey.patch_all()
import csv
import json
import os
import queue
import random
import threading
import time
import fake_useragent
import requests
import yaml
from bs4 import BeautifulSoup
from progress.spinner import PixelSpinner
from gettags import get_tags from gettags import get_tags
from progress.spinner import PixelSpinner
from bs4 import BeautifulSoup
import yaml
import requests
import fake_useragent
import time
import threading
import random
import queue
import os
import json
import csv
import gevent.monkey
gevent.monkey.patch_all()
with open('set.yaml', 'r') as f: with open('set.yaml', 'r') as f:
@ -31,8 +28,9 @@ allnumbers = []
allnames = [] allnames = []
alltags = [] alltags = []
class gettagonline(threading.Thread): class gettagonline(threading.Thread):
def __init__(self, queue,number): def __init__(self, queue, number):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.number = number self.number = number
self.queue = queue self.queue = queue
@ -40,7 +38,7 @@ class gettagonline(threading.Thread):
def run(self): def run(self):
while self.queue.qsize() > 0: while self.queue.qsize() > 0:
num = self.queue.get() num = self.queue.get()
#print("get %d: %s" % (self.number, num)) # print("get %d: %s" % (self.number, num))
ua = fake_useragent.UserAgent() ua = fake_useragent.UserAgent()
useragent = ua.random useragent = ua.random
headers = { headers = {
@ -52,12 +50,11 @@ class gettagonline(threading.Thread):
for i in enumerate(data['tags']): for i in enumerate(data['tags']):
ctag.append(i[1]['name']) ctag.append(i[1]['name'])
alltags.append(ctag) alltags.append(ctag)
time.sleep(random.uniform(0.5,1)) time.sleep(random.uniform(0.5, 1))
set1 = input("請問要使用離線資料嗎?(y/n)(默認為否)") set1 = input("請問要使用離線資料嗎?(y/n)(默認為否)")
if set1 == "y".lower() or set1 == "yes".lower() : if set1 == "y".lower() or set1 == "yes".lower():
if not os.path.isfile("tag.json"): if not os.path.isfile("tag.json"):
print("沒有發現離線資料 抓取中請稍後...") print("沒有發現離線資料 抓取中請稍後...")
get_tags() get_tags()
@ -90,8 +87,8 @@ while True:
if book == []: if book == []:
break break
numbers = [t.get('data-id') for t in book] numbers = [t.get('data-id') for t in book]
names = [t.find('div',class_="caption").get_text() for t in book] names = [t.find('div', class_="caption").get_text() for t in book]
tags_ = [t.find('div',class_="gallery").get('data-tags') for t in book] tags_ = [t.find('div', class_="gallery").get('data-tags') for t in book]
tags = [] tags = []
for i in tags_: for i in tags_:
tags__ = i.split(' ') tags__ = i.split(' ')
@ -103,8 +100,7 @@ while True:
spinner.next() spinner.next()
if set1 == "y".lower() or set1 == "yes".lower():
if set1 == "y".lower() or set1 == "yes".lower() :
with open('tag.json', 'r') as f: with open('tag.json', 'r') as f:
tagjson = json.load(f) tagjson = json.load(f)
for i in enumerate(allnumbers): for i in enumerate(allnumbers):
@ -113,25 +109,24 @@ if set1 == "y".lower() or set1 == "yes".lower() :
if j in tagjson: if j in tagjson:
tagstr += tagjson[j] + ", " tagstr += tagjson[j] + ", "
table.append([i[1], allnames[i[0]], tagstr]) table.append([i[1], allnames[i[0]], tagstr])
else: else:
alltags=[] # 清空 alltags = [] # 清空
get_tags_queue = queue.Queue() get_tags_queue = queue.Queue()
threads = [] threads = []
for i in allnumbers: for i in allnumbers:
get_tags_queue.put(i) get_tags_queue.put(i)
for i in range(threadscount): for i in range(threadscount):
t = gettagonline(get_tags_queue,i) t = gettagonline(get_tags_queue, i)
t.start() t.start()
threads.append(t) threads.append(t)
for t in threads: for t in threads:
t.join() t.join()
for i in enumerate(allnumbers): for i in enumerate(allnumbers):
table.append([i[1], allnames[i[0]], alltags[i[0]]]) table.append([i[1], allnames[i[0]], alltags[i[0]]])
with open('output.csv', 'w', newline='',encoding="utf_8_sig") as csvfile: with open('output.csv', 'w', newline='', encoding="utf_8_sig") as csvfile:
writer = csv.writer(csvfile) writer = csv.writer(csvfile)
writer.writerows(table) writer.writerows(table)