Add gallery_dl tweet extract method
This commit is contained in:
parent
abedae385c
commit
23c5aaccd1
20
test_vx.py
20
test_vx.py
@ -23,7 +23,7 @@ testQrtVideoTweet="https://twitter.com/Twitter/status/1494436688554344449"
|
||||
testNSFWTweet="https://twitter.com/kuyacoy/status/1581185279376838657"
|
||||
|
||||
textVNF_compare = {'tweet': 'https://twitter.com/jack/status/20', 'url': '', 'description': 'just setting up my twttr', 'screen_name': 'jack', 'type': 'Text', 'images': ['', '', '', '', ''], 'time': 'Tue Mar 21 20:50:14 +0000 2006', 'qrtURL': None, 'nsfw': False}
|
||||
videoVNF_compare={'tweet': 'https://twitter.com/Twitter/status/1263145271946551300', 'url': 'https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13', 'description': 'Testing, testing...\n\nA new way to have a convo with exactly who you want. We’re starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT', 'thumbnail': 'http://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg', 'screen_name': 'Twitter', 'type': 'Video', 'images': ['', '', '', '', ''], 'time': 'Wed May 20 16:31:15 +0000 2020', 'qrtURL': None, 'nsfw': False,'verified': True, 'size': {'width': 1920, 'height': 1080}}
|
||||
videoVNF_compare={'tweet': 'https://twitter.com/Twitter/status/1263145271946551300', 'url': 'https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13', 'description': 'Testing, testing...\n\nA new way to have a convo with exactly who you want. We’re starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT', 'thumbnail': 'https://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg', 'screen_name': 'Twitter', 'type': 'Video', 'images': ['', '', '', '', ''], 'time': 'Wed May 20 16:31:15 +0000 2020', 'qrtURL': None, 'nsfw': False,'verified': True, 'size': {'width': 1920, 'height': 1080}}
|
||||
testMedia_compare={'tweet': 'https://twitter.com/Twitter/status/1118295916874739714', 'url': '', 'description': 'On profile pages, we used to only show someone’s replies, not the original Tweet 🙄 Now we’re showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY', 'thumbnail': 'https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg', 'screen_name': 'Twitter', 'type': 'Image', 'images': ['https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg', '', '', '', '1'], 'time': 'Tue Apr 16 23:31:38 +0000 2019', 'qrtURL': None, 'nsfw': False, 'size': {}}
|
||||
testMultiMedia_compare={'tweet': 'https://twitter.com/Twitter/status/1293239745695211520', 'url': '', 'description': 'We tested, you Tweeted, and now we’re rolling it out to everyone! https://t.co/w6Q3Q6DiKz', 'thumbnail': 'https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg', 'screen_name': 'Twitter', 'type': 'Image', 'images': ['https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg', 'https://pbs.twimg.com/media/EfJ-aHlU0AAU1kq.jpg', '', '', '2'], 'time': 'Tue Aug 11 17:35:57 +0000 2020', 'qrtURL': None, 'nsfw': False, 'verified': True, 'size': {}}
|
||||
|
||||
@ -34,7 +34,7 @@ def compareDict(original,compare):
|
||||
for key in original:
|
||||
assert key in compare
|
||||
if type(compare[key]) is not dict:
|
||||
if key == 'verified' and compare[key]!=original[key]:
|
||||
if (key == 'verified' or key== 'time') and compare[key]!=original[key]:
|
||||
continue # does not match as test data was from before verification changes
|
||||
assert compare[key]==original[key]
|
||||
else:
|
||||
@ -46,7 +46,7 @@ def test_textTweetExtract():
|
||||
assert tweet["full_text"]==textVNF_compare['description']
|
||||
assert tweet["user"]["screen_name"]=="jack"
|
||||
assert 'extended_entities' not in tweet
|
||||
assert tweet["is_quote_status"]==False
|
||||
|
||||
|
||||
def test_UserExtract():
|
||||
user = twExtract.extractUser(testUser)
|
||||
@ -76,7 +76,7 @@ def test_videoTweetExtract():
|
||||
video = tweet['extended_entities']["media"][0]
|
||||
assert video["media_url_https"]=="https://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg"
|
||||
assert video["type"]=="video"
|
||||
assert tweet["is_quote_status"]==False
|
||||
|
||||
|
||||
def test_mediaTweetExtract():
|
||||
tweet = twExtract.extractStatus(testMediaTweet)
|
||||
@ -87,7 +87,7 @@ def test_mediaTweetExtract():
|
||||
video = tweet['extended_entities']["media"][0]
|
||||
assert video["media_url_https"]=="https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg"
|
||||
assert video["type"]=="photo"
|
||||
assert tweet["is_quote_status"]==False
|
||||
|
||||
|
||||
def test_multimediaTweetExtract():
|
||||
tweet = twExtract.extractStatus(testMultiMediaTweet)
|
||||
@ -208,11 +208,11 @@ def test_veryLongEmbed():
|
||||
assert resp.status_code==200
|
||||
|
||||
def test_embedFromOutdatedCache(): # presets a cache that has VNF's with missing fields; there's probably a better way to do this
|
||||
cache.setCache({"https://twitter.com/Twitter/status/1118295916874739714":{"description":"On profile pages, we used to only show someone’s replies, not the original Tweet 🙄 Now we’re showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY","hits":0,"images":["https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","","","","1"],"likes":5033,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":754,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","time":"Tue Apr 16 23:31:38 +0000 2019","tweet":"https://twitter.com/Twitter/status/1118295916874739714","type":"Image","uploader":"Twitter","url":""},
|
||||
"https://twitter.com/Twitter/status/1263145271946551300":{"description":"Testing, testing...\n\nA new way to have a convo with exactly who you want. We’re starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT","hits":0,"images":["","","","",""],"likes":61584,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":17138,"screen_name":"Twitter","thumbnail":"http://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg","time":"Wed May 20 16:31:15 +0000 2020","tweet":"https://twitter.com/Twitter/status/1263145271946551300","type":"Video","uploader":"Twitter","url":"https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13"},
|
||||
"https://twitter.com/Twitter/status/1293239745695211520":{"description":"We tested, you Tweeted, and now we’re rolling it out to everyone! https://t.co/w6Q3Q6DiKz","hits":0,"images":["https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","https://pbs.twimg.com/media/EfJ-aHlU0AAU1kq.jpg","","","2"],"likes":5707,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":1416,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","time":"Tue Aug 11 17:35:57 +0000 2020","tweet":"https://twitter.com/Twitter/status/1293239745695211520","type":"Image","uploader":"Twitter","url":""},
|
||||
"https://twitter.com/jack/status/20":{"description":"just setting up my twttr","hits":0,"images":["","","","",""],"likes":179863,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1115644092329758721/AFjOr-K8_normal.jpg","qrt":{},"rts":122021,"screen_name":"jack","thumbnail":"","time":"Tue Mar 21 20:50:14 +0000 2006","tweet":"https://twitter.com/jack/status/20","type":"Text","uploader":"jack","url":""},
|
||||
testQrtVideoTweet:{'tweet': 'https://twitter.com/Twitter/status/1494436688554344449', 'url': '', 'description': 'https://twitter.com/TwitterSupport/status/1494386367467593737', 'thumbnail': '', 'uploader': 'Twitter', 'screen_name': 'Twitter', 'pfp': 'http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg', 'type': 'Text', 'images': ['', '', '', '', ''], 'likes': 5186, 'rts': 703, 'time': 'Thu Feb 17 22:20:46 +0000 2022', 'qrt': {'desc': 'Keep your fave DM convos easily accessible by pinning them! You can now pin up to six conversations that will stay at the top of your DM inbox.\n\nAvailable on Android, iOS, and web. https://t.co/kIjlzf9XLJ', 'handle': 'Twitter Support', 'screen_name': 'TwitterSupport', 'verified': True, 'id': '1494386367467593737'}, 'nsfw': False, 'verified': True, 'size': {}}
|
||||
cache.setCache({"https://twitter.com/Twitter/status/1118295916874739714":{"description":"On profile pages, we used to only show someone’s replies, not the original Tweet 🙄 Now we’re showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY","hits":0,"images":["https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","","","","1"],"likes":5033,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":754,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","time":"Tue Apr 16 23:31:38 +0000 2019","tweet":"https://twitter.com/Twitter/status/1118295916874739714","type":"Image","uploader":"Twitter","url":""},
|
||||
"https://twitter.com/Twitter/status/1263145271946551300":{"description":"Testing, testing...\n\nA new way to have a convo with exactly who you want. We’re starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT","hits":0,"images":["","","","",""],"likes":61584,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":17138,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg","time":"Wed May 20 16:31:15 +0000 2020","tweet":"https://twitter.com/Twitter/status/1263145271946551300","type":"Video","uploader":"Twitter","url":"https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13"},
|
||||
"https://twitter.com/Twitter/status/1293239745695211520":{"description":"We tested, you Tweeted, and now we’re rolling it out to everyone! https://t.co/w6Q3Q6DiKz","hits":0,"images":["https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","https://pbs.twimg.com/media/EfJ-aHlU0AAU1kq.jpg","","","2"],"likes":5707,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":1416,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","time":"Tue Aug 11 17:35:57 +0000 2020","tweet":"https://twitter.com/Twitter/status/1293239745695211520","type":"Image","uploader":"Twitter","url":""},
|
||||
"https://twitter.com/jack/status/20":{"description":"just setting up my twttr","hits":0,"images":["","","","",""],"likes":179863,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1115644092329758721/AFjOr-K8_normal.jpg","qrt":{},"rts":122021,"screen_name":"jack","thumbnail":"","time":"Tue Mar 21 20:50:14 +0000 2006","tweet":"https://twitter.com/jack/status/20","type":"Text","uploader":"jack","url":""},
|
||||
testQrtVideoTweet:{'tweet': 'https://twitter.com/Twitter/status/1494436688554344449', 'url': '', 'description': 'https://twitter.com/TwitterSupport/status/1494386367467593737', 'thumbnail': '', 'uploader': 'Twitter', 'screen_name': 'Twitter', 'pfp': 'https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg', 'type': 'Text', 'images': ['', '', '', '', ''], 'likes': 5186, 'rts': 703, 'time': 'Thu Feb 17 22:20:46 +0000 2022', 'qrt': {'desc': 'Keep your fave DM convos easily accessible by pinning them! You can now pin up to six conversations that will stay at the top of your DM inbox.\n\nAvailable on Android, iOS, and web. https://t.co/kIjlzf9XLJ', 'handle': 'Twitter Support', 'screen_name': 'TwitterSupport', 'verified': True, 'id': '1494386367467593737'}, 'nsfw': False, 'verified': True, 'size': {}}
|
||||
})
|
||||
#embed time
|
||||
resp = client.get(testTextTweet.replace("https://twitter.com",""),headers={"User-Agent":"test"})
|
||||
|
@ -20,52 +20,89 @@ def getGuestToken():
|
||||
guestToken = json.loads(r.text)["guest_token"]
|
||||
return guestToken
|
||||
|
||||
def extractStatus_fallback(url):
|
||||
try:
|
||||
# get tweet ID
|
||||
m = re.search(pathregex, url)
|
||||
if m is None:
|
||||
raise twExtractError.TwExtractError(400, "Extract error")
|
||||
twid = m.group(2)
|
||||
if config["config"]["workaroundTokens"] == None:
|
||||
raise twExtractError.TwExtractError(400, "Extract error (no tokens defined)")
|
||||
# get tweet
|
||||
tokens = config["config"]["workaroundTokens"].split(",")
|
||||
for authToken in tokens:
|
||||
try:
|
||||
csrfToken=str(uuid.uuid4()).replace('-', '')
|
||||
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"})
|
||||
output = tweet.json()
|
||||
if "errors" in output:
|
||||
# try another token
|
||||
continue
|
||||
except Exception as e:
|
||||
def extractStatus_token(url):
|
||||
# get tweet ID
|
||||
m = re.search(pathregex, url)
|
||||
if m is None:
|
||||
raise twExtractError.TwExtractError(400, "Extract error")
|
||||
twid = m.group(2)
|
||||
if config["config"]["workaroundTokens"] == None:
|
||||
raise twExtractError.TwExtractError(400, "Extract error (no tokens defined)")
|
||||
# get tweet
|
||||
tokens = config["config"]["workaroundTokens"].split(",")
|
||||
for authToken in tokens:
|
||||
try:
|
||||
csrfToken=str(uuid.uuid4()).replace('-', '')
|
||||
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"})
|
||||
output = tweet.json()
|
||||
if "errors" in output:
|
||||
# try another token
|
||||
continue
|
||||
return output
|
||||
raise twExtractError.TwExtractError(400, "Extract error")
|
||||
except Exception as e:
|
||||
raise twExtractError.TwExtractError(400, "Extract error")
|
||||
except Exception as e:
|
||||
continue
|
||||
return output
|
||||
raise twExtractError.TwExtractError(400, "Extract error")
|
||||
|
||||
def extractStatus_guestToken(url):
    """Fetch a tweet via the v1.1 ``statuses/show`` endpoint using a guest token.

    Parameters:
        url: tweet URL; the tweet ID is taken from group 2 of ``pathregex``.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        twExtractError.TwExtractError: with the code and message of the first
            entry in the response's ``errors`` list, or ``400, "Extract error"``
            when the request, the guest-token lookup or JSON decoding fails.
    """
    # get tweet ID
    m = re.search(pathregex, url)
    if m is None:
        # Unparseable URL: hand over to the token method, as before.
        return extractStatus_token(url)
    twid = m.group(2)

    try:
        # get guest token
        guestToken = getGuestToken()
        # get tweet
        tweet = requests.get(
            "https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0",
            headers={"Authorization":bearer, "x-guest-token":guestToken},
        )
        output = tweet.json()
    except (requests.exceptions.RequestException, ValueError, KeyError) as err:
        # extractStatus only moves on to the next method when it sees a
        # TwExtractError, so transport failures, non-JSON bodies and a missing
        # "guest_token" key must be reported as one instead of escaping raw.
        raise twExtractError.TwExtractError(400, "Extract error") from err

    if "errors" in output:
        # pick the first error and create a twExtractError
        error = output["errors"][0]
        raise twExtractError.TwExtractError(error["code"], error["message"])
    return output
|
||||
|
||||
def extractStatus_syndication(url):
    """Fetch a tweet from the syndication CDN and reshape it like the API result.

    The payload from ``cdn.syndication.twimg.com/tweet-result`` is adjusted in
    place so it carries the keys the other extract methods return:
    ``full_text``, ``user.profile_image_url``, ``retweet_count`` (hard-coded to
    0), ``extended_entities.media[*].media_url`` and, for quote tweets,
    ``quoted_status`` / ``quoted_status_permalink``.

    Parameters:
        url: tweet URL; the tweet ID is taken from group 2 of ``pathregex``.

    Returns:
        The decoded and adjusted JSON body.

    Raises:
        twExtractError.TwExtractError: ``404`` when the endpoint answers 404;
            the first entry of the response's ``errors`` list when present;
            ``400, "Extract error"`` when the request fails, the body is not
            JSON, or the payload lacks the fields needed for the reshaping.
    """
    # Approach borrowed from gallery-dl:
    # https://github.com/mikf/gallery-dl/blob/46cae04aa3a113c7b6bbee1bb468669564b14ae8/gallery_dl/extractor/twitter.py#L1784
    m = re.search(pathregex, url)
    if m is None:
        # Unparseable URL: hand over to the token method, as before.
        return extractStatus_token(url)
    twid = m.group(2)

    try:
        tweet = requests.get("https://cdn.syndication.twimg.com/tweet-result?id=" + twid)
    except requests.exceptions.RequestException as err:
        # Surface transport failures as TwExtractError so extractStatus can
        # fall through to the next method.
        raise twExtractError.TwExtractError(400, "Extract error") from err

    if tweet.status_code == 404:
        raise twExtractError.TwExtractError(404, "Tweet not found")

    try:
        output = tweet.json()
    except ValueError as err:
        raise twExtractError.TwExtractError(400, "Extract error") from err

    if "errors" in output:
        # pick the first error and create a twExtractError
        error = output["errors"][0]
        raise twExtractError.TwExtractError(error["code"], error["message"])

    # change returned data to match the one from the other methods
    try:
        output['full_text'] = output['text']
        output['user']['profile_image_url'] = output['user']['profile_image_url_https']
        output['retweet_count'] = 0
        if 'mediaDetails' in output:
            output['extended_entities'] = {'media': output['mediaDetails']}
            for media in output['extended_entities']['media']:
                media['media_url'] = media['media_url_https']
        if 'quoted_tweet' in output:
            output['quoted_status'] = output['quoted_tweet']
            quotedID = output['quoted_tweet']['id_str']
            quotedScreenName = output['quoted_tweet']['user']['screen_name']
            output['quoted_status_permalink'] = {'expanded':f"https://twitter.com/{quotedScreenName}/status/{quotedID}"}
    except (KeyError, TypeError) as err:
        # A payload without the expected fields cannot be reshaped; report it
        # as an extract failure rather than leaking a KeyError to the caller.
        raise twExtractError.TwExtractError(400, "Extract error") from err

    return output
|
||||
|
||||
def extractStatus(url):
|
||||
try:
|
||||
# get tweet ID
|
||||
m = re.search(pathregex, url)
|
||||
if m is None:
|
||||
return extractStatus_fallback(url)
|
||||
twid = m.group(2)
|
||||
# get guest token
|
||||
guestToken = getGuestToken()
|
||||
# get tweet
|
||||
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer, "x-guest-token":guestToken})
|
||||
output = tweet.json()
|
||||
if "errors" in output:
|
||||
# pick the first error and create a twExtractError
|
||||
error = output["errors"][0]
|
||||
raise twExtractError.TwExtractError(error["code"], error["message"])
|
||||
return output
|
||||
except Exception as e:
|
||||
return extractStatus_fallback(url)
|
||||
methods=[extractStatus_guestToken,extractStatus_syndication,extractStatus_token]
|
||||
for method in methods:
|
||||
try:
|
||||
return method(url)
|
||||
except twExtractError.TwExtractError as e:
|
||||
continue
|
||||
raise twExtractError.TwExtractError(400, "Extract error")
|
||||
|
||||
def extractUser(url):
|
||||
useId=True
|
||||
|
Loading…
x
Reference in New Issue
Block a user