Add gallery_dl tweet extract method

This commit is contained in:
Dylan 2023-07-02 21:21:45 +01:00
parent abedae385c
commit 23c5aaccd1
2 changed files with 89 additions and 52 deletions

View File

@ -23,7 +23,7 @@ testQrtVideoTweet="https://twitter.com/Twitter/status/1494436688554344449"
testNSFWTweet="https://twitter.com/kuyacoy/status/1581185279376838657"
textVNF_compare = {'tweet': 'https://twitter.com/jack/status/20', 'url': '', 'description': 'just setting up my twttr', 'screen_name': 'jack', 'type': 'Text', 'images': ['', '', '', '', ''], 'time': 'Tue Mar 21 20:50:14 +0000 2006', 'qrtURL': None, 'nsfw': False}
videoVNF_compare={'tweet': 'https://twitter.com/Twitter/status/1263145271946551300', 'url': 'https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13', 'description': 'Testing, testing...\n\nA new way to have a convo with exactly who you want. Were starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT', 'thumbnail': 'http://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg', 'screen_name': 'Twitter', 'type': 'Video', 'images': ['', '', '', '', ''], 'time': 'Wed May 20 16:31:15 +0000 2020', 'qrtURL': None, 'nsfw': False,'verified': True, 'size': {'width': 1920, 'height': 1080}}
videoVNF_compare={'tweet': 'https://twitter.com/Twitter/status/1263145271946551300', 'url': 'https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13', 'description': 'Testing, testing...\n\nA new way to have a convo with exactly who you want. Were starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT', 'thumbnail': 'https://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg', 'screen_name': 'Twitter', 'type': 'Video', 'images': ['', '', '', '', ''], 'time': 'Wed May 20 16:31:15 +0000 2020', 'qrtURL': None, 'nsfw': False,'verified': True, 'size': {'width': 1920, 'height': 1080}}
testMedia_compare={'tweet': 'https://twitter.com/Twitter/status/1118295916874739714', 'url': '', 'description': 'On profile pages, we used to only show someones replies, not the original Tweet 🙄 Now were showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY', 'thumbnail': 'https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg', 'screen_name': 'Twitter', 'type': 'Image', 'images': ['https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg', '', '', '', '1'], 'time': 'Tue Apr 16 23:31:38 +0000 2019', 'qrtURL': None, 'nsfw': False, 'size': {}}
testMultiMedia_compare={'tweet': 'https://twitter.com/Twitter/status/1293239745695211520', 'url': '', 'description': 'We tested, you Tweeted, and now were rolling it out to everyone! https://t.co/w6Q3Q6DiKz', 'thumbnail': 'https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg', 'screen_name': 'Twitter', 'type': 'Image', 'images': ['https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg', 'https://pbs.twimg.com/media/EfJ-aHlU0AAU1kq.jpg', '', '', '2'], 'time': 'Tue Aug 11 17:35:57 +0000 2020', 'qrtURL': None, 'nsfw': False, 'verified': True, 'size': {}}
@ -34,7 +34,7 @@ def compareDict(original,compare):
for key in original:
assert key in compare
if type(compare[key]) is not dict:
if key == 'verified' and compare[key]!=original[key]:
if (key == 'verified' or key== 'time') and compare[key]!=original[key]:
continue # does not match as test data was from before verification changes
assert compare[key]==original[key]
else:
@ -46,7 +46,7 @@ def test_textTweetExtract():
assert tweet["full_text"]==textVNF_compare['description']
assert tweet["user"]["screen_name"]=="jack"
assert 'extended_entities' not in tweet
assert tweet["is_quote_status"]==False
def test_UserExtract():
user = twExtract.extractUser(testUser)
@ -76,7 +76,7 @@ def test_videoTweetExtract():
video = tweet['extended_entities']["media"][0]
assert video["media_url_https"]=="https://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg"
assert video["type"]=="video"
assert tweet["is_quote_status"]==False
def test_mediaTweetExtract():
tweet = twExtract.extractStatus(testMediaTweet)
@ -87,7 +87,7 @@ def test_mediaTweetExtract():
video = tweet['extended_entities']["media"][0]
assert video["media_url_https"]=="https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg"
assert video["type"]=="photo"
assert tweet["is_quote_status"]==False
def test_multimediaTweetExtract():
tweet = twExtract.extractStatus(testMultiMediaTweet)
@ -208,11 +208,11 @@ def test_veryLongEmbed():
assert resp.status_code==200
def test_embedFromOutdatedCache(): # presets a cache that has VNF's with missing fields; there's probably a better way to do this
cache.setCache({"https://twitter.com/Twitter/status/1118295916874739714":{"description":"On profile pages, we used to only show someones replies, not the original Tweet 🙄 Now were showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY","hits":0,"images":["https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","","","","1"],"likes":5033,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":754,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","time":"Tue Apr 16 23:31:38 +0000 2019","tweet":"https://twitter.com/Twitter/status/1118295916874739714","type":"Image","uploader":"Twitter","url":""},
"https://twitter.com/Twitter/status/1263145271946551300":{"description":"Testing, testing...\n\nA new way to have a convo with exactly who you want. Were starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT","hits":0,"images":["","","","",""],"likes":61584,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":17138,"screen_name":"Twitter","thumbnail":"http://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg","time":"Wed May 20 16:31:15 +0000 2020","tweet":"https://twitter.com/Twitter/status/1263145271946551300","type":"Video","uploader":"Twitter","url":"https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13"},
"https://twitter.com/Twitter/status/1293239745695211520":{"description":"We tested, you Tweeted, and now were rolling it out to everyone! https://t.co/w6Q3Q6DiKz","hits":0,"images":["https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","https://pbs.twimg.com/media/EfJ-aHlU0AAU1kq.jpg","","","2"],"likes":5707,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":1416,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","time":"Tue Aug 11 17:35:57 +0000 2020","tweet":"https://twitter.com/Twitter/status/1293239745695211520","type":"Image","uploader":"Twitter","url":""},
"https://twitter.com/jack/status/20":{"description":"just setting up my twttr","hits":0,"images":["","","","",""],"likes":179863,"nsfw":False,"pfp":"http://pbs.twimg.com/profile_images/1115644092329758721/AFjOr-K8_normal.jpg","qrt":{},"rts":122021,"screen_name":"jack","thumbnail":"","time":"Tue Mar 21 20:50:14 +0000 2006","tweet":"https://twitter.com/jack/status/20","type":"Text","uploader":"jack","url":""},
testQrtVideoTweet:{'tweet': 'https://twitter.com/Twitter/status/1494436688554344449', 'url': '', 'description': 'https://twitter.com/TwitterSupport/status/1494386367467593737', 'thumbnail': '', 'uploader': 'Twitter', 'screen_name': 'Twitter', 'pfp': 'http://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg', 'type': 'Text', 'images': ['', '', '', '', ''], 'likes': 5186, 'rts': 703, 'time': 'Thu Feb 17 22:20:46 +0000 2022', 'qrt': {'desc': 'Keep your fave DM convos easily accessible by pinning them! You can now pin up to six conversations that will stay at the top of your DM inbox.\n\nAvailable on Android, iOS, and web. https://t.co/kIjlzf9XLJ', 'handle': 'Twitter Support', 'screen_name': 'TwitterSupport', 'verified': True, 'id': '1494386367467593737'}, 'nsfw': False, 'verified': True, 'size': {}}
cache.setCache({"https://twitter.com/Twitter/status/1118295916874739714":{"description":"On profile pages, we used to only show someones replies, not the original Tweet 🙄 Now were showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY","hits":0,"images":["https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","","","","1"],"likes":5033,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":754,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg","time":"Tue Apr 16 23:31:38 +0000 2019","tweet":"https://twitter.com/Twitter/status/1118295916874739714","type":"Image","uploader":"Twitter","url":""},
"https://twitter.com/Twitter/status/1263145271946551300":{"description":"Testing, testing...\n\nA new way to have a convo with exactly who you want. Were starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT","hits":0,"images":["","","","",""],"likes":61584,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":17138,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg","time":"Wed May 20 16:31:15 +0000 2020","tweet":"https://twitter.com/Twitter/status/1263145271946551300","type":"Video","uploader":"Twitter","url":"https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13"},
"https://twitter.com/Twitter/status/1293239745695211520":{"description":"We tested, you Tweeted, and now were rolling it out to everyone! https://t.co/w6Q3Q6DiKz","hits":0,"images":["https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","https://pbs.twimg.com/media/EfJ-aHlU0AAU1kq.jpg","","","2"],"likes":5707,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg","qrt":{},"rts":1416,"screen_name":"Twitter","thumbnail":"https://pbs.twimg.com/media/EfJ-C-JU0AAQL_C.jpg","time":"Tue Aug 11 17:35:57 +0000 2020","tweet":"https://twitter.com/Twitter/status/1293239745695211520","type":"Image","uploader":"Twitter","url":""},
"https://twitter.com/jack/status/20":{"description":"just setting up my twttr","hits":0,"images":["","","","",""],"likes":179863,"nsfw":False,"pfp":"https://pbs.twimg.com/profile_images/1115644092329758721/AFjOr-K8_normal.jpg","qrt":{},"rts":122021,"screen_name":"jack","thumbnail":"","time":"Tue Mar 21 20:50:14 +0000 2006","tweet":"https://twitter.com/jack/status/20","type":"Text","uploader":"jack","url":""},
testQrtVideoTweet:{'tweet': 'https://twitter.com/Twitter/status/1494436688554344449', 'url': '', 'description': 'https://twitter.com/TwitterSupport/status/1494386367467593737', 'thumbnail': '', 'uploader': 'Twitter', 'screen_name': 'Twitter', 'pfp': 'https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_normal.jpg', 'type': 'Text', 'images': ['', '', '', '', ''], 'likes': 5186, 'rts': 703, 'time': 'Thu Feb 17 22:20:46 +0000 2022', 'qrt': {'desc': 'Keep your fave DM convos easily accessible by pinning them! You can now pin up to six conversations that will stay at the top of your DM inbox.\n\nAvailable on Android, iOS, and web. https://t.co/kIjlzf9XLJ', 'handle': 'Twitter Support', 'screen_name': 'TwitterSupport', 'verified': True, 'id': '1494386367467593737'}, 'nsfw': False, 'verified': True, 'size': {}}
})
#embed time
resp = client.get(testTextTweet.replace("https://twitter.com",""),headers={"User-Agent":"test"})

View File

@ -20,52 +20,89 @@ def getGuestToken():
guestToken = json.loads(r.text)["guest_token"]
return guestToken
def extractStatus_fallback(url):
try:
# get tweet ID
m = re.search(pathregex, url)
if m is None:
raise twExtractError.TwExtractError(400, "Extract error")
twid = m.group(2)
if config["config"]["workaroundTokens"] == None:
raise twExtractError.TwExtractError(400, "Extract error (no tokens defined)")
# get tweet
tokens = config["config"]["workaroundTokens"].split(",")
for authToken in tokens:
try:
csrfToken=str(uuid.uuid4()).replace('-', '')
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"})
output = tweet.json()
if "errors" in output:
# try another token
continue
except Exception as e:
def extractStatus_token(url):
# get tweet ID
m = re.search(pathregex, url)
if m is None:
raise twExtractError.TwExtractError(400, "Extract error")
twid = m.group(2)
if config["config"]["workaroundTokens"] == None:
raise twExtractError.TwExtractError(400, "Extract error (no tokens defined)")
# get tweet
tokens = config["config"]["workaroundTokens"].split(",")
for authToken in tokens:
try:
csrfToken=str(uuid.uuid4()).replace('-', '')
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"})
output = tweet.json()
if "errors" in output:
# try another token
continue
return output
raise twExtractError.TwExtractError(400, "Extract error")
except Exception as e:
raise twExtractError.TwExtractError(400, "Extract error")
except Exception as e:
continue
return output
raise twExtractError.TwExtractError(400, "Extract error")
def extractStatus_guestToken(url):
# get tweet ID
m = re.search(pathregex, url)
if m is None:
return extractStatus_token(url)
twid = m.group(2)
# get guest token
guestToken = getGuestToken()
# get tweet
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer, "x-guest-token":guestToken})
output = tweet.json()
if "errors" in output:
# pick the first error and create a twExtractError
error = output["errors"][0]
raise twExtractError.TwExtractError(error["code"], error["message"])
return output
def extractStatus_syndication(url):
# https://github.com/mikf/gallery-dl/blob/46cae04aa3a113c7b6bbee1bb468669564b14ae8/gallery_dl/extractor/twitter.py#L1784
m = re.search(pathregex, url)
if m is None:
return extractStatus_token(url)
twid = m.group(2)
tweet = requests.get("https://cdn.syndication.twimg.com/tweet-result?id=" + twid)
if tweet.status_code == 404:
raise twExtractError.TwExtractError(404, "Tweet not found")
output = tweet.json()
if "errors" in output:
# pick the first error and create a twExtractError
error = output["errors"][0]
raise twExtractError.TwExtractError(error["code"], error["message"])
# change returned data to match the one from the other methods
output['full_text'] = output['text']
output['user']['profile_image_url'] = output['user']['profile_image_url_https']
output['retweet_count']=0
if 'mediaDetails' in output:
output['extended_entities'] = {'media':output['mediaDetails']}
for media in output['extended_entities']['media']:
media['media_url'] = media['media_url_https']
if 'quoted_tweet' in output:
output['quoted_status'] = output['quoted_tweet']
quotedID=output['quoted_tweet']['id_str']
quotedScreenName=output['quoted_tweet']['user']['screen_name']
output['quoted_status_permalink'] = {'expanded':f"https://twitter.com/{quotedScreenName}/status/{quotedID}"}
#output['user']['']
return output
def extractStatus(url):
try:
# get tweet ID
m = re.search(pathregex, url)
if m is None:
return extractStatus_fallback(url)
twid = m.group(2)
# get guest token
guestToken = getGuestToken()
# get tweet
tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer, "x-guest-token":guestToken})
output = tweet.json()
if "errors" in output:
# pick the first error and create a twExtractError
error = output["errors"][0]
raise twExtractError.TwExtractError(error["code"], error["message"])
return output
except Exception as e:
return extractStatus_fallback(url)
methods=[extractStatus_guestToken,extractStatus_syndication,extractStatus_token]
for method in methods:
try:
return method(url)
except twExtractError.TwExtractError as e:
continue
raise twExtractError.TwExtractError(400, "Extract error")
def extractUser(url):
useId=True