From 764e30be02fe04904c4b0eb7d574cb69b0a79e5c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 01:24:25 +0000 Subject: [PATCH 01/28] Bump boto3 from 1.35.18 to 1.36.6 Bumps [boto3](https://github.com/boto/boto3) from 1.35.18 to 1.36.6. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.35.18...1.36.6) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c2a7318..15f2d11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ pymongo==4.8.0 -boto3==1.35.18 +boto3==1.36.6 requests==2.32.3 Pillow==10.4.0 Flask==2.2.3 From cd392168918bda7659ef215845b9370dbdae6c3b Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 22 Jun 2025 13:20:01 +0100 Subject: [PATCH 02/28] Revert "Updated tweet history endpoint & temporarily using guest token" This reverts commit fc17870b0663b524a8c87347d8041de78a978945. --- twExtract/__init__.py | 11 +++++------ vxApi.py | 8 -------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index e4dccae..1af5bf3 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -38,8 +38,8 @@ tweetDetailGraphqlFeatures='{"rweb_tipjar_consumption_enabled":true,"responsive_ tweetDetailGraphql_api="e7RKseIxLu7HgkWNKZ6qnw" # this is for UserTweets endpoint -tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":false,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' -tweetFeedGraphql_api="Li2XXGESVev94TzFtntrgA" +tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' +tweetFeedGraphql_api="OAx9yEcW3JA9bPo63pcYlA" twitterUrl = "x.com" # doubt this will change but just in case class TwExtractError(Exception): @@ -109,7 +109,6 @@ def getAuthHeaders(btoken,authToken=None,guestToken=None): headers["x-twitter-auth-type"] = "OAuth2Session" if guestToken is not None: headers["x-guest-token"] = guestToken - headers["Cookie"] = f"gt={guestToken}; ct0={csrfToken}; guest_id=v1:174804309415864668;" return headers @@ -523,11 +522,11 @@ def extractUserFeedFromId(userId,workaroundTokens): # TODO: https://api.twitter.com/graphql/x31u1gdnjcqtiVZFc1zWnQ/UserWithProfileTweetsQueryV2?variables={"cursor":"?","includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"includeEditControl":true,"count":40,"rest_id":"12","includeTweetVisibilityNudge":true,"autoplay_enabled":true}&features={"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":false,"super_follow_exclusive_tweet_notifications_enabled":true} continue try: - vars = json.loads('{"userId":"0","count":20,"includePromotedContent":true,"withQuickPromoteEligibilityTweetFields":true,"withVoice":true}') + vars = json.loads('{"userId":"0","count":20,"includePromotedContent":true,"withCommunity":true,"withVoice":true}') vars['userId'] = str(userId) vars['includePromotedContent'] = False # idk if this works - reqHeaders = getAuthHeaders(v2bearer,guestToken=getGuestToken()) - endpoint=f"/i/api/graphql/{tweetFeedGraphql_api}/UserTweets" + reqHeaders = getAuthHeaders(bearer,authToken=authToken) + endpoint=f"/i/api/graphql/{tweetFeedGraphql_api}/UserTweetsAndReplies" reqHeaders["x-client-transaction-id"] = twUtils.generate_transaction_id("GET",endpoint) feed = requests.get(f"https://{twitterUrl}{endpoint}", {'variables':json.dumps(vars),'features':tweetFeedGraphqlFeatures,'fieldToggles':'{"withArticlePlainText":false}'},headers=reqHeaders) if feed.status_code == 403: diff --git a/vxApi.py b/vxApi.py index 83bdbfb..5583475 100644 --- a/vxApi.py +++ b/vxApi.py @@ -230,14 +230,6 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): if 'in_reply_to_status_id_str' in tweetL and tweetL['in_reply_to_status_id_str'] != None: replyingToID = tweetL['in_reply_to_status_id_str'] - if 'screen_name' not in userL and 'core' in tweet["core"]["user_results"]["result"]: - userL['screen_name'] = tweet["core"]["user_results"]["result"]["core"]["screen_name"] - if 'name' not in userL: - userL['name'] = tweet["core"]["user_results"]["result"]["core"]["name"] - - if 'profile_image_url_https' not in userL and 'avatar' in tweet["core"]["user_results"]["result"]: - userL['profile_image_url_https'] = tweet["core"]["user_results"]["result"]["avatar"]["image_url"] - apiObject = { "text": twText, "likes": tweetL["favorite_count"], From a6be414129b6c2038d816883ac92c4ed7f3ba2ec Mon Sep 17 00:00:00 2001 From: Dylan Date: Fri, 11 Jul 2025 18:36:19 +0100 Subject: [PATCH 03/28] Fix direct embeds redirecting to gif version (#281) --- twitfix.py | 2 +- utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/twitfix.py b/twitfix.py index d1356c4..59b153c 100644 --- a/twitfix.py +++ b/twitfix.py @@ -415,7 +415,7 @@ def twitfix(sub_path): embeddingMedia = tweetData['hasMedia'] renderMedia = None if embeddingMedia: - renderMedia = determineMediaToEmbed(tweetData,embedIndex) + renderMedia = determineMediaToEmbed(tweetData,embedIndex,convertGif=False) # direct embeds should always prioritize the main tweet, so don't check for qrt # determine what type of media we're dealing with if not embeddingMedia and qrt is None: diff --git a/utils.py b/utils.py index 483c63d..b3656f8 100644 --- a/utils.py +++ b/utils.py @@ -64,7 +64,7 @@ def determineEmbedTweet(tweetData): return tweetData['qrt'] return tweetData -def determineMediaToEmbed(tweetData,embedIndex = -1): +def determineMediaToEmbed(tweetData,embedIndex = -1,convertGif = True): if tweetData['allSameType'] and tweetData['media_extended'][0]['type'] == "image" and embedIndex == -1 and tweetData['combinedMediaUrl'] != None: return {"url":tweetData['combinedMediaUrl'],"type":"image"} else: @@ -82,7 +82,7 @@ def determineMediaToEmbed(tweetData,embedIndex = -1): if media['type'] == "image": return media elif media['type'] == "video" or media['type'] == "gif": - if media['type'] == "gif": + if media['type'] == "gif" and convertGif: if config['config']['gifConvertAPI'] != "" and config['config']['gifConvertAPI'] != "none": vurl=media['originalUrl'] if 'originalUrl' in media else media['url'] media['url'] = config['config']['gifConvertAPI'] + "/convert?url=" + vurl From 911a49b04f223ea8eacf63320cb8acc2fae06e24 Mon Sep 17 00:00:00 2001 From: Dylan Date: Mon, 28 Jul 2025 16:45:10 +0100 Subject: [PATCH 04/28] Use cache tags --- twitfix.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/twitfix.py b/twitfix.py index 59b153c..c1ffd4f 100644 --- a/twitfix.py +++ b/twitfix.py @@ -67,13 +67,14 @@ def isValidUserAgent(user_agent): return False def message(text): - return render_template( + rendered = render_template( 'default.html', message = text, color = config['config']['color'], appname = config['config']['appname'], repo = config['config']['repo'], url = config['config']['url'] ) + return Response(rendered, mimetype='text/html',headers={"Cache-Tag": "message"}) def generateActivityLink(tweetData,media=None,mediatype=None,embedIndex=-1): global user_agent @@ -422,26 +423,26 @@ def twitfix(sub_path): return renderTextTweetEmbed(tweetData) else: if renderMedia['type'] == "image": - return render_template("rawimage.html",media=renderMedia) + return Response(render_template("rawimage.html",media=renderMedia),headers={"Cache-Tag": "embed"}) elif renderMedia['type'] == "video" or renderMedia['type'] == "gif": - return render_template("rawvideo.html",media=renderMedia) + return Response(render_template("rawvideo.html",media=renderMedia),headers={"Cache-Tag": "embed"}) else: # full embed embedTweetData = determineEmbedTweet(tweetData) embeddingMedia = embedTweetData['hasMedia'] if "article" in embedTweetData and embedTweetData["article"] is not None: - return renderArticleTweetEmbed(tweetData," • See original tweet for full article") + return Response(renderArticleTweetEmbed(tweetData," • See original tweet for full article"),headers={"Cache-Tag": "embed"}) elif not embeddingMedia: - return renderTextTweetEmbed(tweetData) + return Response(renderTextTweetEmbed(tweetData),headers={"Cache-Tag": "embed"}) else: media = determineMediaToEmbed(embedTweetData,embedIndex) suffix="" if "suffix" in media: suffix = media["suffix"] if media['type'] == "image": - return renderImageTweetEmbed(tweetData,media['url'] , appnameSuffix=suffix,embedIndex=embedIndex) + return Response(renderImageTweetEmbed(tweetData,media['url'] , appnameSuffix=suffix,embedIndex=embedIndex),headers={"Cache-Tag": "embed"}) elif media['type'] == "video" or media['type'] == "gif": - return renderVideoTweetEmbed(tweetData,media,appnameSuffix=suffix,embedIndex=embedIndex) + return Response(renderVideoTweetEmbed(tweetData,media,appnameSuffix=suffix,embedIndex=embedIndex),headers={"Cache-Tag": "embed"}) return message(msgs.failedToScan) From f4d1308b938ba04db271f55e6e7f774940b4b870 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 7 Aug 2025 18:55:25 +0100 Subject: [PATCH 05/28] Add Cache-Control to messages --- twitfix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twitfix.py b/twitfix.py index c1ffd4f..020c3e9 100644 --- a/twitfix.py +++ b/twitfix.py @@ -74,7 +74,7 @@ def message(text): appname = config['config']['appname'], repo = config['config']['repo'], url = config['config']['url'] ) - return Response(rendered, mimetype='text/html',headers={"Cache-Tag": "message"}) + return Response(rendered, mimetype='text/html',headers={"Cache-Tag": "message", "Cache-Control": "max-age=1760, public"}) def generateActivityLink(tweetData,media=None,mediatype=None,embedIndex=-1): global user_agent From 4ac17cf4514c44458f7943780e0e8bb39652c59d Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 23 Aug 2025 15:01:47 +0100 Subject: [PATCH 06/28] parallelize extractStatusV2 --- test_vx_extract.py | 2 +- twExtract/__init__.py | 112 ++++++++++++++++++++++++++---------------- 2 files changed, 71 insertions(+), 43 deletions(-) diff --git a/test_vx_extract.py b/test_vx_extract.py index 18613e0..43f32cf 100644 --- a/test_vx_extract.py +++ b/test_vx_extract.py @@ -38,7 +38,7 @@ def test_twextract_textTweetExtract(): assert tweet["user"]["screen_name"]=="jack" assert 'extended_entities' not in tweet -def test_twextract_extractV2(): # remove this when v2 is default +def test_twextract_extractV2(): tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens) def test_twextract_UserExtract(): diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 1af5bf3..678af8c 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -9,6 +9,7 @@ from oauthlib import oauth1 import sys sys.path.append(os.path.dirname(os.path.realpath(__file__))) import twUtils +import concurrent.futures bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw" v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" androidBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F" @@ -50,6 +51,37 @@ class TwExtractError(Exception): def __str__(self): return self.msg +def parallel_token_request(twid, tokens, request_function): + results = [] + errors = [] + def try_token(token): + try: + result = request_function(twid, token) + return {'success': True, 'result': result} + except Exception as e: + return {'success': False, 'error': str(e)} + + with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, len(tokens))) as executor: + futures = {executor.submit(try_token, token): token for token in tokens} + for future in concurrent.futures.as_completed(futures): + result = future.result() + if result['success']: + results.append(result) + else: + errors.append(result) + + # Early return if success + if result['success']: + for f in futures: # Cancel remaining futures + if not f.done(): + f.cancel() + return result['result'] + + # all tokens failed + if errors: + raise TwExtractError(400, f"All tokens failed. Last error: {errors[-1]['error']}") + return None + def cycleBearerTokenGet(url,headers): global bearerTokens rateLimitRemaining = None @@ -237,51 +269,47 @@ def extractStatusV2(url,workaroundTokens): # get tweet tokens = workaroundTokens random.shuffle(tokens) - for authToken in tokens: + def request_with_token(twid, authToken): + vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') + vars['rest_ids'][0] = str(twid) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken,btoken=v2bearer) try: - vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') - vars['rest_ids'][0] = str(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken) - try: - rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") - print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") - except: # for some reason the header is not always present - pass - if tweet.status_code == 429: - print("Rate limit reached for token (429)") - # try another token + rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") + print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") + except: # for some reason the header is not always present + pass + if tweet.status_code == 429: + print("Rate limit reached for token (429)") + # try another token + raise TwExtractError(400, "Extract error: rate limit reached") + output = tweet.json() + + if "errors" in output: + print(f"Error in output: {json.dumps(output['errors'])}") + # try another token + raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors'])) + entries=output['data']['tweet_results'] + tweetEntry=None + for entry in entries: + if 'result' not in entry: + print("Tweet result not found in entry") continue - output = tweet.json() - - if "errors" in output: - print(f"Error in output: {json.dumps(output['errors'])}") - # try another token - continue - entries=output['data']['tweet_results'] - tweetEntry=None - for entry in entries: - if 'result' not in entry: - print("Tweet result not found in entry") - continue - result = entry['result'] - if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults': - result=result['tweet'] - elif '__typename' in result and result['__typename'] == 'TweetUnavailable': - if 'reason' in result: - return {'error':'Tweet unavailable: '+result['reason']} - return {'error':'Tweet unavailable'} - if 'rest_id' in result and result['rest_id'] == twid: - tweetEntry=result - break - tweet=tweetEntry - if tweet is None: - print("Tweet 404") - return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} - except Exception as e: - print(f"Exception in extractStatusV2: {str(e)}") - continue + result = entry['result'] + if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults': + result=result['tweet'] + elif '__typename' in result and result['__typename'] == 'TweetUnavailable': + if 'reason' in result: + return {'error':'Tweet unavailable: '+result['reason']} + return {'error':'Tweet unavailable'} + if 'rest_id' in result and result['rest_id'] == twid: + tweetEntry=result + break + tweet=tweetEntry + if tweet is None: + print("Tweet 404") + return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} return tweet - raise TwExtractError(400, "Extract error") + return parallel_token_request(twid, tokens, request_with_token) def extractStatusV2Android(url,workaroundTokens): # get tweet ID From efc03399abb277561ad98c20b572619574996720 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 23 Aug 2025 15:05:21 +0100 Subject: [PATCH 07/28] Prioritize extractStatusV2 in extract logic --- twExtract/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 678af8c..31e820b 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -385,10 +385,9 @@ def extractStatusV2TweetDetail(url,workaroundTokens): random.shuffle(tokens) for authToken in tokens: try: - vars = json.loads('{"focalTweetId":"0","with_rux_injections":false,"includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true,"withV2Timeline":true}') vars['focalTweetId'] = str(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{tweetDetailGraphql_api}/TweetDetail?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(tweetDetailGraphqlFeatures)}", authToken=authToken) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{tweetDetailGraphql_api}/TweetDetail?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(tweetDetailGraphqlFeatures)}", authToken=authToken,btoken=v2bearer) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") @@ -493,7 +492,7 @@ def fixTweetData(tweet): return tweet def extractStatus(url,workaroundTokens=None): - methods=[extractStatusV2Anon,extractStatusV2TweetDetail,extractStatusV2Android,extractStatusV2] + methods=[extractStatusV2Anon,extractStatusV2,extractStatusV2TweetDetail,extractStatusV2Android] for method in methods: try: result = method(url,workaroundTokens) From 589abd68e90d359eccd5a518ef24082b9ad0abd5 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 23 Aug 2025 15:35:23 +0100 Subject: [PATCH 08/28] Parallelize other extraction functions --- twExtract/__init__.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 31e820b..752d18e 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -319,15 +319,13 @@ def extractStatusV2Android(url,workaroundTokens): twid = m.group(2) if workaroundTokens == None: raise TwExtractError(400, "Extract error (no tokens defined)") - # get tweet tokens = workaroundTokens random.shuffle(tokens) - for authToken in tokens: + def request_with_token(twid, authToken): try: - vars = json.loads('{"referrer":"home","includeTweetImpression":true,"includeHasBirdwatchNotes":false,"isReaderMode":false,"includeEditPerspective":false,"includeEditControl":true,"focalTweetId":0,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') vars['focalTweetId'] = int(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{androidGraphql_api}/ConversationTimelineV2?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(androidGraphqlFeatures)}", authToken=authToken) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{androidGraphql_api}/ConversationTimelineV2?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(androidGraphqlFeatures)}", authToken=authToken,btoken=androidBearer) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Android Token Rate limit remaining: {rateLimitRemaining}") @@ -336,13 +334,13 @@ def extractStatusV2Android(url,workaroundTokens): if tweet.status_code == 429: print("Rate limit reached for android token") # try another token - continue + raise TwExtractError(400, "Extract error: rate limit reached") output = tweet.json() if "errors" in output: print(f"Error in output: {json.dumps(output['errors'])}") # try another token - continue + raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors'])) entries = None for instruction in output['data']['timeline_response']['instructions']: if instruction["__typename"] == "TimelineAddEntries": @@ -366,11 +364,11 @@ def extractStatusV2Android(url,workaroundTokens): print("Tweet 404") return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} except Exception as e: - print(f"Exception in extractStatusV2: {str(e)}") - continue + print(f"Exception in extractStatusV2Android: {str(e)}") + raise TwExtractError(400, "Extract error") return tweet - raise TwExtractError(400, "Extract error") + return parallel_token_request(twid, tokens, request_with_token) def extractStatusV2TweetDetail(url,workaroundTokens): # get tweet ID @@ -383,7 +381,7 @@ def extractStatusV2TweetDetail(url,workaroundTokens): # get tweet tokens = workaroundTokens random.shuffle(tokens) - for authToken in tokens: + def request_with_token(twid, authToken): try: vars = json.loads('{"focalTweetId":"0","with_rux_injections":false,"includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true,"withV2Timeline":true}') vars['focalTweetId'] = str(twid) @@ -396,13 +394,13 @@ def extractStatusV2TweetDetail(url,workaroundTokens): if tweet.status_code == 429: print("Rate limit reached for token") # try another token - continue + raise TwExtractError(400, "Extract error: rate limit reached") output = tweet.json() if "errors" in output: print(f"Error in output: {json.dumps(output['errors'])}") # try another token - continue + raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors'])) entries = None for instruction in output['data']['threaded_conversation_with_injections_v2']['instructions']: if instruction["type"] == "TimelineAddEntries": @@ -427,10 +425,10 @@ def extractStatusV2TweetDetail(url,workaroundTokens): return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} except Exception as e: print(f"Exception in extractStatusV2: {str(e)}") - continue + raise TwExtractError(400, "Extract error") return tweet - raise TwExtractError(400, "Extract error") + return parallel_token_request(twid, tokens, request_with_token) def extractStatusV2Anon(url,x): # get tweet ID @@ -492,7 +490,7 @@ def fixTweetData(tweet): return tweet def extractStatus(url,workaroundTokens=None): - methods=[extractStatusV2Anon,extractStatusV2,extractStatusV2TweetDetail,extractStatusV2Android] + methods=[extractStatusV2Anon,extractStatusV2,extractStatusV2Android,extractStatusV2TweetDetail] for method in methods: try: result = method(url,workaroundTokens) From b34844e2596832144fd4750589f9e842a46e7da8 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 23 Aug 2025 16:13:31 +0100 Subject: [PATCH 09/28] Avoid calling extractStatusV2Anon twice --- twitfix.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/twitfix.py b/twitfix.py index 020c3e9..5fb764c 100644 --- a/twitfix.py +++ b/twitfix.py @@ -266,19 +266,13 @@ def getTweetData(twitter_url,include_txt="false",include_rtf="false"): return cachedVNF try: - rawTweetData = twExtract.extractStatusV2Anon(twitter_url, None) + if config['config']['workaroundTokens'] is not None: + workaroundTokens = config['config']['workaroundTokens'].split(",") + else: + workaroundTokens = None + rawTweetData = twExtract.extractStatus(twitter_url,workaroundTokens=workaroundTokens) except: rawTweetData = None - if rawTweetData is None: - try: - if config['config']['workaroundTokens'] is not None: - workaroundTokens = config['config']['workaroundTokens'].split(",") - else: - workaroundTokens = None - - rawTweetData = twExtract.extractStatus(twitter_url,workaroundTokens=workaroundTokens) - except: - rawTweetData = None if rawTweetData == None or 'error' in rawTweetData: return None From a314d5f65eff61d1f02d1ad22c6c45971b90058d Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 23 Aug 2025 16:24:34 +0100 Subject: [PATCH 10/28] Temporarily remove unreliable methods --- twExtract/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 752d18e..74a3a42 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -490,7 +490,8 @@ def fixTweetData(tweet): return tweet def extractStatus(url,workaroundTokens=None): - methods=[extractStatusV2Anon,extractStatusV2,extractStatusV2Android,extractStatusV2TweetDetail] + # TODO: commented out methods are too slow/unreliable at the moment + methods=[extractStatusV2Anon,extractStatusV2]#,extractStatusV2Android,extractStatusV2TweetDetail] # for method in methods: try: result = method(url,workaroundTokens) From 98196b0e30d500418a0b5c6bfec5d3c60d6d9e1a Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 23 Aug 2025 16:25:17 +0100 Subject: [PATCH 11/28] Reduce function memory size --- serverless.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serverless.yml b/serverless.yml index 9ef561b..990545a 100644 --- a/serverless.yml +++ b/serverless.yml @@ -47,7 +47,7 @@ functions: handler: wsgi_handler.handler url: true timeout: 15 - memorySize: 500 + memorySize: 128 layers: - Ref: PythonRequirementsLambdaLayer From cbf55e74292bdd61f8edcda18b0b1e90c447377f Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 10 Sep 2025 16:46:05 +0100 Subject: [PATCH 12/28] User API working again, with_tweets still broken. #284 --- test_api.py | 1 - test_vx_extract.py | 10 +++++++--- twExtract/__init__.py | 19 ++++++++++++++----- vxApi.py | 23 ++++++++++++----------- 4 files changed, 33 insertions(+), 20 deletions(-) diff --git a/test_api.py b/test_api.py index 26222a6..14d1e62 100644 --- a/test_api.py +++ b/test_api.py @@ -57,7 +57,6 @@ def test_api_user(): def test_api_user_suspended(): resp = client.get(testUserSuspended.replace("https://twitter.com","https://api.vxtwitter.com"),headers={"User-Agent":"test"}) jData = resp.get_json() - assert resp.status_code==500 assert 'suspended' in jData["error"] def test_api_user_private(): diff --git a/test_vx_extract.py b/test_vx_extract.py index 43f32cf..c3864a4 100644 --- a/test_vx_extract.py +++ b/test_vx_extract.py @@ -3,6 +3,7 @@ import os import twExtract import utils from vx_testdata import * +import twitfix def test_twextract_syndicationAPI(): tweet = twExtract.extractStatus_syndication(testMediaTweet,workaroundTokens=tokens) @@ -42,20 +43,23 @@ def test_twextract_extractV2(): tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens) def test_twextract_UserExtract(): - user = twExtract.extractUser(testUser,workaroundTokens=tokens) + rawUserData = twExtract.extractUser(testUser,workaroundTokens=tokens) + user = twitfix.getApiUserResponse(rawUserData) assert user["screen_name"]=="jack" assert user["id"]==12 assert user["created_at"] == "Tue Mar 21 20:50:14 +0000 2006" def test_twextract_UserExtractID(): - user = twExtract.extractUser(testUserIDUrl,workaroundTokens=tokens) + rawUserData = twExtract.extractUser(testUserIDUrl,workaroundTokens=tokens) + user = twitfix.getApiUserResponse(rawUserData) assert user["screen_name"]=="jack" assert user["id"]==12 assert user["created_at"] == "Tue Mar 21 20:50:14 +0000 2006" def test_twextract_UserExtractWeirdURLs(): for url in testUserWeirdURLs: - user = twExtract.extractUser(url,workaroundTokens=tokens) + rawUserData = twExtract.extractUser(url,workaroundTokens=tokens) + user = twitfix.getApiUserResponse(rawUserData) assert user["screen_name"]=="jack" assert user["id"]==12 assert user["created_at"] == "Tue Mar 21 20:50:14 +0000 2006" diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 74a3a42..9ac8b52 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -42,6 +42,9 @@ tweetDetailGraphql_api="e7RKseIxLu7HgkWNKZ6qnw" tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' tweetFeedGraphql_api="OAx9yEcW3JA9bPo63pcYlA" +userByScreenNameGraphqlFeatures='{"hidden_profile_subscriptions_enabled":true,"payments_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"subscriptions_verification_info_is_identity_verified_enabled":true,"subscriptions_verification_info_verified_since_enabled":true,"highlights_tweets_tab_ui_enabled":true,"responsive_web_twitter_article_notes_tab_enabled":true,"subscriptions_feature_can_gift_premium":true,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true}' +userByScreenNameGraphql_api="96tVxbPqMZDoYB5pmzezKA" + twitterUrl = "x.com" # doubt this will change but just in case class TwExtractError(Exception): def __init__(self, code, message): @@ -522,20 +525,26 @@ def extractUser(url,workaroundTokens): if authToken.startswith("oa|"): # oauth token not supported atm continue try: - - reqHeaders = getAuthHeaders(bearer,authToken=authToken) + vars=json.loads('{"screen_name":"","withGrokTranslatedBio":false}') + reqHeaders = getAuthHeaders(v2bearer,authToken=authToken) if not useId: - user = requests.get(f"https://api.{twitterUrl}/1.1/users/show.json?screen_name={screen_name}",headers=reqHeaders) + vars['screen_name'] = screen_name + user = requests.get(f"https://x.com/i/api/graphql/{userByScreenNameGraphql_api}/UserByScreenName",{'variables':json.dumps(vars),'features':userByScreenNameGraphqlFeatures,'fieldToggles':'{"withAuxiliaryUserLabels":true}'},headers=reqHeaders) else: - user = requests.get(f"https://api.{twitterUrl}/1.1/users/show.json?user_id={screen_name}",headers=reqHeaders) + raise NotImplementedError("User ID method not implemented") + #user = requests.get(f"https://api.{twitterUrl}/1.1/users/show.json?user_id={screen_name}",headers=reqHeaders) output = user.json() if "errors" in output: # pick the first error and create a twExtractError error = output["errors"][0] raise TwExtractError(error["code"], error["message"]) + elif 'user' not in output['data']: + raise TwExtractError(404, "User not found.") + elif output['data']['user']['result']['__typename'] == 'UserUnavailable': + raise TwExtractError(404, output['data']['user']['result']['message']) return output except Exception as e: - if hasattr(e,"msg") and (e.msg == 'User has been suspended.' or e.msg == 'User not found.'): + if hasattr(e,"msg") and ('suspended' in e.msg or e.msg == 'User not found.'): raise e continue raise TwExtractError(400, "Extract error") diff --git a/vxApi.py b/vxApi.py index 5583475..4cf6298 100644 --- a/vxApi.py +++ b/vxApi.py @@ -4,18 +4,19 @@ from configHandler import config from utils import stripEndTCO def getApiUserResponse(user): + userResult = user["data"]["user"]["result"] return { - "id": user["id"], - "screen_name": user["screen_name"], - "name": user["name"], - "profile_image_url": user["profile_image_url_https"], - "description": user["description"], - "location": user["location"], - "followers_count": user["followers_count"], - "following_count": user["friends_count"], - "tweet_count": user["statuses_count"], - "created_at": user["created_at"], - "protected": user["protected"], + "id": int(userResult["rest_id"]), + "screen_name": userResult["core"]["screen_name"], + "name": userResult["core"]["name"], + "profile_image_url": userResult['avatar']["image_url"], + "description": userResult["legacy"]["description"], + "location": userResult["location"]["location"], + "followers_count": userResult["legacy"]["followers_count"], + "following_count": userResult["legacy"]["friends_count"], + "tweet_count": userResult["legacy"]["statuses_count"], + "created_at": userResult["core"]["created_at"], + "protected": userResult["privacy"]["protected"], "fetched_on": int(datetime.now().timestamp()), } From 7a97adcf43a83354316b8f437e7dd5608343dbb3 Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 10 Sep 2025 17:06:42 +0100 Subject: [PATCH 13/28] Tweet feed & User by ID working #284 --- twExtract/__init__.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 9ac8b52..a73ec71 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -42,8 +42,9 @@ tweetDetailGraphql_api="e7RKseIxLu7HgkWNKZ6qnw" tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' tweetFeedGraphql_api="OAx9yEcW3JA9bPo63pcYlA" -userByScreenNameGraphqlFeatures='{"hidden_profile_subscriptions_enabled":true,"payments_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"subscriptions_verification_info_is_identity_verified_enabled":true,"subscriptions_verification_info_verified_since_enabled":true,"highlights_tweets_tab_ui_enabled":true,"responsive_web_twitter_article_notes_tab_enabled":true,"subscriptions_feature_can_gift_premium":true,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true}' +userByScreenNameGraphqlFeatures='{"rweb_xchat_enabled":false,"hidden_profile_subscriptions_enabled":true,"payments_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"subscriptions_verification_info_is_identity_verified_enabled":true,"subscriptions_verification_info_verified_since_enabled":true,"highlights_tweets_tab_ui_enabled":true,"responsive_web_twitter_article_notes_tab_enabled":true,"subscriptions_feature_can_gift_premium":true,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true}' userByScreenNameGraphql_api="96tVxbPqMZDoYB5pmzezKA" +userByRestIdGraphql_api="8r5oa_2vD0WkhIAOkY4TTA" twitterUrl = "x.com" # doubt this will change but just in case class TwExtractError(Exception): @@ -525,14 +526,15 @@ def extractUser(url,workaroundTokens): if authToken.startswith("oa|"): # oauth token not supported atm continue try: - vars=json.loads('{"screen_name":"","withGrokTranslatedBio":false}') reqHeaders = getAuthHeaders(v2bearer,authToken=authToken) if not useId: + vars=json.loads('{"screen_name":"","withGrokTranslatedBio":false}') vars['screen_name'] = screen_name user = requests.get(f"https://x.com/i/api/graphql/{userByScreenNameGraphql_api}/UserByScreenName",{'variables':json.dumps(vars),'features':userByScreenNameGraphqlFeatures,'fieldToggles':'{"withAuxiliaryUserLabels":true}'},headers=reqHeaders) else: - raise NotImplementedError("User ID method not implemented") - #user = requests.get(f"https://api.{twitterUrl}/1.1/users/show.json?user_id={screen_name}",headers=reqHeaders) + vars=json.loads('{"userId":"","withGrokTranslatedBio":false}') + vars['userId'] = screen_name + user = requests.get(f"https://x.com/i/api/graphql/{userByRestIdGraphql_api}/UserByRestId",{'variables':json.dumps(vars),'features':userByScreenNameGraphqlFeatures,'fieldToggles':'{"withAuxiliaryUserLabels":true}'},headers=reqHeaders) output = user.json() if "errors" in output: # pick the first error and create a twExtractError @@ -560,11 +562,11 @@ def extractUserFeedFromId(userId,workaroundTokens): vars = json.loads('{"userId":"0","count":20,"includePromotedContent":true,"withCommunity":true,"withVoice":true}') vars['userId'] = str(userId) vars['includePromotedContent'] = False # idk if this works - reqHeaders = getAuthHeaders(bearer,authToken=authToken) + reqHeaders = getAuthHeaders(v2bearer,authToken=authToken) endpoint=f"/i/api/graphql/{tweetFeedGraphql_api}/UserTweetsAndReplies" reqHeaders["x-client-transaction-id"] = twUtils.generate_transaction_id("GET",endpoint) feed = requests.get(f"https://{twitterUrl}{endpoint}", {'variables':json.dumps(vars),'features':tweetFeedGraphqlFeatures,'fieldToggles':'{"withArticlePlainText":false}'},headers=reqHeaders) - if feed.status_code == 403: + if feed.status_code == 403 or feed.status_code == 404: raise TwExtractError(403, "Extract error") output = feed.json() if "errors" in output: From 1bc50830f552e7a1a52d7e9521d13f546c8f3bec Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 10 Sep 2025 17:29:15 +0100 Subject: [PATCH 14/28] Add id_str for media: Fixes #283 --- test_api.py | 4 ++++ testgen.py | 1 + vxApi.py | 2 ++ vx_testdata.py | 24 ++++++++++++------------ 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/test_api.py b/test_api.py index 14d1e62..4fe58d0 100644 --- a/test_api.py +++ b/test_api.py @@ -48,6 +48,10 @@ def test_api_include_rtf_nomedia(): assert resp.status_code==200 assert not any(".rtf" in i for i in jData["mediaURLs"]) +def test_api_mixedmedia(): + resp = client.get(testMixedMediaTweet.replace("https://twitter.com","https://api.vxtwitter.com")+"?include_txt=true",headers={"User-Agent":"test"}) + assert resp.status_code==200 + def test_api_user(): resp = client.get(testUser.replace("https://twitter.com","https://api.vxtwitter.com"),headers={"User-Agent":"test"}) jData = resp.get_json() diff --git a/testgen.py b/testgen.py index 473eafb..e35f8a2 100644 --- a/testgen.py +++ b/testgen.py @@ -32,5 +32,6 @@ with open('generated.txt', 'w',encoding='utf-8') as f: del VNF['user_name'] del VNF['user_profile_image_url'] del VNF['communityNote'] + del VNF['fetched_on'] # write in a format that can be copy-pasted into a python file, i.e testTextTweet={... f.write(f"{test}_compare={VNF}\n") \ No newline at end of file diff --git a/vxApi.py b/vxApi.py index 4cf6298..c34acf7 100644 --- a/vxApi.py +++ b/vxApi.py @@ -84,6 +84,7 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): extendedInfo["duration_millis"] = 0 extendedInfo["thumbnail_url"] = i["media_url_https"] extendedInfo["altText"] = altText + extendedInfo["id_str"] = i["id_str"] media_extended.append(extendedInfo) else: media.append(i["media_url_https"]) @@ -95,6 +96,7 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): extendedInfo["type"] = "image" extendedInfo["size"] = {"width":i["original_info"]["width"],"height":i["original_info"]["height"]} extendedInfo["thumbnail_url"] = i["media_url_https"] + extendedInfo["id_str"] = i["id_str"] media_extended.append(extendedInfo) if "hashtags" in tweetL["entities"]: diff --git a/vx_testdata.py b/vx_testdata.py index 3644aee..f80308b 100644 --- a/vx_testdata.py +++ b/vx_testdata.py @@ -6,7 +6,7 @@ testVideoTweet="https://twitter.com/pdxdylan/status/1540398733669666818" testMediaTweet="https://twitter.com/pdxdylan/status/1534672932106035200" testMultiMediaTweet="https://twitter.com/pdxdylan/status/1532006436703715331" testQRTTweet="https://twitter.com/pdxdylan/status/1611477137319514129" -testQrtCeptionTweet="https://twitter.com/CatherineShu/status/585253766271672320" # TODO: tweet is deleted +testQrtCeptionTweet="https://twitter.com/CatherineShu/status/585253766271672320" testQrtVideoTweet="https://twitter.com/pdxdylan/status/1674561759422578690" testNSFWTweet="https://twitter.com/kuyacoy/status/1581185279376838657" testPollTweet="https://twitter.com/norm/status/651169346518056960" @@ -14,17 +14,17 @@ testMixedMediaTweet="https://twitter.com/bigbeerfest/status/1760638922084741177" testVinePlayerTweet="https://twitter.com/Roblox/status/583302104342638592" testRetweetTweet="https://twitter.com/pdxdylan/status/1828570470222045294" -testTextTweet_compare={'text': 'just setting up my twttr', 'date': 'Tue Mar 21 20:50:14 +0000 2006', 'tweetURL': 'https://twitter.com/jack/status/20', 'tweetID': '20', 'conversationID': '20', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1142974214} -testVideoTweet_compare={'text': 'TikTok embeds on Discord/Telegram bait you with a fake play button, but to see the actual video you have to go to their website.\nAs a request from a friend, I made it so that if you add "vx" before "tiktok" on any link, it fixes that. https://t.co/QYpiVXUIrW', 'date': 'Fri Jun 24 18:17:31 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1540398733669666818', 'tweetID': '1540398733669666818', 'conversationID': '1540398733669666818', 'mediaURLs': ['https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4'], 'media_extended': [{'url': 'https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4', 'type': 'video', 'size': {'width': 762, 'height': 528}, 'duration_millis': 13650, 'thumbnail_url': 'https://pbs.twimg.com/ext_tw_video_thumb/1540396699037929472/pu/img/l187Z6B9AHHxUKPV.jpg', 'altText': None}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1656094651} -testMediaTweet_compare={'text': 'oh.', 'date': 'Wed Jun 08 23:05:14 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1534672932106035200', 'tweetID': '1534672932106035200', 'conversationID': '1534672673422381057', 'mediaURLs': ['https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'altText': None, 'type': 'image', 'size': {'width': 927, 'height': 534}, 'thumbnail_url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1654729514} -testMultiMediaTweet_compare={'text': 'Released #Retro64 1.0.9. Besides a lot of internal bug-fixes, this adds quicksand blocks, fixes the rendering for the castle stairs block, and adds a new model, Sonic! \nhttps://github.com/Retro64Mod/Retro64Mod/releases/tag/1.18.2-1.0.9 https://t.co/CWZaw4hzyg', 'date': 'Wed Jun 01 14:29:32 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1532006436703715331', 'tweetID': '1532006436703715331', 'conversationID': '1532006436703715331', 'mediaURLs': ['https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'altText': None, 'type': 'image', 'size': {'width': 507, 'height': 507}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png'}, {'url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'altText': None, 'type': 'image', 'size': {'width': 396, 'height': 431}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png'}, {'url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'altText': None, 'type': 'image', 'size': {'width': 399, 'height': 341}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'}], 'possibly_sensitive': False, 'hashtags': ['Retro64'], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': 'https://vxtwitter.com/rendercombined.jpg?imgs=https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png,https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png,https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'pollData': None, 'article': None, 'date_epoch': 1654093772} -testQRTTweet_compare={'text': "vxTwitter has gotten a *ton* of usage recently, so I'd appreciate a donation to keep things running!\n", 'date': 'Fri Jan 06 21:37:43 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1611477137319514129', 'tweetID': '1611477137319514129', 'conversationID': '1611476665821003776', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1518309187515781125', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1673041063} -testQrtCeptionTweet_compare={'text': 'Testing retweetception ', 'date': 'Tue Apr 07 01:32:26 +0000 2015', 'tweetURL': 'https://twitter.com/CatherineShu/status/585253766271672320', 'tweetID': '585253766271672320', 'conversationID': '585253766271672320', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/585253161260216320', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1428370346} -testQrtVideoTweet_compare={'text': 'good', 'date': 'Thu Jun 29 23:33:29 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1674561759422578690', 'tweetID': '1674561759422578690', 'conversationID': '1674561759422578690', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1674197531301904388', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1688081609} -testNSFWTweet_compare={'text': "ngl, I'm scared on finding out the cute Sprigatito's final evolution..\n\nso i had a bot generate it for me.... and I'm forever scarred https://t.co/itMay87vcS", 'date': 'Sat Oct 15 07:28:42 +0000 2022', 'tweetURL': 'https://twitter.com/kuyacoy/status/1581185279376838657', 'tweetID': '1581185279376838657', 'conversationID': '1581185279376838657', 'mediaURLs': ['https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'altText': None, 'type': 'image', 'size': {'width': 760, 'height': 926}, 'thumbnail_url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1665818922} -testPollTweet_compare={'text': 'I know when that hotline bling, that can only:', 'date': 'Mon Oct 05 22:57:25 +0000 2015', 'tweetURL': 'https://twitter.com/norm/status/651169346518056960', 'tweetID': '651169346518056960', 'conversationID': '651169346518056960', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': {'options': [{'name': 'Mean one thing', 'votes': 124875, 'percent': 78.82}, {'name': 'Mean multiple things', 'votes': 33554, 'percent': 21.18}]}, 'article': None, 'date_epoch': 1444085845} -testMixedMediaTweet_compare={'text': 'Some of us here are definitely big nerds about beer, and could talk your ear off about it for days on end, but some of us are just "beer is nice"', 'date': 'Thu Feb 22 12:13:24 +0000 2024', 'tweetURL': 'https://twitter.com/salebeerfest/status/1760638922084741177', 'tweetID': '1760638922084741177', 'conversationID': '1760638922084741177', 'mediaURLs': ['https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'altText': None, 'type': 'image', 'size': {'width': 858, 'height': 960}, 'thumbnail_url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg'}, {'url': 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4', 'type': 'gif', 'size': {'width': 500, 'height': 500}, 'duration_millis': 0, 'thumbnail_url': 'https://pbs.twimg.com/tweet_video_thumb/GG8LwqWX0AAZch0.jpg', 'altText': None}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': False, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1708604004} -testVinePlayerTweet_compare={'text': 'You wanted old ROBLOX back, you got it. Check out our sweet "new" look! #BringBackOldROBLOX https://vine.co/v/OL9VqvM6wJh', 'date': 'Wed Apr 01 16:17:13 +0000 2015', 'tweetURL': 'https://twitter.com/Roblox/status/583302104342638592', 'tweetID': '583302104342638592', 'conversationID': '583302104342638592', 'mediaURLs': ['https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ'], 'media_extended': [{'url': 'https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ', 'type': 'video', 'size': {'width': 435, 'height': 435}}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': {'options': []}, 'article': None, 'date_epoch': 1427905033} +testTextTweet_compare={'text': 'just setting up my twttr', 'date': 'Tue Mar 21 20:50:14 +0000 2006', 'tweetURL': 'https://twitter.com/jack/status/20', 'tweetID': '20', 'conversationID': '20', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1142974214} +testVideoTweet_compare={'text': 'TikTok embeds on Discord/Telegram bait you with a fake play button, but to see the actual video you have to go to their website.\nAs a request from a friend, I made it so that if you add "vx" before "tiktok" on any link, it fixes that. https://t.co/QYpiVXUIrW', 'date': 'Fri Jun 24 18:17:31 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1540398733669666818', 'tweetID': '1540398733669666818', 'conversationID': '1540398733669666818', 'mediaURLs': ['https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4'], 'media_extended': [{'url': 'https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4', 'type': 'video', 'size': {'width': 762, 'height': 528}, 'duration_millis': 13650, 'thumbnail_url': 'https://pbs.twimg.com/ext_tw_video_thumb/1540396699037929472/pu/img/l187Z6B9AHHxUKPV.jpg', 'altText': None, 'id_str': '1540396699037929472'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1656094651} +testMediaTweet_compare={'text': 'oh.', 'date': 'Wed Jun 08 23:05:14 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1534672932106035200', 'tweetID': '1534672932106035200', 'conversationID': '1534672673422381057', 'mediaURLs': ['https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'altText': None, 'type': 'image', 'size': {'width': 927, 'height': 534}, 'thumbnail_url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'id_str': '1534672730213208067'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'und', 'replyingTo': 'pdxdylan', 'replyingToID': '1534672673422381057', 'retweetURL': None, 'date_epoch': 1654729514} +testMultiMediaTweet_compare={'text': 'Released #Retro64 1.0.9. Besides a lot of internal bug-fixes, this adds quicksand blocks, fixes the rendering for the castle stairs block, and adds a new model, Sonic! \nhttps://github.com/Retro64Mod/Retro64Mod/releases/tag/1.18.2-1.0.9 https://t.co/CWZaw4hzyg', 'date': 'Wed Jun 01 14:29:32 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1532006436703715331', 'tweetID': '1532006436703715331', 'conversationID': '1532006436703715331', 'mediaURLs': ['https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'altText': None, 'type': 'image', 'size': {'width': 507, 'height': 507}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'id_str': '1532004485966577667'}, {'url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'altText': None, 'type': 'image', 'size': {'width': 396, 'height': 431}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'id_str': '1532004975269797890'}, {'url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'altText': None, 'type': 'image', 'size': {'width': 399, 'height': 341}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'id_str': '1532005117553164291'}], 'possibly_sensitive': False, 'hashtags': ['Retro64'], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': 'https://vxtwitter.com/rendercombined.jpg?imgs=https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png,https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png,https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1654093772} +testQRTTweet_compare={'text': "vxTwitter has gotten a *ton* of usage recently, so I'd appreciate a donation to keep things running!\nhttps://x.com/pdxdylan/status/1518309187515781125", 'date': 'Fri Jan 06 21:37:43 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1611477137319514129', 'tweetID': '1611477137319514129', 'conversationID': '1611476665821003776', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1518309187515781125', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': 'pdxdylan', 'replyingToID': '1611476665821003776', 'retweetURL': None, 'date_epoch': 1673041063} +testQrtVideoTweet_compare={'text': 'good', 'date': 'Thu Jun 29 23:33:29 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1674561759422578690', 'tweetID': '1674561759422578690', 'conversationID': '1674561759422578690', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1674197531301904388', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1688081609} +testNSFWTweet_compare={'text': "ngl, I'm scared on finding out the cute Sprigatito's final evolution..\n\nso i had a bot generate it for me.... and I'm forever scarred https://t.co/itMay87vcS", 'date': 'Sat Oct 15 07:28:42 +0000 2022', 'tweetURL': 'https://twitter.com/kuyacoy/status/1581185279376838657', 'tweetID': '1581185279376838657', 'conversationID': '1581185279376838657', 'mediaURLs': ['https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'altText': None, 'type': 'image', 'size': {'width': 760, 'height': 926}, 'thumbnail_url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'id_str': '1581185134803517442'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1665818922} +testPollTweet_compare={'text': 'I know when that hotline bling, that can only:', 'date': 'Mon Oct 05 22:57:25 +0000 2015', 'tweetURL': 'https://twitter.com/norm/status/651169346518056960', 'tweetID': '651169346518056960', 'conversationID': '651169346518056960', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': {'options': [{'name': 'Mean one thing', 'votes': 124875, 'percent': 78.82}, {'name': 'Mean multiple things', 'votes': 33554, 'percent': 21.18}]}, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1444085845} +testMixedMediaTweet_compare={'text': 'Some of us here are definitely big nerds about beer, and could talk your ear off about it for days on end, but some of us are just "beer is nice"', 'date': 'Thu Feb 22 12:13:24 +0000 2024', 'tweetURL': 'https://twitter.com/salebeerfest/status/1760638922084741177', 'tweetID': '1760638922084741177', 'conversationID': '1760638922084741177', 'mediaURLs': ['https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'altText': None, 'type': 'image', 'size': {'width': 858, 'height': 960}, 'thumbnail_url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'id_str': '1760638907102699520'}, {'url': 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4', 'type': 'gif', 'size': {'width': 500, 'height': 500}, 'duration_millis': 0, 'thumbnail_url': 'https://pbs.twimg.com/tweet_video_thumb/GG8LwqWX0AAZch0.jpg', 'altText': None, 'id_str': '1760638909954904064'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': False, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1708604004} +testVinePlayerTweet_compare={'text': 'You wanted old ROBLOX back, you got it. Check out our sweet "new" look! #BringBackOldROBLOX https://vine.co/v/OL9VqvM6wJh', 'date': 'Wed Apr 01 16:17:13 +0000 2015', 'tweetURL': 'https://twitter.com/Roblox/status/583302104342638592', 'tweetID': '583302104342638592', 'conversationID': '583302104342638592', 'mediaURLs': ['https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ'], 'media_extended': [{'url': 'https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ', 'type': 'video', 'size': {'width': 435, 'height': 435}}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': {'options': []}, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1427905033} +testRetweetTweet_compare={'text': 'RT @pdxdylan: If you want to try this out, on your mobile device, head over to https://vxtwitter.com/preferences and enable "Open links in app". Hope…', 'date': 'Tue Aug 27 23:09:07 +0000 2024', 'tweetURL': 'https://twitter.com/pdxdylan/status/1828570470222045294', 'tweetID': '1828570470222045294', 'conversationID': '1828570470222045294', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': 'https://twitter.com/i/status/1828569456231993456', 'date_epoch': 1724800147} testUser="https://twitter.com/jack" testUserSuspended="https://twitter.com/twitter" From 7791b56419a1b44dc0adb77f828b0c7b5afdc736 Mon Sep 17 00:00:00 2001 From: xnand-dot-xyz <224628274+xnand-dot-xyz@users.noreply.github.com> Date: Fri, 12 Sep 2025 15:35:40 +0000 Subject: [PATCH 15/28] check if workaroundTokens is None before splitting --- twitfix.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/twitfix.py b/twitfix.py index 5fb764c..be90aad 100644 --- a/twitfix.py +++ b/twitfix.py @@ -286,14 +286,18 @@ def getTweetData(twitter_url,include_txt="false",include_rtf="false"): return tweetData def getUserData(twitter_url,includeFeed=False): - rawUserData = twExtract.extractUser(twitter_url,workaroundTokens=config['config']['workaroundTokens'].split(',')) + if config['config']['workaroundTokens'] is not None: + workaroundTokens = config['config']['workaroundTokens'].split(",") + else: + workaroundTokens = None + rawUserData = twExtract.extractUser(twitter_url,workaroundTokens=workaroundTokens)) userData = getApiUserResponse(rawUserData) if includeFeed: if userData['protected']: userData['latest_tweets']=[] else: - feed = twExtract.extractUserFeedFromId(userData['id'],workaroundTokens=config['config']['workaroundTokens'].split(',')) + feed = twExtract.extractUserFeedFromId(userData['id'],workaroundTokens=workaroundTokens)) apiFeed = [] for tweet in feed: apiFeed.append(getApiResponse(tweet)) From 0fc3dce253052961089bec9b6c4b6da75dc102a4 Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 24 Sep 2025 18:35:07 +0100 Subject: [PATCH 16/28] Update v2Anon Extraction --- twExtract/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index a73ec71..8f2c213 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -28,8 +28,8 @@ userIDregex = r"\/i\/user\/(\d+)" v2Features='{"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"super_follow_tweet_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"creator_subscriptions_subscription_count_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"super_follow_exclusive_tweet_notifications_enabled":true}' v2graphql_api="2OOZWmw8nAtUHVnXXQhgaA" -v2AnonFeatures='{"creator_subscriptions_tweet_preview_api_enabled":true,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"articles_preview_enabled":true,"tweetypie_unmention_optimization_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"tweet_with_visibility_results_prefer_gql_media_interstitial_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_enhance_cards_enabled":false}' -v2AnonGraphql_api="7xflPyRiUxGVbJd4uWmbfg" +v2AnonFeatures='{"creator_subscriptions_tweet_preview_api_enabled":true,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":false,"responsive_web_jetfuel_frame":true,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"payments_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_grok_imagine_annotation_enabled":true,"responsive_web_grok_community_note_auto_translation_is_enabled":false,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_enhance_cards_enabled":false}' +v2AnonGraphql_api="wqi5M7wZ7tW-X9S2t-Mqcg" gt_pattern = r'document\.cookie="gt=([^;]+);' androidGraphqlFeatures='{"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":true,"super_follow_exclusive_tweet_notifications_enabled":true}' From 433f015c0700d55cb79eee0e7c3c42998fa72cae Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 24 Sep 2025 20:59:19 +0100 Subject: [PATCH 17/28] Temporary switch to syndication --- twExtract/__init__.py | 2 +- twitfix.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 8f2c213..ddfc403 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -495,7 +495,7 @@ def fixTweetData(tweet): def extractStatus(url,workaroundTokens=None): # TODO: commented out methods are too slow/unreliable at the moment - methods=[extractStatusV2Anon,extractStatusV2]#,extractStatusV2Android,extractStatusV2TweetDetail] # + methods=[extractStatus_syndication,extractStatusV2]#,extractStatusV2Android,extractStatusV2TweetDetail] # for method in methods: try: result = method(url,workaroundTokens) diff --git a/twitfix.py b/twitfix.py index be90aad..da9732e 100644 --- a/twitfix.py +++ b/twitfix.py @@ -287,17 +287,17 @@ def getTweetData(twitter_url,include_txt="false",include_rtf="false"): def getUserData(twitter_url,includeFeed=False): if config['config']['workaroundTokens'] is not None: - workaroundTokens = config['config']['workaroundTokens'].split(",") - else: - workaroundTokens = None - rawUserData = twExtract.extractUser(twitter_url,workaroundTokens=workaroundTokens)) + workaroundTokens = config['config']['workaroundTokens'].split(",") + else: + workaroundTokens = None + rawUserData = twExtract.extractUser(twitter_url,workaroundTokens=workaroundTokens) userData = getApiUserResponse(rawUserData) if includeFeed: if userData['protected']: userData['latest_tweets']=[] else: - feed = twExtract.extractUserFeedFromId(userData['id'],workaroundTokens=workaroundTokens)) + feed = twExtract.extractUserFeedFromId(userData['id'],workaroundTokens=workaroundTokens) apiFeed = [] for tweet in feed: apiFeed.append(getApiResponse(tweet)) From b657ae007666e43ead01e98d13b438f8161ea349 Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 24 Sep 2025 21:44:41 +0100 Subject: [PATCH 18/28] Return to using extractStatusV2Anon --- twExtract/__init__.py | 2 +- vxApi.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index ddfc403..8f2c213 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -495,7 +495,7 @@ def fixTweetData(tweet): def extractStatus(url,workaroundTokens=None): # TODO: commented out methods are too slow/unreliable at the moment - methods=[extractStatus_syndication,extractStatusV2]#,extractStatusV2Android,extractStatusV2TweetDetail] # + methods=[extractStatusV2Anon,extractStatusV2]#,extractStatusV2Android,extractStatusV2TweetDetail] # for method in methods: try: result = method(url,workaroundTokens) diff --git a/vxApi.py b/vxApi.py index c34acf7..89b2ac7 100644 --- a/vxApi.py +++ b/vxApi.py @@ -23,9 +23,10 @@ def getApiUserResponse(user): def getApiResponse(tweet,include_txt=False,include_rtf=False): tweetL = tweet["legacy"] if "user_result" in tweet["core"]: - userL = tweet["core"]["user_result"]["result"]["legacy"] + user = tweet["core"]["user_result"]["result"] elif "user_results" in tweet["core"]: - userL = tweet["core"]["user_results"]["result"]["legacy"] + user = tweet["core"]["user_results"]["result"] + userL = user["legacy"] media=[] media_extended=[] hashtags=[] @@ -33,6 +34,14 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): oldTweetVersion = False tweetArticle=None lang=None + + if "screen_name" not in userL: + userL["screen_name"] = user["core"]["screen_name"] + if "name" not in userL: + userL["name"] = user["core"]["name"] + if "profile_image_url_https" not in userL: + userL["profile_image_url_https"] = user["avatar"]["image_url"] + #editedTweet=False try: if "birdwatch_pivot" in tweet: From a27b768ff349ef01940cc29aed170bc55ae37b1f Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 25 Sep 2025 00:45:00 +0100 Subject: [PATCH 19/28] Use env variable for requests pool --- twExtract/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 8f2c213..7c46dd8 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -47,6 +47,9 @@ userByScreenNameGraphql_api="96tVxbPqMZDoYB5pmzezKA" userByRestIdGraphql_api="8r5oa_2vD0WkhIAOkY4TTA" twitterUrl = "x.com" # doubt this will change but just in case + +simultaneousRequests = int(os.getenv("VXTWITTER_SIMULTANEOUS_REQUESTS",1)) + class TwExtractError(Exception): def __init__(self, code, message): self.code = code @@ -65,7 +68,7 @@ def parallel_token_request(twid, tokens, request_function): except Exception as e: return {'success': False, 'error': str(e)} - with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, len(tokens))) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=min(simultaneousRequests, len(tokens))) as executor: futures = {executor.submit(try_token, token): token for token in tokens} for future in concurrent.futures.as_completed(futures): result = future.result() From c37d7195a3140f39d82d509d1e5bb167b5ed81af Mon Sep 17 00:00:00 2001 From: Xeukxz Date: Sun, 12 Oct 2025 21:41:42 +0100 Subject: [PATCH 20/28] Support for ad cards --- vxApi.py | 128 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 77 insertions(+), 51 deletions(-) diff --git a/vxApi.py b/vxApi.py index 89b2ac7..db8a88f 100644 --- a/vxApi.py +++ b/vxApi.py @@ -1,5 +1,6 @@ import html from datetime import datetime +from flask import json from configHandler import config from utils import stripEndTCO @@ -20,6 +21,48 @@ def getApiUserResponse(user): "fetched_on": int(datetime.now().timestamp()), } +def getBestMediaUrl(mediaList): + # find the highest bitrate + best_bitrate = -1 + besturl="" + for j in mediaList: + if j['content_type'] == "video/mp4" and '/hevc/' not in j["url"] and j['bitrate'] > best_bitrate: + besturl = j["url"] + best_bitrate = j['bitrate'] + if "?tag=" in besturl: + besturl = besturl[:besturl.index("?tag=")] + return besturl + +def getExtendedVideoOrGifInfo(mediaEntry): + videoInfo = mediaEntry["video_info"] + info = { + "url": getBestMediaUrl(videoInfo["variants"]), + "type": "gif" if mediaEntry.get("type", "") == "animated_gif" else "video", + "size": { + "width": mediaEntry['original_info']["width"], + "height": mediaEntry['original_info']["height"] + }, + "duration_millis": videoInfo.get("duration_millis", 0), + "thumbnail_url": mediaEntry.get("media_url_https", None), + "altText": mediaEntry.get("ext_alt_text", None), + "id_str": mediaEntry.get("id_str", None) + } + return info + +def getExtendedImageInfo(mediaEntry): + info = { + "url": mediaEntry.get("media_url_https", None), + "type": "image", + "size": { + "width": mediaEntry["original_info"]["width"], + "height": mediaEntry["original_info"]["height"] + }, + "thumbnail_url": mediaEntry.get("media_url_https", None), + "altText": mediaEntry.get("ext_alt_text", None), + "id_str": mediaEntry.get("id_str", None) + } + return info + def getApiResponse(tweet,include_txt=False,include_rtf=False): tweetL = tweet["legacy"] if "user_result" in tweet["core"]: @@ -69,63 +112,46 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): for i in tmedia: extendedInfo={} if "video_info" in i: - # find the highest bitrate - best_bitrate = -1 - besturl="" - for j in i["video_info"]["variants"]: - if j['content_type'] == "video/mp4" and '/hevc/' not in j["url"] and j['bitrate'] > best_bitrate: - besturl = j['url'] - best_bitrate = j['bitrate'] - if "?tag=" in besturl: - besturl = besturl[:besturl.index("?tag=")] - media.append(besturl) - extendedInfo["url"] = besturl - extendedInfo["type"] = "video" - if (i["type"] == "animated_gif"): - extendedInfo["type"] = "gif" - altText = None - extendedInfo["size"] = {"width":i["original_info"]["width"],"height":i["original_info"]["height"]} - if "ext_alt_text" in i: - altText=i["ext_alt_text"] - if "duration_millis" in i["video_info"]: - extendedInfo["duration_millis"] = i["video_info"]["duration_millis"] - else: - extendedInfo["duration_millis"] = 0 - extendedInfo["thumbnail_url"] = i["media_url_https"] - extendedInfo["altText"] = altText - extendedInfo["id_str"] = i["id_str"] + extendedInfo = getExtendedVideoOrGifInfo(i) + media.append(extendedInfo["url"]) media_extended.append(extendedInfo) else: - media.append(i["media_url_https"]) - extendedInfo["url"] = i["media_url_https"] - altText=None - if "ext_alt_text" in i: - altText=i["ext_alt_text"] - extendedInfo["altText"] = altText - extendedInfo["type"] = "image" - extendedInfo["size"] = {"width":i["original_info"]["width"],"height":i["original_info"]["height"]} - extendedInfo["thumbnail_url"] = i["media_url_https"] - extendedInfo["id_str"] = i["id_str"] - media_extended.append(extendedInfo) + media_extended.append(getExtendedImageInfo(i)) if "hashtags" in tweetL["entities"]: for i in tweetL["entities"]["hashtags"]: hashtags.append(i["text"]) - elif "card" in tweet and 'name' in tweet['card'] and tweet['card']['name'] == "player": - width = None - height = None - vidUrl = None - for i in tweet['card']['binding_values']: - if i['key'] == 'player_stream_url': - vidUrl = i['value']['string_value'] - elif i['key'] == 'player_width': - width = int(i['value']['string_value']) - elif i['key'] == 'player_height': - height = int(i['value']['string_value']) - if vidUrl != None and width != None and height != None: - media.append(vidUrl) - media_extended.append({"url":vidUrl,"type":"video","size":{"width":width,"height":height}}) - + elif "card" in tweet: + if 'name' in tweet['card'] and tweet['card']['name'] == "player": + width = None + height = None + vidUrl = None + for i in tweet['card']['binding_values']: + if i['key'] == 'player_stream_url': + vidUrl = i['value']['string_value'] + elif i['key'] == 'player_width': + width = int(i['value']['string_value']) + elif i['key'] == 'player_height': + height = int(i['value']['string_value']) + if vidUrl != None and width != None and height != None: + media.append(vidUrl) + media_extended.append({"url":vidUrl,"type":"video","size":{"width":width,"height":height}}) + else: + for i in tweet['card']['binding_values']: + if i['key'] == 'unified_card' and 'value' in i and 'string_value' in i['value']: + card = json.loads(i['value']['string_value']) + media_key = card['component_objects']['media_1']['data']['id'] + media_entry = card['media_entities'][media_key] + extendedInfo = getExtendedVideoOrGifInfo(media_entry) + media.append(extendedInfo['url']) + media_extended.append(extendedInfo) + break + elif i['key'] == 'photo_image_full_size_large' and 'value' in i and 'image_value' in i['value']: + imgData = i['value']['image_value'] + imgurl = imgData['url'] + media.append(imgurl) + media_extended.append({"url":imgurl,"type":"image","size":{"width":imgData['width'],"height":imgData['height']}}) + break if "article" in tweet: try: result = tweet["article"]["article_results"]["result"] From d9101dc7275ae8e78d7087ef643ac7ca54e805db Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 12 Oct 2025 22:55:19 +0100 Subject: [PATCH 21/28] Fix media not being appended to --- vxApi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vxApi.py b/vxApi.py index db8a88f..07eca17 100644 --- a/vxApi.py +++ b/vxApi.py @@ -116,7 +116,9 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): media.append(extendedInfo["url"]) media_extended.append(extendedInfo) else: - media_extended.append(getExtendedImageInfo(i)) + extendedInfo = getExtendedImageInfo(i) + media_extended.append(extendedInfo) + media.append(extendedInfo["url"]) if "hashtags" in tweetL["entities"]: for i in tweetL["entities"]["hashtags"]: From 78e0ecfaa9cf9973280e8562b435ba6b31e50f1d Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 12 Oct 2025 23:06:35 +0100 Subject: [PATCH 22/28] Fix small issue with card logic --- vxApi.py | 66 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/vxApi.py b/vxApi.py index 07eca17..24fea75 100644 --- a/vxApi.py +++ b/vxApi.py @@ -124,36 +124,42 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): for i in tweetL["entities"]["hashtags"]: hashtags.append(i["text"]) elif "card" in tweet: - if 'name' in tweet['card'] and tweet['card']['name'] == "player": - width = None - height = None - vidUrl = None - for i in tweet['card']['binding_values']: - if i['key'] == 'player_stream_url': - vidUrl = i['value']['string_value'] - elif i['key'] == 'player_width': - width = int(i['value']['string_value']) - elif i['key'] == 'player_height': - height = int(i['value']['string_value']) - if vidUrl != None and width != None and height != None: - media.append(vidUrl) - media_extended.append({"url":vidUrl,"type":"video","size":{"width":width,"height":height}}) - else: - for i in tweet['card']['binding_values']: - if i['key'] == 'unified_card' and 'value' in i and 'string_value' in i['value']: - card = json.loads(i['value']['string_value']) - media_key = card['component_objects']['media_1']['data']['id'] - media_entry = card['media_entities'][media_key] - extendedInfo = getExtendedVideoOrGifInfo(media_entry) - media.append(extendedInfo['url']) - media_extended.append(extendedInfo) - break - elif i['key'] == 'photo_image_full_size_large' and 'value' in i and 'image_value' in i['value']: - imgData = i['value']['image_value'] - imgurl = imgData['url'] - media.append(imgurl) - media_extended.append({"url":imgurl,"type":"image","size":{"width":imgData['width'],"height":imgData['height']}}) - break + bindingValues = None + if 'binding_values' in tweet['card']: + bindingValues = tweet['card']['binding_values'] + elif 'legacy' in tweet['card'] and 'binding_values' in tweet['card']['legacy']: + bindingValues = tweet['card']['legacy']['binding_values'] + if bindingValues != None: + if 'name' in tweet['card'] and tweet['card']['name'] == "player": + width = None + height = None + vidUrl = None + for i in bindingValues: + if i['key'] == 'player_stream_url': + vidUrl = i['value']['string_value'] + elif i['key'] == 'player_width': + width = int(i['value']['string_value']) + elif i['key'] == 'player_height': + height = int(i['value']['string_value']) + if vidUrl != None and width != None and height != None: + media.append(vidUrl) + media_extended.append({"url":vidUrl,"type":"video","size":{"width":width,"height":height}}) + else: + for i in bindingValues: + if i['key'] == 'unified_card' and 'value' in i and 'string_value' in i['value']: + card = json.loads(i['value']['string_value']) + media_key = card['component_objects']['media_1']['data']['id'] + media_entry = card['media_entities'][media_key] + extendedInfo = getExtendedVideoOrGifInfo(media_entry) + media.append(extendedInfo['url']) + media_extended.append(extendedInfo) + break + elif i['key'] == 'photo_image_full_size_large' and 'value' in i and 'image_value' in i['value']: + imgData = i['value']['image_value'] + imgurl = imgData['url'] + media.append(imgurl) + media_extended.append({"url":imgurl,"type":"image","size":{"width":imgData['width'],"height":imgData['height']}}) + break if "article" in tweet: try: result = tweet["article"]["article_results"]["result"] From c9b4f842484ee1fa100a101a8b8affee1b17e3d2 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 12 Oct 2025 23:07:59 +0100 Subject: [PATCH 23/28] Remove old test --- test_vx_extract.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test_vx_extract.py b/test_vx_extract.py index c3864a4..57d907d 100644 --- a/test_vx_extract.py +++ b/test_vx_extract.py @@ -33,11 +33,6 @@ def test_twextract_extractStatusV2TweetDetails(): assert utils.stripEndTCO(tweet["full_text"])==testMediaTweet_compare['text'] ## Tweet retrieve tests ## -def test_twextract_textTweetExtract(): - tweet = twExtract.extractStatus(testTextTweet,workaroundTokens=tokens) - assert utils.stripEndTCO(tweet["legacy"]["full_text"])==testTextTweet_compare['text'] - assert tweet["user"]["screen_name"]=="jack" - assert 'extended_entities' not in tweet def test_twextract_extractV2(): tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens) From 7a78cc08cadb91e0a1f9ff88c625ae12c83bc0fc Mon Sep 17 00:00:00 2001 From: Xeukxz Date: Thu, 16 Oct 2025 21:21:08 +0100 Subject: [PATCH 24/28] Support alternate card key --- vxApi.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/vxApi.py b/vxApi.py index 24fea75..69d7ce3 100644 --- a/vxApi.py +++ b/vxApi.py @@ -123,14 +123,15 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): if "hashtags" in tweetL["entities"]: for i in tweetL["entities"]["hashtags"]: hashtags.append(i["text"]) - elif "card" in tweet: + elif "card" in tweet or "tweet_card" in tweet: + cardData = tweet["card" if "card" in tweet else "tweet_card"] bindingValues = None - if 'binding_values' in tweet['card']: - bindingValues = tweet['card']['binding_values'] - elif 'legacy' in tweet['card'] and 'binding_values' in tweet['card']['legacy']: - bindingValues = tweet['card']['legacy']['binding_values'] + if 'binding_values' in cardData: + bindingValues = cardData['binding_values'] + elif 'legacy' in cardData and 'binding_values' in cardData['legacy']: + bindingValues = cardData['legacy']['binding_values'] if bindingValues != None: - if 'name' in tweet['card'] and tweet['card']['name'] == "player": + if 'name' in cardData and cardData['name'] == "player": width = None height = None vidUrl = None @@ -147,9 +148,9 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): else: for i in bindingValues: if i['key'] == 'unified_card' and 'value' in i and 'string_value' in i['value']: - card = json.loads(i['value']['string_value']) - media_key = card['component_objects']['media_1']['data']['id'] - media_entry = card['media_entities'][media_key] + cardData = json.loads(i['value']['string_value']) + media_key = cardData['component_objects']['media_1']['data']['id'] + media_entry = cardData['media_entities'][media_key] extendedInfo = getExtendedVideoOrGifInfo(media_entry) media.append(extendedInfo['url']) media_extended.append(extendedInfo) From 9d83c962e78f1a464b9e03012df73d8e3b180a2b Mon Sep 17 00:00:00 2001 From: Dylan Date: Mon, 27 Oct 2025 15:53:21 +0000 Subject: [PATCH 25/28] Re-enable extractStatusV2Android --- twExtract/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 7c46dd8..1e29950 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -32,8 +32,8 @@ v2AnonFeatures='{"creator_subscriptions_tweet_preview_api_enabled":true,"premium v2AnonGraphql_api="wqi5M7wZ7tW-X9S2t-Mqcg" gt_pattern = r'document\.cookie="gt=([^;]+);' -androidGraphqlFeatures='{"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":true,"super_follow_exclusive_tweet_notifications_enabled":true}' -androidGraphql_api="llQH5PFIRlenVrlKJU8jNA" +androidGraphqlFeatures='{"grok_translations_community_note_translation_is_enabled":false,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"profile_label_improvements_pcf_label_in_profile_enabled":true,"premium_content_api_read_enabled":false,"grok_translations_community_note_auto_translation_is_enabled":false,"android_graphql_skip_api_media_color_palette":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"super_follow_exclusive_tweet_notifications_enabled":true,"longform_notetweets_inline_media_enabled":true,"grok_android_analyze_trend_fetch_enabled":false,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"grok_translations_timeline_user_bio_auto_translation_is_enabled":false,"grok_translations_post_auto_translation_is_enabled":false,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":true,"profile_label_improvements_pcf_label_in_post_enabled":true}' +androidGraphql_api="k3rtLsS9kG5hI-Jr0dTMCg" tweetDetailGraphqlFeatures='{"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"articles_preview_enabled":true,"tweetypie_unmention_optimization_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_enhance_cards_enabled":false}' tweetDetailGraphql_api="e7RKseIxLu7HgkWNKZ6qnw" @@ -498,7 +498,7 @@ def fixTweetData(tweet): def extractStatus(url,workaroundTokens=None): # TODO: commented out methods are too slow/unreliable at the moment - methods=[extractStatusV2Anon,extractStatusV2]#,extractStatusV2Android,extractStatusV2TweetDetail] # + methods=[extractStatusV2Anon,extractStatusV2,extractStatusV2Android]#,extractStatusV2TweetDetail] for method in methods: try: result = method(url,workaroundTokens) From e5b9fb982413fa6b57148a43fb552b6c1e582962 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 2 Nov 2025 19:32:35 +0000 Subject: [PATCH 26/28] Fix #303 --- test_vx_misc.py | 9 ++++++++- utils.py | 2 +- vx_testdata.py | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/test_vx_misc.py b/test_vx_misc.py index ca8eb69..367fbc3 100644 --- a/test_vx_misc.py +++ b/test_vx_misc.py @@ -1,10 +1,17 @@ -import twitfix, cache, twExtract +import twitfix, cache, twExtract, utils from vx_testdata import * from twExtract import twUtils def test_calcSyndicationToken(): assert twUtils.calcSyndicationToken("1691389765483200513") == "43lnobuxzql" +def test_stripEndTCO(): + assert utils.stripEndTCO("Hello World https://t.co/abc123") == "Hello World" + assert utils.stripEndTCO("Hello\nWorld https://t.co/abc123") == "Hello\nWorld" + assert utils.stripEndTCO("Hello\nWorld\nhttps://t.co/abc123") == "Hello\nWorld" + assert utils.stripEndTCO("Hello\nWorld\n https://t.co/abc123") == "Hello\nWorld" + assert utils.stripEndTCO("Hello\nWorld \nhttps://t.co/abc123") == "Hello\nWorld" + def test_addToCache(): cache.clearCache() twitfix.getTweetData(testTextTweet) diff --git a/utils.py b/utils.py index b3656f8..3a305d7 100644 --- a/utils.py +++ b/utils.py @@ -3,7 +3,7 @@ import io from configHandler import config pathregex = re.compile("\\w{1,15}\\/(status|statuses)\\/(\\d{2,20})") -endTCOregex = re.compile("(^.*?) +https:\/\/t.co\/.*?$") +endTCOregex = re.compile("(^.*?)[ \n]+https:\/\/t.co\/.*?$",flags=re.DOTALL) def getTweetIdFromUrl(url): match = pathregex.search(url) diff --git a/vx_testdata.py b/vx_testdata.py index f80308b..3ec28ad 100644 --- a/vx_testdata.py +++ b/vx_testdata.py @@ -15,12 +15,12 @@ testVinePlayerTweet="https://twitter.com/Roblox/status/583302104342638592" testRetweetTweet="https://twitter.com/pdxdylan/status/1828570470222045294" testTextTweet_compare={'text': 'just setting up my twttr', 'date': 'Tue Mar 21 20:50:14 +0000 2006', 'tweetURL': 'https://twitter.com/jack/status/20', 'tweetID': '20', 'conversationID': '20', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1142974214} -testVideoTweet_compare={'text': 'TikTok embeds on Discord/Telegram bait you with a fake play button, but to see the actual video you have to go to their website.\nAs a request from a friend, I made it so that if you add "vx" before "tiktok" on any link, it fixes that. https://t.co/QYpiVXUIrW', 'date': 'Fri Jun 24 18:17:31 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1540398733669666818', 'tweetID': '1540398733669666818', 'conversationID': '1540398733669666818', 'mediaURLs': ['https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4'], 'media_extended': [{'url': 'https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4', 'type': 'video', 'size': {'width': 762, 'height': 528}, 'duration_millis': 13650, 'thumbnail_url': 'https://pbs.twimg.com/ext_tw_video_thumb/1540396699037929472/pu/img/l187Z6B9AHHxUKPV.jpg', 'altText': None, 'id_str': '1540396699037929472'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1656094651} +testVideoTweet_compare={'text': 'TikTok embeds on Discord/Telegram bait you with a fake play button, but to see the actual video you have to go to their website.\nAs a request from a friend, I made it so that if you add "vx" before "tiktok" on any link, it fixes that.', 'date': 'Fri Jun 24 18:17:31 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1540398733669666818', 'tweetID': '1540398733669666818', 'conversationID': '1540398733669666818', 'mediaURLs': ['https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4'], 'media_extended': [{'url': 'https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4', 'type': 'video', 'size': {'width': 762, 'height': 528}, 'duration_millis': 13650, 'thumbnail_url': 'https://pbs.twimg.com/ext_tw_video_thumb/1540396699037929472/pu/img/l187Z6B9AHHxUKPV.jpg', 'altText': None, 'id_str': '1540396699037929472'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1656094651} testMediaTweet_compare={'text': 'oh.', 'date': 'Wed Jun 08 23:05:14 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1534672932106035200', 'tweetID': '1534672932106035200', 'conversationID': '1534672673422381057', 'mediaURLs': ['https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'altText': None, 'type': 'image', 'size': {'width': 927, 'height': 534}, 'thumbnail_url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'id_str': '1534672730213208067'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'und', 'replyingTo': 'pdxdylan', 'replyingToID': '1534672673422381057', 'retweetURL': None, 'date_epoch': 1654729514} -testMultiMediaTweet_compare={'text': 'Released #Retro64 1.0.9. Besides a lot of internal bug-fixes, this adds quicksand blocks, fixes the rendering for the castle stairs block, and adds a new model, Sonic! \nhttps://github.com/Retro64Mod/Retro64Mod/releases/tag/1.18.2-1.0.9 https://t.co/CWZaw4hzyg', 'date': 'Wed Jun 01 14:29:32 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1532006436703715331', 'tweetID': '1532006436703715331', 'conversationID': '1532006436703715331', 'mediaURLs': ['https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'altText': None, 'type': 'image', 'size': {'width': 507, 'height': 507}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'id_str': '1532004485966577667'}, {'url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'altText': None, 'type': 'image', 'size': {'width': 396, 'height': 431}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'id_str': '1532004975269797890'}, {'url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'altText': None, 'type': 'image', 'size': {'width': 399, 'height': 341}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'id_str': '1532005117553164291'}], 'possibly_sensitive': False, 'hashtags': ['Retro64'], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': 'https://vxtwitter.com/rendercombined.jpg?imgs=https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png,https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png,https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1654093772} +testMultiMediaTweet_compare={'text': 'Released #Retro64 1.0.9. Besides a lot of internal bug-fixes, this adds quicksand blocks, fixes the rendering for the castle stairs block, and adds a new model, Sonic! \nhttps://github.com/Retro64Mod/Retro64Mod/releases/tag/1.18.2-1.0.9', 'date': 'Wed Jun 01 14:29:32 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1532006436703715331', 'tweetID': '1532006436703715331', 'conversationID': '1532006436703715331', 'mediaURLs': ['https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'altText': None, 'type': 'image', 'size': {'width': 507, 'height': 507}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'id_str': '1532004485966577667'}, {'url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'altText': None, 'type': 'image', 'size': {'width': 396, 'height': 431}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'id_str': '1532004975269797890'}, {'url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'altText': None, 'type': 'image', 'size': {'width': 399, 'height': 341}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'id_str': '1532005117553164291'}], 'possibly_sensitive': False, 'hashtags': ['Retro64'], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': 'https://vxtwitter.com/rendercombined.jpg?imgs=https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png,https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png,https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1654093772} testQRTTweet_compare={'text': "vxTwitter has gotten a *ton* of usage recently, so I'd appreciate a donation to keep things running!\nhttps://x.com/pdxdylan/status/1518309187515781125", 'date': 'Fri Jan 06 21:37:43 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1611477137319514129', 'tweetID': '1611477137319514129', 'conversationID': '1611476665821003776', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1518309187515781125', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': 'pdxdylan', 'replyingToID': '1611476665821003776', 'retweetURL': None, 'date_epoch': 1673041063} testQrtVideoTweet_compare={'text': 'good', 'date': 'Thu Jun 29 23:33:29 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1674561759422578690', 'tweetID': '1674561759422578690', 'conversationID': '1674561759422578690', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1674197531301904388', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1688081609} -testNSFWTweet_compare={'text': "ngl, I'm scared on finding out the cute Sprigatito's final evolution..\n\nso i had a bot generate it for me.... and I'm forever scarred https://t.co/itMay87vcS", 'date': 'Sat Oct 15 07:28:42 +0000 2022', 'tweetURL': 'https://twitter.com/kuyacoy/status/1581185279376838657', 'tweetID': '1581185279376838657', 'conversationID': '1581185279376838657', 'mediaURLs': ['https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'altText': None, 'type': 'image', 'size': {'width': 760, 'height': 926}, 'thumbnail_url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'id_str': '1581185134803517442'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1665818922} +testNSFWTweet_compare={'text': "ngl, I'm scared on finding out the cute Sprigatito's final evolution..\n\nso i had a bot generate it for me.... and I'm forever scarred", 'date': 'Sat Oct 15 07:28:42 +0000 2022', 'tweetURL': 'https://twitter.com/kuyacoy/status/1581185279376838657', 'tweetID': '1581185279376838657', 'conversationID': '1581185279376838657', 'mediaURLs': ['https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'altText': None, 'type': 'image', 'size': {'width': 760, 'height': 926}, 'thumbnail_url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'id_str': '1581185134803517442'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1665818922} testPollTweet_compare={'text': 'I know when that hotline bling, that can only:', 'date': 'Mon Oct 05 22:57:25 +0000 2015', 'tweetURL': 'https://twitter.com/norm/status/651169346518056960', 'tweetID': '651169346518056960', 'conversationID': '651169346518056960', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': {'options': [{'name': 'Mean one thing', 'votes': 124875, 'percent': 78.82}, {'name': 'Mean multiple things', 'votes': 33554, 'percent': 21.18}]}, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1444085845} testMixedMediaTweet_compare={'text': 'Some of us here are definitely big nerds about beer, and could talk your ear off about it for days on end, but some of us are just "beer is nice"', 'date': 'Thu Feb 22 12:13:24 +0000 2024', 'tweetURL': 'https://twitter.com/salebeerfest/status/1760638922084741177', 'tweetID': '1760638922084741177', 'conversationID': '1760638922084741177', 'mediaURLs': ['https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'altText': None, 'type': 'image', 'size': {'width': 858, 'height': 960}, 'thumbnail_url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'id_str': '1760638907102699520'}, {'url': 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4', 'type': 'gif', 'size': {'width': 500, 'height': 500}, 'duration_millis': 0, 'thumbnail_url': 'https://pbs.twimg.com/tweet_video_thumb/GG8LwqWX0AAZch0.jpg', 'altText': None, 'id_str': '1760638909954904064'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': False, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1708604004} testVinePlayerTweet_compare={'text': 'You wanted old ROBLOX back, you got it. Check out our sweet "new" look! #BringBackOldROBLOX https://vine.co/v/OL9VqvM6wJh', 'date': 'Wed Apr 01 16:17:13 +0000 2015', 'tweetURL': 'https://twitter.com/Roblox/status/583302104342638592', 'tweetID': '583302104342638592', 'conversationID': '583302104342638592', 'mediaURLs': ['https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ'], 'media_extended': [{'url': 'https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ', 'type': 'video', 'size': {'width': 435, 'height': 435}}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': {'options': []}, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1427905033} From 082cb17347a918a1cb2e7a3218e818e0ccd87f42 Mon Sep 17 00:00:00 2001 From: Dylan Date: Tue, 4 Nov 2025 17:41:08 +0000 Subject: [PATCH 27/28] Don't use explicit bearer token --- twExtract/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 1e29950..c854b61 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -279,7 +279,7 @@ def extractStatusV2(url,workaroundTokens): def request_with_token(twid, authToken): vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') vars['rest_ids'][0] = str(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken,btoken=v2bearer) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") From 489bbdf0267722e5a93d06f5bf4317489b734330 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 6 Nov 2025 21:30:29 +0000 Subject: [PATCH 28/28] Stop usage of tweetdeck bearer --- test_vx_extract.py | 10 +++++----- twExtract/__init__.py | 29 +++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/test_vx_extract.py b/test_vx_extract.py index 57d907d..f72afef 100644 --- a/test_vx_extract.py +++ b/test_vx_extract.py @@ -9,14 +9,14 @@ def test_twextract_syndicationAPI(): tweet = twExtract.extractStatus_syndication(testMediaTweet,workaroundTokens=tokens) assert utils.stripEndTCO(utils.stripEndTCO(tweet["full_text"]))==testMediaTweet_compare['text'] -def test_twextract_extractStatusV2Anon(): - tweet = twExtract.extractStatusV2Anon(testTextTweet,None)['legacy'] +def test_twextract_extractStatusV2Rest(): + tweet = twExtract.extractStatusV2Rest(testTextTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])==testTextTweet_compare['text'] - tweet = twExtract.extractStatusV2Anon(testVideoTweet,None)['legacy'] + tweet = twExtract.extractStatusV2Rest(testVideoTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])==testVideoTweet_compare['text'] - tweet = twExtract.extractStatusV2Anon(testMediaTweet,None)['legacy'] + tweet = twExtract.extractStatusV2Rest(testMediaTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])==testMediaTweet_compare['text'] - tweet = twExtract.extractStatusV2Anon(testMultiMediaTweet,None)['legacy'] + tweet = twExtract.extractStatusV2Rest(testMultiMediaTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])[:94]==testMultiMediaTweet_compare['text'][:94] diff --git a/twExtract/__init__.py b/twExtract/__init__.py index c854b61..88f1469 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -13,11 +13,10 @@ import concurrent.futures bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw" v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" androidBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F" -tweetdeckBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF" requestUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0" -bearerTokens=[tweetdeckBearer,bearer,v2bearer,androidBearer] +bearerTokens=[bearer,v2bearer,androidBearer] guestToken=None guestTokenUses=0 @@ -36,7 +35,7 @@ androidGraphqlFeatures='{"grok_translations_community_note_translation_is_enable androidGraphql_api="k3rtLsS9kG5hI-Jr0dTMCg" tweetDetailGraphqlFeatures='{"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"articles_preview_enabled":true,"tweetypie_unmention_optimization_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_enhance_cards_enabled":false}' -tweetDetailGraphql_api="e7RKseIxLu7HgkWNKZ6qnw" +tweetDetailGraphql_api="YVyS4SfwYW7Uw5qwy0mQCA" # this is for UserTweets endpoint tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' @@ -116,7 +115,7 @@ def cycleBearerTokenGet(url,headers): def twitterApiGet(url,btoken=None,authToken=None,guestToken=None): - if authToken.startswith("oa|"): + if authToken != None and authToken.startswith("oa|"): url = url.replace("https://x.com/i/api/graphql/","https://api.twitter.com/graphql/") authToken = authToken[3:] key = authToken.split("|")[0] @@ -132,7 +131,8 @@ def twitterApiGet(url,btoken=None,authToken=None,guestToken=None): response = requests.get(url,headers=headers) else: if btoken is None: - return cycleBearerTokenGet(url,getAuthHeaders(bearer,authToken=authToken,guestToken=guestToken)) + btoken = v2bearer + #return cycleBearerTokenGet(url,getAuthHeaders(bearer,authToken=authToken,guestToken=guestToken)) headers = getAuthHeaders(btoken,authToken=authToken,guestToken=guestToken) response = requests.get(url, headers=headers) @@ -437,7 +437,10 @@ def extractStatusV2TweetDetail(url,workaroundTokens): return tweet return parallel_token_request(twid, tokens, request_with_token) -def extractStatusV2Anon(url,x): +def extractStatusV2Rest_Anon(url,workaroundTokens): + return extractStatusV2Rest(url,None) + +def extractStatusV2Rest(url,workaroundTokens): # get tweet ID m = re.search(pathregex, url) if m is None: @@ -450,7 +453,17 @@ def extractStatusV2Anon(url,x): try: vars = json.loads('{"tweetId":"0","withCommunity":false,"includePromotedContent":false,"withVoice":false}') vars['tweetId'] = str(twid) - tweet = requests.get(f"https://x.com/i/api/graphql/{v2AnonGraphql_api}/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2AnonFeatures)}", headers=getAuthHeaders(v2bearer,guestToken=guestToken)) + if workaroundTokens is not None and len(workaroundTokens) > 0: + tokens = workaroundTokens + random.shuffle(tokens) + for authToken in tokens: + try: + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2AnonGraphql_api}/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2AnonFeatures)}", btoken=v2bearer,authToken=authToken,guestToken=guestToken) + except Exception as e: + continue + else: + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2AnonGraphql_api}/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2AnonFeatures)}", btoken=v2bearer,guestToken=guestToken) + try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Anon Token Rate limit remaining: {rateLimitRemaining}") @@ -498,7 +511,7 @@ def fixTweetData(tweet): def extractStatus(url,workaroundTokens=None): # TODO: commented out methods are too slow/unreliable at the moment - methods=[extractStatusV2Anon,extractStatusV2,extractStatusV2Android]#,extractStatusV2TweetDetail] + methods=[extractStatusV2Rest_Anon,extractStatusV2,extractStatusV2Rest,extractStatusV2Android]#,extractStatusV2TweetDetail] for method in methods: try: result = method(url,workaroundTokens)