diff --git a/requirements.txt b/requirements.txt index f365729..ef9f74d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ pymongo==4.8.0 -boto3==1.35.18 +boto3==1.36.6 requests==2.32.3 Pillow==10.4.0 Flask==2.2.3 diff --git a/serverless.yml b/serverless.yml index 9ef561b..990545a 100644 --- a/serverless.yml +++ b/serverless.yml @@ -47,7 +47,7 @@ functions: handler: wsgi_handler.handler url: true timeout: 15 - memorySize: 500 + memorySize: 128 layers: - Ref: PythonRequirementsLambdaLayer diff --git a/test_api.py b/test_api.py index 26222a6..4fe58d0 100644 --- a/test_api.py +++ b/test_api.py @@ -48,6 +48,10 @@ def test_api_include_rtf_nomedia(): assert resp.status_code==200 assert not any(".rtf" in i for i in jData["mediaURLs"]) +def test_api_mixedmedia(): + resp = client.get(testMixedMediaTweet.replace("https://twitter.com","https://api.vxtwitter.com")+"?include_txt=true",headers={"User-Agent":"test"}) + assert resp.status_code==200 + def test_api_user(): resp = client.get(testUser.replace("https://twitter.com","https://api.vxtwitter.com"),headers={"User-Agent":"test"}) jData = resp.get_json() @@ -57,7 +61,6 @@ def test_api_user(): def test_api_user_suspended(): resp = client.get(testUserSuspended.replace("https://twitter.com","https://api.vxtwitter.com"),headers={"User-Agent":"test"}) jData = resp.get_json() - assert resp.status_code==500 assert 'suspended' in jData["error"] def test_api_user_private(): diff --git a/test_vx_extract.py b/test_vx_extract.py index 18613e0..f72afef 100644 --- a/test_vx_extract.py +++ b/test_vx_extract.py @@ -3,19 +3,20 @@ import os import twExtract import utils from vx_testdata import * +import twitfix def test_twextract_syndicationAPI(): tweet = twExtract.extractStatus_syndication(testMediaTweet,workaroundTokens=tokens) assert utils.stripEndTCO(utils.stripEndTCO(tweet["full_text"]))==testMediaTweet_compare['text'] -def test_twextract_extractStatusV2Anon(): - tweet = twExtract.extractStatusV2Anon(testTextTweet,None)['legacy'] +def test_twextract_extractStatusV2Rest(): + tweet = twExtract.extractStatusV2Rest(testTextTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])==testTextTweet_compare['text'] - tweet = twExtract.extractStatusV2Anon(testVideoTweet,None)['legacy'] + tweet = twExtract.extractStatusV2Rest(testVideoTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])==testVideoTweet_compare['text'] - tweet = twExtract.extractStatusV2Anon(testMediaTweet,None)['legacy'] + tweet = twExtract.extractStatusV2Rest(testMediaTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])==testMediaTweet_compare['text'] - tweet = twExtract.extractStatusV2Anon(testMultiMediaTweet,None)['legacy'] + tweet = twExtract.extractStatusV2Rest(testMultiMediaTweet,None)['legacy'] assert utils.stripEndTCO(tweet["full_text"])[:94]==testMultiMediaTweet_compare['text'][:94] @@ -32,30 +33,28 @@ def test_twextract_extractStatusV2TweetDetails(): assert utils.stripEndTCO(tweet["full_text"])==testMediaTweet_compare['text'] ## Tweet retrieve tests ## -def test_twextract_textTweetExtract(): - tweet = twExtract.extractStatus(testTextTweet,workaroundTokens=tokens) - assert utils.stripEndTCO(tweet["legacy"]["full_text"])==testTextTweet_compare['text'] - assert tweet["user"]["screen_name"]=="jack" - assert 'extended_entities' not in tweet -def test_twextract_extractV2(): # remove this when v2 is default +def test_twextract_extractV2(): tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens) def test_twextract_UserExtract(): - user = twExtract.extractUser(testUser,workaroundTokens=tokens) + rawUserData = twExtract.extractUser(testUser,workaroundTokens=tokens) + user = twitfix.getApiUserResponse(rawUserData) assert user["screen_name"]=="jack" assert user["id"]==12 assert user["created_at"] == "Tue Mar 21 20:50:14 +0000 2006" def test_twextract_UserExtractID(): - user = twExtract.extractUser(testUserIDUrl,workaroundTokens=tokens) + rawUserData = twExtract.extractUser(testUserIDUrl,workaroundTokens=tokens) + user = twitfix.getApiUserResponse(rawUserData) assert user["screen_name"]=="jack" assert user["id"]==12 assert user["created_at"] == "Tue Mar 21 20:50:14 +0000 2006" def test_twextract_UserExtractWeirdURLs(): for url in testUserWeirdURLs: - user = twExtract.extractUser(url,workaroundTokens=tokens) + rawUserData = twExtract.extractUser(url,workaroundTokens=tokens) + user = twitfix.getApiUserResponse(rawUserData) assert user["screen_name"]=="jack" assert user["id"]==12 assert user["created_at"] == "Tue Mar 21 20:50:14 +0000 2006" diff --git a/test_vx_misc.py b/test_vx_misc.py index ca8eb69..367fbc3 100644 --- a/test_vx_misc.py +++ b/test_vx_misc.py @@ -1,10 +1,17 @@ -import twitfix, cache, twExtract +import twitfix, cache, twExtract, utils from vx_testdata import * from twExtract import twUtils def test_calcSyndicationToken(): assert twUtils.calcSyndicationToken("1691389765483200513") == "43lnobuxzql" +def test_stripEndTCO(): + assert utils.stripEndTCO("Hello World https://t.co/abc123") == "Hello World" + assert utils.stripEndTCO("Hello\nWorld https://t.co/abc123") == "Hello\nWorld" + assert utils.stripEndTCO("Hello\nWorld\nhttps://t.co/abc123") == "Hello\nWorld" + assert utils.stripEndTCO("Hello\nWorld\n https://t.co/abc123") == "Hello\nWorld" + assert utils.stripEndTCO("Hello\nWorld \nhttps://t.co/abc123") == "Hello\nWorld" + def test_addToCache(): cache.clearCache() twitfix.getTweetData(testTextTweet) diff --git a/testgen.py b/testgen.py index 473eafb..e35f8a2 100644 --- a/testgen.py +++ b/testgen.py @@ -32,5 +32,6 @@ with open('generated.txt', 'w',encoding='utf-8') as f: del VNF['user_name'] del VNF['user_profile_image_url'] del VNF['communityNote'] + del VNF['fetched_on'] # write in a format that can be copy-pasted into a python file, i.e testTextTweet={... f.write(f"{test}_compare={VNF}\n") \ No newline at end of file diff --git a/twExtract/__init__.py b/twExtract/__init__.py index e4dccae..88f1469 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -9,14 +9,14 @@ from oauthlib import oauth1 import sys sys.path.append(os.path.dirname(os.path.realpath(__file__))) import twUtils +import concurrent.futures bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw" v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" androidBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F" -tweetdeckBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF" requestUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0" -bearerTokens=[tweetdeckBearer,bearer,v2bearer,androidBearer] +bearerTokens=[bearer,v2bearer,androidBearer] guestToken=None guestTokenUses=0 @@ -27,21 +27,28 @@ userIDregex = r"\/i\/user\/(\d+)" v2Features='{"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"super_follow_tweet_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"creator_subscriptions_subscription_count_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"super_follow_exclusive_tweet_notifications_enabled":true}' v2graphql_api="2OOZWmw8nAtUHVnXXQhgaA" -v2AnonFeatures='{"creator_subscriptions_tweet_preview_api_enabled":true,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"articles_preview_enabled":true,"tweetypie_unmention_optimization_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"tweet_with_visibility_results_prefer_gql_media_interstitial_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_enhance_cards_enabled":false}' -v2AnonGraphql_api="7xflPyRiUxGVbJd4uWmbfg" +v2AnonFeatures='{"creator_subscriptions_tweet_preview_api_enabled":true,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":false,"responsive_web_jetfuel_frame":true,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"payments_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_grok_imagine_annotation_enabled":true,"responsive_web_grok_community_note_auto_translation_is_enabled":false,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_enhance_cards_enabled":false}' +v2AnonGraphql_api="wqi5M7wZ7tW-X9S2t-Mqcg" gt_pattern = r'document\.cookie="gt=([^;]+);' -androidGraphqlFeatures='{"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":true,"super_follow_exclusive_tweet_notifications_enabled":true}' -androidGraphql_api="llQH5PFIRlenVrlKJU8jNA" +androidGraphqlFeatures='{"grok_translations_community_note_translation_is_enabled":false,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"profile_label_improvements_pcf_label_in_profile_enabled":true,"premium_content_api_read_enabled":false,"grok_translations_community_note_auto_translation_is_enabled":false,"android_graphql_skip_api_media_color_palette":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"super_follow_exclusive_tweet_notifications_enabled":true,"longform_notetweets_inline_media_enabled":true,"grok_android_analyze_trend_fetch_enabled":false,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"grok_translations_timeline_user_bio_auto_translation_is_enabled":false,"grok_translations_post_auto_translation_is_enabled":false,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":true,"profile_label_improvements_pcf_label_in_post_enabled":true}' +androidGraphql_api="k3rtLsS9kG5hI-Jr0dTMCg" tweetDetailGraphqlFeatures='{"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"articles_preview_enabled":true,"tweetypie_unmention_optimization_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_enhance_cards_enabled":false}' -tweetDetailGraphql_api="e7RKseIxLu7HgkWNKZ6qnw" +tweetDetailGraphql_api="YVyS4SfwYW7Uw5qwy0mQCA" # this is for UserTweets endpoint -tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":false,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' -tweetFeedGraphql_api="Li2XXGESVev94TzFtntrgA" +tweetFeedGraphqlFeatures='{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}' +tweetFeedGraphql_api="OAx9yEcW3JA9bPo63pcYlA" + +userByScreenNameGraphqlFeatures='{"rweb_xchat_enabled":false,"hidden_profile_subscriptions_enabled":true,"payments_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"subscriptions_verification_info_is_identity_verified_enabled":true,"subscriptions_verification_info_verified_since_enabled":true,"highlights_tweets_tab_ui_enabled":true,"responsive_web_twitter_article_notes_tab_enabled":true,"subscriptions_feature_can_gift_premium":true,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true}' +userByScreenNameGraphql_api="96tVxbPqMZDoYB5pmzezKA" +userByRestIdGraphql_api="8r5oa_2vD0WkhIAOkY4TTA" twitterUrl = "x.com" # doubt this will change but just in case + +simultaneousRequests = int(os.getenv("VXTWITTER_SIMULTANEOUS_REQUESTS",1)) + class TwExtractError(Exception): def __init__(self, code, message): self.code = code @@ -50,6 +57,37 @@ class TwExtractError(Exception): def __str__(self): return self.msg +def parallel_token_request(twid, tokens, request_function): + results = [] + errors = [] + def try_token(token): + try: + result = request_function(twid, token) + return {'success': True, 'result': result} + except Exception as e: + return {'success': False, 'error': str(e)} + + with concurrent.futures.ThreadPoolExecutor(max_workers=min(simultaneousRequests, len(tokens))) as executor: + futures = {executor.submit(try_token, token): token for token in tokens} + for future in concurrent.futures.as_completed(futures): + result = future.result() + if result['success']: + results.append(result) + else: + errors.append(result) + + # Early return if success + if result['success']: + for f in futures: # Cancel remaining futures + if not f.done(): + f.cancel() + return result['result'] + + # all tokens failed + if errors: + raise TwExtractError(400, f"All tokens failed. Last error: {errors[-1]['error']}") + return None + def cycleBearerTokenGet(url,headers): global bearerTokens rateLimitRemaining = None @@ -77,7 +115,7 @@ def cycleBearerTokenGet(url,headers): def twitterApiGet(url,btoken=None,authToken=None,guestToken=None): - if authToken.startswith("oa|"): + if authToken != None and authToken.startswith("oa|"): url = url.replace("https://x.com/i/api/graphql/","https://api.twitter.com/graphql/") authToken = authToken[3:] key = authToken.split("|")[0] @@ -93,7 +131,8 @@ def twitterApiGet(url,btoken=None,authToken=None,guestToken=None): response = requests.get(url,headers=headers) else: if btoken is None: - return cycleBearerTokenGet(url,getAuthHeaders(bearer,authToken=authToken,guestToken=guestToken)) + btoken = v2bearer + #return cycleBearerTokenGet(url,getAuthHeaders(bearer,authToken=authToken,guestToken=guestToken)) headers = getAuthHeaders(btoken,authToken=authToken,guestToken=guestToken) response = requests.get(url, headers=headers) @@ -109,7 +148,6 @@ def getAuthHeaders(btoken,authToken=None,guestToken=None): headers["x-twitter-auth-type"] = "OAuth2Session" if guestToken is not None: headers["x-guest-token"] = guestToken - headers["Cookie"] = f"gt={guestToken}; ct0={csrfToken}; guest_id=v1:174804309415864668;" return headers @@ -238,51 +276,47 @@ def extractStatusV2(url,workaroundTokens): # get tweet tokens = workaroundTokens random.shuffle(tokens) - for authToken in tokens: + def request_with_token(twid, authToken): + vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') + vars['rest_ids'][0] = str(twid) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken) try: - vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') - vars['rest_ids'][0] = str(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken) - try: - rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") - print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") - except: # for some reason the header is not always present - pass - if tweet.status_code == 429: - print("Rate limit reached for token (429)") - # try another token + rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") + print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") + except: # for some reason the header is not always present + pass + if tweet.status_code == 429: + print("Rate limit reached for token (429)") + # try another token + raise TwExtractError(400, "Extract error: rate limit reached") + output = tweet.json() + + if "errors" in output: + print(f"Error in output: {json.dumps(output['errors'])}") + # try another token + raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors'])) + entries=output['data']['tweet_results'] + tweetEntry=None + for entry in entries: + if 'result' not in entry: + print("Tweet result not found in entry") continue - output = tweet.json() - - if "errors" in output: - print(f"Error in output: {json.dumps(output['errors'])}") - # try another token - continue - entries=output['data']['tweet_results'] - tweetEntry=None - for entry in entries: - if 'result' not in entry: - print("Tweet result not found in entry") - continue - result = entry['result'] - if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults': - result=result['tweet'] - elif '__typename' in result and result['__typename'] == 'TweetUnavailable': - if 'reason' in result: - return {'error':'Tweet unavailable: '+result['reason']} - return {'error':'Tweet unavailable'} - if 'rest_id' in result and result['rest_id'] == twid: - tweetEntry=result - break - tweet=tweetEntry - if tweet is None: - print("Tweet 404") - return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} - except Exception as e: - print(f"Exception in extractStatusV2: {str(e)}") - continue + result = entry['result'] + if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults': + result=result['tweet'] + elif '__typename' in result and result['__typename'] == 'TweetUnavailable': + if 'reason' in result: + return {'error':'Tweet unavailable: '+result['reason']} + return {'error':'Tweet unavailable'} + if 'rest_id' in result and result['rest_id'] == twid: + tweetEntry=result + break + tweet=tweetEntry + if tweet is None: + print("Tweet 404") + return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} return tweet - raise TwExtractError(400, "Extract error") + return parallel_token_request(twid, tokens, request_with_token) def extractStatusV2Android(url,workaroundTokens): # get tweet ID @@ -292,15 +326,13 @@ def extractStatusV2Android(url,workaroundTokens): twid = m.group(2) if workaroundTokens == None: raise TwExtractError(400, "Extract error (no tokens defined)") - # get tweet tokens = workaroundTokens random.shuffle(tokens) - for authToken in tokens: + def request_with_token(twid, authToken): try: - vars = json.loads('{"referrer":"home","includeTweetImpression":true,"includeHasBirdwatchNotes":false,"isReaderMode":false,"includeEditPerspective":false,"includeEditControl":true,"focalTweetId":0,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') vars['focalTweetId'] = int(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{androidGraphql_api}/ConversationTimelineV2?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(androidGraphqlFeatures)}", authToken=authToken) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{androidGraphql_api}/ConversationTimelineV2?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(androidGraphqlFeatures)}", authToken=authToken,btoken=androidBearer) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Android Token Rate limit remaining: {rateLimitRemaining}") @@ -309,13 +341,13 @@ def extractStatusV2Android(url,workaroundTokens): if tweet.status_code == 429: print("Rate limit reached for android token") # try another token - continue + raise TwExtractError(400, "Extract error: rate limit reached") output = tweet.json() if "errors" in output: print(f"Error in output: {json.dumps(output['errors'])}") # try another token - continue + raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors'])) entries = None for instruction in output['data']['timeline_response']['instructions']: if instruction["__typename"] == "TimelineAddEntries": @@ -339,11 +371,11 @@ def extractStatusV2Android(url,workaroundTokens): print("Tweet 404") return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} except Exception as e: - print(f"Exception in extractStatusV2: {str(e)}") - continue + print(f"Exception in extractStatusV2Android: {str(e)}") + raise TwExtractError(400, "Extract error") return tweet - raise TwExtractError(400, "Extract error") + return parallel_token_request(twid, tokens, request_with_token) def extractStatusV2TweetDetail(url,workaroundTokens): # get tweet ID @@ -356,12 +388,11 @@ def extractStatusV2TweetDetail(url,workaroundTokens): # get tweet tokens = workaroundTokens random.shuffle(tokens) - for authToken in tokens: + def request_with_token(twid, authToken): try: - vars = json.loads('{"focalTweetId":"0","with_rux_injections":false,"includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true,"withV2Timeline":true}') vars['focalTweetId'] = str(twid) - tweet = twitterApiGet(f"https://x.com/i/api/graphql/{tweetDetailGraphql_api}/TweetDetail?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(tweetDetailGraphqlFeatures)}", authToken=authToken) + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{tweetDetailGraphql_api}/TweetDetail?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(tweetDetailGraphqlFeatures)}", authToken=authToken,btoken=v2bearer) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") @@ -370,13 +401,13 @@ def extractStatusV2TweetDetail(url,workaroundTokens): if tweet.status_code == 429: print("Rate limit reached for token") # try another token - continue + raise TwExtractError(400, "Extract error: rate limit reached") output = tweet.json() if "errors" in output: print(f"Error in output: {json.dumps(output['errors'])}") # try another token - continue + raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors'])) entries = None for instruction in output['data']['threaded_conversation_with_injections_v2']['instructions']: if instruction["type"] == "TimelineAddEntries": @@ -401,12 +432,15 @@ def extractStatusV2TweetDetail(url,workaroundTokens): return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'} except Exception as e: print(f"Exception in extractStatusV2: {str(e)}") - continue + raise TwExtractError(400, "Extract error") return tweet - raise TwExtractError(400, "Extract error") + return parallel_token_request(twid, tokens, request_with_token) -def extractStatusV2Anon(url,x): +def extractStatusV2Rest_Anon(url,workaroundTokens): + return extractStatusV2Rest(url,None) + +def extractStatusV2Rest(url,workaroundTokens): # get tweet ID m = re.search(pathregex, url) if m is None: @@ -419,7 +453,17 @@ def extractStatusV2Anon(url,x): try: vars = json.loads('{"tweetId":"0","withCommunity":false,"includePromotedContent":false,"withVoice":false}') vars['tweetId'] = str(twid) - tweet = requests.get(f"https://x.com/i/api/graphql/{v2AnonGraphql_api}/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2AnonFeatures)}", headers=getAuthHeaders(v2bearer,guestToken=guestToken)) + if workaroundTokens is not None and len(workaroundTokens) > 0: + tokens = workaroundTokens + random.shuffle(tokens) + for authToken in tokens: + try: + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2AnonGraphql_api}/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2AnonFeatures)}", btoken=v2bearer,authToken=authToken,guestToken=guestToken) + except Exception as e: + continue + else: + tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2AnonGraphql_api}/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2AnonFeatures)}", btoken=v2bearer,guestToken=guestToken) + try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Anon Token Rate limit remaining: {rateLimitRemaining}") @@ -466,7 +510,8 @@ def fixTweetData(tweet): return tweet def extractStatus(url,workaroundTokens=None): - methods=[extractStatusV2Anon,extractStatusV2TweetDetail,extractStatusV2Android,extractStatusV2] + # TODO: commented out methods are too slow/unreliable at the moment + methods=[extractStatusV2Rest_Anon,extractStatusV2,extractStatusV2Rest,extractStatusV2Android]#,extractStatusV2TweetDetail] for method in methods: try: result = method(url,workaroundTokens) @@ -497,20 +542,27 @@ def extractUser(url,workaroundTokens): if authToken.startswith("oa|"): # oauth token not supported atm continue try: - - reqHeaders = getAuthHeaders(bearer,authToken=authToken) + reqHeaders = getAuthHeaders(v2bearer,authToken=authToken) if not useId: - user = requests.get(f"https://api.{twitterUrl}/1.1/users/show.json?screen_name={screen_name}",headers=reqHeaders) + vars=json.loads('{"screen_name":"","withGrokTranslatedBio":false}') + vars['screen_name'] = screen_name + user = requests.get(f"https://x.com/i/api/graphql/{userByScreenNameGraphql_api}/UserByScreenName",{'variables':json.dumps(vars),'features':userByScreenNameGraphqlFeatures,'fieldToggles':'{"withAuxiliaryUserLabels":true}'},headers=reqHeaders) else: - user = requests.get(f"https://api.{twitterUrl}/1.1/users/show.json?user_id={screen_name}",headers=reqHeaders) + vars=json.loads('{"userId":"","withGrokTranslatedBio":false}') + vars['userId'] = screen_name + user = requests.get(f"https://x.com/i/api/graphql/{userByRestIdGraphql_api}/UserByRestId",{'variables':json.dumps(vars),'features':userByScreenNameGraphqlFeatures,'fieldToggles':'{"withAuxiliaryUserLabels":true}'},headers=reqHeaders) output = user.json() if "errors" in output: # pick the first error and create a twExtractError error = output["errors"][0] raise TwExtractError(error["code"], error["message"]) + elif 'user' not in output['data']: + raise TwExtractError(404, "User not found.") + elif output['data']['user']['result']['__typename'] == 'UserUnavailable': + raise TwExtractError(404, output['data']['user']['result']['message']) return output except Exception as e: - if hasattr(e,"msg") and (e.msg == 'User has been suspended.' or e.msg == 'User not found.'): + if hasattr(e,"msg") and ('suspended' in e.msg or e.msg == 'User not found.'): raise e continue raise TwExtractError(400, "Extract error") @@ -523,14 +575,14 @@ def extractUserFeedFromId(userId,workaroundTokens): # TODO: https://api.twitter.com/graphql/x31u1gdnjcqtiVZFc1zWnQ/UserWithProfileTweetsQueryV2?variables={"cursor":"?","includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"includeEditControl":true,"count":40,"rest_id":"12","includeTweetVisibilityNudge":true,"autoplay_enabled":true}&features={"longform_notetweets_inline_media_enabled":true,"super_follow_badge_privacy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"super_follow_user_api_enabled":true,"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":true,"super_follow_tweet_api_enabled":true,"articles_api_enabled":true,"android_graphql_skip_api_media_color_palette":true,"creator_subscriptions_tweet_preview_api_enabled":true,"freedom_of_speech_not_reach_fetch_enabled":true,"tweetypie_unmention_optimization_enabled":true,"longform_notetweets_consumption_enabled":true,"subscriptions_verification_info_enabled":true,"blue_business_profile_image_shape_enabled":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"immersive_video_status_linkable_timestamps":false,"super_follow_exclusive_tweet_notifications_enabled":true} continue try: - vars = json.loads('{"userId":"0","count":20,"includePromotedContent":true,"withQuickPromoteEligibilityTweetFields":true,"withVoice":true}') + vars = json.loads('{"userId":"0","count":20,"includePromotedContent":true,"withCommunity":true,"withVoice":true}') vars['userId'] = str(userId) vars['includePromotedContent'] = False # idk if this works - reqHeaders = getAuthHeaders(v2bearer,guestToken=getGuestToken()) - endpoint=f"/i/api/graphql/{tweetFeedGraphql_api}/UserTweets" + reqHeaders = getAuthHeaders(v2bearer,authToken=authToken) + endpoint=f"/i/api/graphql/{tweetFeedGraphql_api}/UserTweetsAndReplies" reqHeaders["x-client-transaction-id"] = twUtils.generate_transaction_id("GET",endpoint) feed = requests.get(f"https://{twitterUrl}{endpoint}", {'variables':json.dumps(vars),'features':tweetFeedGraphqlFeatures,'fieldToggles':'{"withArticlePlainText":false}'},headers=reqHeaders) - if feed.status_code == 403: + if feed.status_code == 403 or feed.status_code == 404: raise TwExtractError(403, "Extract error") output = feed.json() if "errors" in output: diff --git a/twitfix.py b/twitfix.py index b9c9e53..1bf8204 100644 --- a/twitfix.py +++ b/twitfix.py @@ -67,13 +67,14 @@ def isValidUserAgent(user_agent): return False def message(text): - return render_template( + rendered = render_template( 'default.html', message = text, color = config['config']['color'], appname = config['config']['appname'], repo = config['config']['repo'], url = config['config']['url'] ) + return Response(rendered, mimetype='text/html',headers={"Cache-Tag": "message", "Cache-Control": "max-age=1760, public"}) def generateActivityLink(tweetData,media=None,mediatype=None,embedIndex=-1): global user_agent @@ -265,19 +266,13 @@ def getTweetData(twitter_url,include_txt="false",include_rtf="false"): return cachedVNF try: - rawTweetData = twExtract.extractStatusV2Anon(twitter_url, None) + if config['config']['workaroundTokens'] is not None: + workaroundTokens = config['config']['workaroundTokens'].split(",") + else: + workaroundTokens = None + rawTweetData = twExtract.extractStatus(twitter_url,workaroundTokens=workaroundTokens) except: rawTweetData = None - if rawTweetData is None: - try: - if config['config']['workaroundTokens'] is not None: - workaroundTokens = config['config']['workaroundTokens'].split(",") - else: - workaroundTokens = None - - rawTweetData = twExtract.extractStatus(twitter_url,workaroundTokens=workaroundTokens) - except: - rawTweetData = None if rawTweetData == None or 'error' in rawTweetData: return None @@ -291,14 +286,18 @@ def getTweetData(twitter_url,include_txt="false",include_rtf="false"): return tweetData def getUserData(twitter_url,includeFeed=False): - rawUserData = twExtract.extractUser(twitter_url,workaroundTokens=config['config']['workaroundTokens'].split(',')) + if config['config']['workaroundTokens'] is not None: + workaroundTokens = config['config']['workaroundTokens'].split(",") + else: + workaroundTokens = None + rawUserData = twExtract.extractUser(twitter_url,workaroundTokens=workaroundTokens) userData = getApiUserResponse(rawUserData) if includeFeed: if userData['protected']: userData['latest_tweets']=[] else: - feed = twExtract.extractUserFeedFromId(userData['id'],workaroundTokens=config['config']['workaroundTokens'].split(',')) + feed = twExtract.extractUserFeedFromId(userData['id'],workaroundTokens=workaroundTokens) apiFeed = [] for tweet in feed: apiFeed.append(getApiResponse(tweet)) @@ -415,33 +414,33 @@ def twitfix(sub_path): embeddingMedia = tweetData['hasMedia'] renderMedia = None if embeddingMedia: - renderMedia = determineMediaToEmbed(tweetData,embedIndex) + renderMedia = determineMediaToEmbed(tweetData,embedIndex,convertGif=False) # direct embeds should always prioritize the main tweet, so don't check for qrt # determine what type of media we're dealing with if not embeddingMedia and qrt is None: return renderTextTweetEmbed(tweetData) else: if renderMedia['type'] == "image": - return render_template("rawimage.html",media=renderMedia) + return Response(render_template("rawimage.html",media=renderMedia),headers={"Cache-Tag": "embed"}) elif renderMedia['type'] == "video" or renderMedia['type'] == "gif": - return render_template("rawvideo.html",media=renderMedia) + return Response(render_template("rawvideo.html",media=renderMedia),headers={"Cache-Tag": "embed"}) else: # full embed embedTweetData = determineEmbedTweet(tweetData) embeddingMedia = embedTweetData['hasMedia'] if "article" in embedTweetData and embedTweetData["article"] is not None: - return renderArticleTweetEmbed(tweetData," • See original tweet for full article") + return Response(renderArticleTweetEmbed(tweetData," • See original tweet for full article"),headers={"Cache-Tag": "embed"}) elif not embeddingMedia: - return renderTextTweetEmbed(tweetData) + return Response(renderTextTweetEmbed(tweetData),headers={"Cache-Tag": "embed"}) else: media = determineMediaToEmbed(embedTweetData,embedIndex) suffix="" if "suffix" in media: suffix = media["suffix"] if media['type'] == "image": - return renderImageTweetEmbed(tweetData,media['url'] , appnameSuffix=suffix,embedIndex=embedIndex) + return Response(renderImageTweetEmbed(tweetData,media['url'] , appnameSuffix=suffix,embedIndex=embedIndex),headers={"Cache-Tag": "embed"}) elif media['type'] == "video" or media['type'] == "gif": - return renderVideoTweetEmbed(tweetData,media,appnameSuffix=suffix,embedIndex=embedIndex) + return Response(renderVideoTweetEmbed(tweetData,media,appnameSuffix=suffix,embedIndex=embedIndex),headers={"Cache-Tag": "embed"}) return message(msgs.failedToScan) diff --git a/utils.py b/utils.py index 483c63d..3a305d7 100644 --- a/utils.py +++ b/utils.py @@ -3,7 +3,7 @@ import io from configHandler import config pathregex = re.compile("\\w{1,15}\\/(status|statuses)\\/(\\d{2,20})") -endTCOregex = re.compile("(^.*?) +https:\/\/t.co\/.*?$") +endTCOregex = re.compile("(^.*?)[ \n]+https:\/\/t.co\/.*?$",flags=re.DOTALL) def getTweetIdFromUrl(url): match = pathregex.search(url) @@ -64,7 +64,7 @@ def determineEmbedTweet(tweetData): return tweetData['qrt'] return tweetData -def determineMediaToEmbed(tweetData,embedIndex = -1): +def determineMediaToEmbed(tweetData,embedIndex = -1,convertGif = True): if tweetData['allSameType'] and tweetData['media_extended'][0]['type'] == "image" and embedIndex == -1 and tweetData['combinedMediaUrl'] != None: return {"url":tweetData['combinedMediaUrl'],"type":"image"} else: @@ -82,7 +82,7 @@ def determineMediaToEmbed(tweetData,embedIndex = -1): if media['type'] == "image": return media elif media['type'] == "video" or media['type'] == "gif": - if media['type'] == "gif": + if media['type'] == "gif" and convertGif: if config['config']['gifConvertAPI'] != "" and config['config']['gifConvertAPI'] != "none": vurl=media['originalUrl'] if 'originalUrl' in media else media['url'] media['url'] = config['config']['gifConvertAPI'] + "/convert?url=" + vurl diff --git a/vxApi.py b/vxApi.py index 83bdbfb..69d7ce3 100644 --- a/vxApi.py +++ b/vxApi.py @@ -1,30 +1,75 @@ import html from datetime import datetime +from flask import json from configHandler import config from utils import stripEndTCO def getApiUserResponse(user): + userResult = user["data"]["user"]["result"] return { - "id": user["id"], - "screen_name": user["screen_name"], - "name": user["name"], - "profile_image_url": user["profile_image_url_https"], - "description": user["description"], - "location": user["location"], - "followers_count": user["followers_count"], - "following_count": user["friends_count"], - "tweet_count": user["statuses_count"], - "created_at": user["created_at"], - "protected": user["protected"], + "id": int(userResult["rest_id"]), + "screen_name": userResult["core"]["screen_name"], + "name": userResult["core"]["name"], + "profile_image_url": userResult['avatar']["image_url"], + "description": userResult["legacy"]["description"], + "location": userResult["location"]["location"], + "followers_count": userResult["legacy"]["followers_count"], + "following_count": userResult["legacy"]["friends_count"], + "tweet_count": userResult["legacy"]["statuses_count"], + "created_at": userResult["core"]["created_at"], + "protected": userResult["privacy"]["protected"], "fetched_on": int(datetime.now().timestamp()), } +def getBestMediaUrl(mediaList): + # find the highest bitrate + best_bitrate = -1 + besturl="" + for j in mediaList: + if j['content_type'] == "video/mp4" and '/hevc/' not in j["url"] and j['bitrate'] > best_bitrate: + besturl = j["url"] + best_bitrate = j['bitrate'] + if "?tag=" in besturl: + besturl = besturl[:besturl.index("?tag=")] + return besturl + +def getExtendedVideoOrGifInfo(mediaEntry): + videoInfo = mediaEntry["video_info"] + info = { + "url": getBestMediaUrl(videoInfo["variants"]), + "type": "gif" if mediaEntry.get("type", "") == "animated_gif" else "video", + "size": { + "width": mediaEntry['original_info']["width"], + "height": mediaEntry['original_info']["height"] + }, + "duration_millis": videoInfo.get("duration_millis", 0), + "thumbnail_url": mediaEntry.get("media_url_https", None), + "altText": mediaEntry.get("ext_alt_text", None), + "id_str": mediaEntry.get("id_str", None) + } + return info + +def getExtendedImageInfo(mediaEntry): + info = { + "url": mediaEntry.get("media_url_https", None), + "type": "image", + "size": { + "width": mediaEntry["original_info"]["width"], + "height": mediaEntry["original_info"]["height"] + }, + "thumbnail_url": mediaEntry.get("media_url_https", None), + "altText": mediaEntry.get("ext_alt_text", None), + "id_str": mediaEntry.get("id_str", None) + } + return info + def getApiResponse(tweet,include_txt=False,include_rtf=False): tweetL = tweet["legacy"] if "user_result" in tweet["core"]: - userL = tweet["core"]["user_result"]["result"]["legacy"] + user = tweet["core"]["user_result"]["result"] elif "user_results" in tweet["core"]: - userL = tweet["core"]["user_results"]["result"]["legacy"] + user = tweet["core"]["user_results"]["result"] + userL = user["legacy"] media=[] media_extended=[] hashtags=[] @@ -32,6 +77,14 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): oldTweetVersion = False tweetArticle=None lang=None + + if "screen_name" not in userL: + userL["screen_name"] = user["core"]["screen_name"] + if "name" not in userL: + userL["name"] = user["core"]["name"] + if "profile_image_url_https" not in userL: + userL["profile_image_url_https"] = user["avatar"]["image_url"] + #editedTweet=False try: if "birdwatch_pivot" in tweet: @@ -59,61 +112,55 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): for i in tmedia: extendedInfo={} if "video_info" in i: - # find the highest bitrate - best_bitrate = -1 - besturl="" - for j in i["video_info"]["variants"]: - if j['content_type'] == "video/mp4" and '/hevc/' not in j["url"] and j['bitrate'] > best_bitrate: - besturl = j['url'] - best_bitrate = j['bitrate'] - if "?tag=" in besturl: - besturl = besturl[:besturl.index("?tag=")] - media.append(besturl) - extendedInfo["url"] = besturl - extendedInfo["type"] = "video" - if (i["type"] == "animated_gif"): - extendedInfo["type"] = "gif" - altText = None - extendedInfo["size"] = {"width":i["original_info"]["width"],"height":i["original_info"]["height"]} - if "ext_alt_text" in i: - altText=i["ext_alt_text"] - if "duration_millis" in i["video_info"]: - extendedInfo["duration_millis"] = i["video_info"]["duration_millis"] - else: - extendedInfo["duration_millis"] = 0 - extendedInfo["thumbnail_url"] = i["media_url_https"] - extendedInfo["altText"] = altText + extendedInfo = getExtendedVideoOrGifInfo(i) + media.append(extendedInfo["url"]) media_extended.append(extendedInfo) else: - media.append(i["media_url_https"]) - extendedInfo["url"] = i["media_url_https"] - altText=None - if "ext_alt_text" in i: - altText=i["ext_alt_text"] - extendedInfo["altText"] = altText - extendedInfo["type"] = "image" - extendedInfo["size"] = {"width":i["original_info"]["width"],"height":i["original_info"]["height"]} - extendedInfo["thumbnail_url"] = i["media_url_https"] + extendedInfo = getExtendedImageInfo(i) media_extended.append(extendedInfo) + media.append(extendedInfo["url"]) if "hashtags" in tweetL["entities"]: for i in tweetL["entities"]["hashtags"]: hashtags.append(i["text"]) - elif "card" in tweet and 'name' in tweet['card'] and tweet['card']['name'] == "player": - width = None - height = None - vidUrl = None - for i in tweet['card']['binding_values']: - if i['key'] == 'player_stream_url': - vidUrl = i['value']['string_value'] - elif i['key'] == 'player_width': - width = int(i['value']['string_value']) - elif i['key'] == 'player_height': - height = int(i['value']['string_value']) - if vidUrl != None and width != None and height != None: - media.append(vidUrl) - media_extended.append({"url":vidUrl,"type":"video","size":{"width":width,"height":height}}) - + elif "card" in tweet or "tweet_card" in tweet: + cardData = tweet["card" if "card" in tweet else "tweet_card"] + bindingValues = None + if 'binding_values' in cardData: + bindingValues = cardData['binding_values'] + elif 'legacy' in cardData and 'binding_values' in cardData['legacy']: + bindingValues = cardData['legacy']['binding_values'] + if bindingValues != None: + if 'name' in cardData and cardData['name'] == "player": + width = None + height = None + vidUrl = None + for i in bindingValues: + if i['key'] == 'player_stream_url': + vidUrl = i['value']['string_value'] + elif i['key'] == 'player_width': + width = int(i['value']['string_value']) + elif i['key'] == 'player_height': + height = int(i['value']['string_value']) + if vidUrl != None and width != None and height != None: + media.append(vidUrl) + media_extended.append({"url":vidUrl,"type":"video","size":{"width":width,"height":height}}) + else: + for i in bindingValues: + if i['key'] == 'unified_card' and 'value' in i and 'string_value' in i['value']: + cardData = json.loads(i['value']['string_value']) + media_key = cardData['component_objects']['media_1']['data']['id'] + media_entry = cardData['media_entities'][media_key] + extendedInfo = getExtendedVideoOrGifInfo(media_entry) + media.append(extendedInfo['url']) + media_extended.append(extendedInfo) + break + elif i['key'] == 'photo_image_full_size_large' and 'value' in i and 'image_value' in i['value']: + imgData = i['value']['image_value'] + imgurl = imgData['url'] + media.append(imgurl) + media_extended.append({"url":imgurl,"type":"image","size":{"width":imgData['width'],"height":imgData['height']}}) + break if "article" in tweet: try: result = tweet["article"]["article_results"]["result"] @@ -230,14 +277,6 @@ def getApiResponse(tweet,include_txt=False,include_rtf=False): if 'in_reply_to_status_id_str' in tweetL and tweetL['in_reply_to_status_id_str'] != None: replyingToID = tweetL['in_reply_to_status_id_str'] - if 'screen_name' not in userL and 'core' in tweet["core"]["user_results"]["result"]: - userL['screen_name'] = tweet["core"]["user_results"]["result"]["core"]["screen_name"] - if 'name' not in userL: - userL['name'] = tweet["core"]["user_results"]["result"]["core"]["name"] - - if 'profile_image_url_https' not in userL and 'avatar' in tweet["core"]["user_results"]["result"]: - userL['profile_image_url_https'] = tweet["core"]["user_results"]["result"]["avatar"]["image_url"] - apiObject = { "text": twText, "likes": tweetL["favorite_count"], diff --git a/vx_testdata.py b/vx_testdata.py index 3644aee..3ec28ad 100644 --- a/vx_testdata.py +++ b/vx_testdata.py @@ -6,7 +6,7 @@ testVideoTweet="https://twitter.com/pdxdylan/status/1540398733669666818" testMediaTweet="https://twitter.com/pdxdylan/status/1534672932106035200" testMultiMediaTweet="https://twitter.com/pdxdylan/status/1532006436703715331" testQRTTweet="https://twitter.com/pdxdylan/status/1611477137319514129" -testQrtCeptionTweet="https://twitter.com/CatherineShu/status/585253766271672320" # TODO: tweet is deleted +testQrtCeptionTweet="https://twitter.com/CatherineShu/status/585253766271672320" testQrtVideoTweet="https://twitter.com/pdxdylan/status/1674561759422578690" testNSFWTweet="https://twitter.com/kuyacoy/status/1581185279376838657" testPollTweet="https://twitter.com/norm/status/651169346518056960" @@ -14,17 +14,17 @@ testMixedMediaTweet="https://twitter.com/bigbeerfest/status/1760638922084741177" testVinePlayerTweet="https://twitter.com/Roblox/status/583302104342638592" testRetweetTweet="https://twitter.com/pdxdylan/status/1828570470222045294" -testTextTweet_compare={'text': 'just setting up my twttr', 'date': 'Tue Mar 21 20:50:14 +0000 2006', 'tweetURL': 'https://twitter.com/jack/status/20', 'tweetID': '20', 'conversationID': '20', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1142974214} -testVideoTweet_compare={'text': 'TikTok embeds on Discord/Telegram bait you with a fake play button, but to see the actual video you have to go to their website.\nAs a request from a friend, I made it so that if you add "vx" before "tiktok" on any link, it fixes that. https://t.co/QYpiVXUIrW', 'date': 'Fri Jun 24 18:17:31 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1540398733669666818', 'tweetID': '1540398733669666818', 'conversationID': '1540398733669666818', 'mediaURLs': ['https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4'], 'media_extended': [{'url': 'https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4', 'type': 'video', 'size': {'width': 762, 'height': 528}, 'duration_millis': 13650, 'thumbnail_url': 'https://pbs.twimg.com/ext_tw_video_thumb/1540396699037929472/pu/img/l187Z6B9AHHxUKPV.jpg', 'altText': None}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1656094651} -testMediaTweet_compare={'text': 'oh.', 'date': 'Wed Jun 08 23:05:14 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1534672932106035200', 'tweetID': '1534672932106035200', 'conversationID': '1534672673422381057', 'mediaURLs': ['https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'altText': None, 'type': 'image', 'size': {'width': 927, 'height': 534}, 'thumbnail_url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1654729514} -testMultiMediaTweet_compare={'text': 'Released #Retro64 1.0.9. Besides a lot of internal bug-fixes, this adds quicksand blocks, fixes the rendering for the castle stairs block, and adds a new model, Sonic! \nhttps://github.com/Retro64Mod/Retro64Mod/releases/tag/1.18.2-1.0.9 https://t.co/CWZaw4hzyg', 'date': 'Wed Jun 01 14:29:32 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1532006436703715331', 'tweetID': '1532006436703715331', 'conversationID': '1532006436703715331', 'mediaURLs': ['https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'altText': None, 'type': 'image', 'size': {'width': 507, 'height': 507}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png'}, {'url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'altText': None, 'type': 'image', 'size': {'width': 396, 'height': 431}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png'}, {'url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'altText': None, 'type': 'image', 'size': {'width': 399, 'height': 341}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'}], 'possibly_sensitive': False, 'hashtags': ['Retro64'], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': 'https://vxtwitter.com/rendercombined.jpg?imgs=https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png,https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png,https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'pollData': None, 'article': None, 'date_epoch': 1654093772} -testQRTTweet_compare={'text': "vxTwitter has gotten a *ton* of usage recently, so I'd appreciate a donation to keep things running!\n", 'date': 'Fri Jan 06 21:37:43 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1611477137319514129', 'tweetID': '1611477137319514129', 'conversationID': '1611476665821003776', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1518309187515781125', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1673041063} -testQrtCeptionTweet_compare={'text': 'Testing retweetception ', 'date': 'Tue Apr 07 01:32:26 +0000 2015', 'tweetURL': 'https://twitter.com/CatherineShu/status/585253766271672320', 'tweetID': '585253766271672320', 'conversationID': '585253766271672320', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/585253161260216320', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1428370346} -testQrtVideoTweet_compare={'text': 'good', 'date': 'Thu Jun 29 23:33:29 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1674561759422578690', 'tweetID': '1674561759422578690', 'conversationID': '1674561759422578690', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1674197531301904388', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1688081609} -testNSFWTweet_compare={'text': "ngl, I'm scared on finding out the cute Sprigatito's final evolution..\n\nso i had a bot generate it for me.... and I'm forever scarred https://t.co/itMay87vcS", 'date': 'Sat Oct 15 07:28:42 +0000 2022', 'tweetURL': 'https://twitter.com/kuyacoy/status/1581185279376838657', 'tweetID': '1581185279376838657', 'conversationID': '1581185279376838657', 'mediaURLs': ['https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'altText': None, 'type': 'image', 'size': {'width': 760, 'height': 926}, 'thumbnail_url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1665818922} -testPollTweet_compare={'text': 'I know when that hotline bling, that can only:', 'date': 'Mon Oct 05 22:57:25 +0000 2015', 'tweetURL': 'https://twitter.com/norm/status/651169346518056960', 'tweetID': '651169346518056960', 'conversationID': '651169346518056960', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': {'options': [{'name': 'Mean one thing', 'votes': 124875, 'percent': 78.82}, {'name': 'Mean multiple things', 'votes': 33554, 'percent': 21.18}]}, 'article': None, 'date_epoch': 1444085845} -testMixedMediaTweet_compare={'text': 'Some of us here are definitely big nerds about beer, and could talk your ear off about it for days on end, but some of us are just "beer is nice"', 'date': 'Thu Feb 22 12:13:24 +0000 2024', 'tweetURL': 'https://twitter.com/salebeerfest/status/1760638922084741177', 'tweetID': '1760638922084741177', 'conversationID': '1760638922084741177', 'mediaURLs': ['https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'altText': None, 'type': 'image', 'size': {'width': 858, 'height': 960}, 'thumbnail_url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg'}, {'url': 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4', 'type': 'gif', 'size': {'width': 500, 'height': 500}, 'duration_millis': 0, 'thumbnail_url': 'https://pbs.twimg.com/tweet_video_thumb/GG8LwqWX0AAZch0.jpg', 'altText': None}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': False, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'date_epoch': 1708604004} -testVinePlayerTweet_compare={'text': 'You wanted old ROBLOX back, you got it. Check out our sweet "new" look! #BringBackOldROBLOX https://vine.co/v/OL9VqvM6wJh', 'date': 'Wed Apr 01 16:17:13 +0000 2015', 'tweetURL': 'https://twitter.com/Roblox/status/583302104342638592', 'tweetID': '583302104342638592', 'conversationID': '583302104342638592', 'mediaURLs': ['https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ'], 'media_extended': [{'url': 'https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ', 'type': 'video', 'size': {'width': 435, 'height': 435}}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': {'options': []}, 'article': None, 'date_epoch': 1427905033} +testTextTweet_compare={'text': 'just setting up my twttr', 'date': 'Tue Mar 21 20:50:14 +0000 2006', 'tweetURL': 'https://twitter.com/jack/status/20', 'tweetID': '20', 'conversationID': '20', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1142974214} +testVideoTweet_compare={'text': 'TikTok embeds on Discord/Telegram bait you with a fake play button, but to see the actual video you have to go to their website.\nAs a request from a friend, I made it so that if you add "vx" before "tiktok" on any link, it fixes that.', 'date': 'Fri Jun 24 18:17:31 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1540398733669666818', 'tweetID': '1540398733669666818', 'conversationID': '1540398733669666818', 'mediaURLs': ['https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4'], 'media_extended': [{'url': 'https://video.twimg.com/ext_tw_video/1540396699037929472/pu/vid/762x528/YxbXbT3X7vq4LWfC.mp4', 'type': 'video', 'size': {'width': 762, 'height': 528}, 'duration_millis': 13650, 'thumbnail_url': 'https://pbs.twimg.com/ext_tw_video_thumb/1540396699037929472/pu/img/l187Z6B9AHHxUKPV.jpg', 'altText': None, 'id_str': '1540396699037929472'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1656094651} +testMediaTweet_compare={'text': 'oh.', 'date': 'Wed Jun 08 23:05:14 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1534672932106035200', 'tweetID': '1534672932106035200', 'conversationID': '1534672673422381057', 'mediaURLs': ['https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'altText': None, 'type': 'image', 'size': {'width': 927, 'height': 534}, 'thumbnail_url': 'https://pbs.twimg.com/media/FUxAt5LWUAMol0N.png', 'id_str': '1534672730213208067'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'und', 'replyingTo': 'pdxdylan', 'replyingToID': '1534672673422381057', 'retweetURL': None, 'date_epoch': 1654729514} +testMultiMediaTweet_compare={'text': 'Released #Retro64 1.0.9. Besides a lot of internal bug-fixes, this adds quicksand blocks, fixes the rendering for the castle stairs block, and adds a new model, Sonic! \nhttps://github.com/Retro64Mod/Retro64Mod/releases/tag/1.18.2-1.0.9', 'date': 'Wed Jun 01 14:29:32 +0000 2022', 'tweetURL': 'https://twitter.com/pdxdylan/status/1532006436703715331', 'tweetID': '1532006436703715331', 'conversationID': '1532006436703715331', 'mediaURLs': ['https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'altText': None, 'type': 'image', 'size': {'width': 507, 'height': 507}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png', 'id_str': '1532004485966577667'}, {'url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'altText': None, 'type': 'image', 'size': {'width': 396, 'height': 431}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png', 'id_str': '1532004975269797890'}, {'url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'altText': None, 'type': 'image', 'size': {'width': 399, 'height': 341}, 'thumbnail_url': 'https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'id_str': '1532005117553164291'}], 'possibly_sensitive': False, 'hashtags': ['Retro64'], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': 'https://vxtwitter.com/rendercombined.jpg?imgs=https://pbs.twimg.com/media/FULF9oxXwAMDI-C.png,https://pbs.twimg.com/media/FULGaHkWYAIBV5U.png,https://pbs.twimg.com/media/FULGiZnWQAMBRWl.png', 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1654093772} +testQRTTweet_compare={'text': "vxTwitter has gotten a *ton* of usage recently, so I'd appreciate a donation to keep things running!\nhttps://x.com/pdxdylan/status/1518309187515781125", 'date': 'Fri Jan 06 21:37:43 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1611477137319514129', 'tweetID': '1611477137319514129', 'conversationID': '1611476665821003776', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1518309187515781125', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': 'pdxdylan', 'replyingToID': '1611476665821003776', 'retweetURL': None, 'date_epoch': 1673041063} +testQrtVideoTweet_compare={'text': 'good', 'date': 'Thu Jun 29 23:33:29 +0000 2023', 'tweetURL': 'https://twitter.com/pdxdylan/status/1674561759422578690', 'tweetID': '1674561759422578690', 'conversationID': '1674561759422578690', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': 'https://twitter.com/i/status/1674197531301904388', 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1688081609} +testNSFWTweet_compare={'text': "ngl, I'm scared on finding out the cute Sprigatito's final evolution..\n\nso i had a bot generate it for me.... and I'm forever scarred", 'date': 'Sat Oct 15 07:28:42 +0000 2022', 'tweetURL': 'https://twitter.com/kuyacoy/status/1581185279376838657', 'tweetID': '1581185279376838657', 'conversationID': '1581185279376838657', 'mediaURLs': ['https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'altText': None, 'type': 'image', 'size': {'width': 760, 'height': 926}, 'thumbnail_url': 'https://pbs.twimg.com/media/FfF_gKwXgAIpnpD.jpg', 'id_str': '1581185134803517442'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1665818922} +testPollTweet_compare={'text': 'I know when that hotline bling, that can only:', 'date': 'Mon Oct 05 22:57:25 +0000 2015', 'tweetURL': 'https://twitter.com/norm/status/651169346518056960', 'tweetID': '651169346518056960', 'conversationID': '651169346518056960', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': {'options': [{'name': 'Mean one thing', 'votes': 124875, 'percent': 78.82}, {'name': 'Mean multiple things', 'votes': 33554, 'percent': 21.18}]}, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1444085845} +testMixedMediaTweet_compare={'text': 'Some of us here are definitely big nerds about beer, and could talk your ear off about it for days on end, but some of us are just "beer is nice"', 'date': 'Thu Feb 22 12:13:24 +0000 2024', 'tweetURL': 'https://twitter.com/salebeerfest/status/1760638922084741177', 'tweetID': '1760638922084741177', 'conversationID': '1760638922084741177', 'mediaURLs': ['https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4'], 'media_extended': [{'url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'altText': None, 'type': 'image', 'size': {'width': 858, 'height': 960}, 'thumbnail_url': 'https://pbs.twimg.com/media/GG8LwfuWoAANKhs.jpg', 'id_str': '1760638907102699520'}, {'url': 'https://video.twimg.com/tweet_video/GG8LwqWX0AAZch0.mp4', 'type': 'gif', 'size': {'width': 500, 'height': 500}, 'duration_millis': 0, 'thumbnail_url': 'https://pbs.twimg.com/tweet_video_thumb/GG8LwqWX0AAZch0.jpg', 'altText': None, 'id_str': '1760638909954904064'}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': False, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1708604004} +testVinePlayerTweet_compare={'text': 'You wanted old ROBLOX back, you got it. Check out our sweet "new" look! #BringBackOldROBLOX https://vine.co/v/OL9VqvM6wJh', 'date': 'Wed Apr 01 16:17:13 +0000 2015', 'tweetURL': 'https://twitter.com/Roblox/status/583302104342638592', 'tweetID': '583302104342638592', 'conversationID': '583302104342638592', 'mediaURLs': ['https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ'], 'media_extended': [{'url': 'https://v.cdn.vine.co/r/videos/20A1BE53011195086166081318912_3fe3b526b1a.1.5.3156516531034157495.mp4?versionId=DI1mMu7EI6zcLbvgucyp3GHebdz8.9cQ', 'type': 'video', 'size': {'width': 435, 'height': 435}}], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': True, 'combinedMediaUrl': None, 'pollData': {'options': []}, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': None, 'date_epoch': 1427905033} +testRetweetTweet_compare={'text': 'RT @pdxdylan: If you want to try this out, on your mobile device, head over to https://vxtwitter.com/preferences and enable "Open links in app". Hope…', 'date': 'Tue Aug 27 23:09:07 +0000 2024', 'tweetURL': 'https://twitter.com/pdxdylan/status/1828570470222045294', 'tweetID': '1828570470222045294', 'conversationID': '1828570470222045294', 'mediaURLs': [], 'media_extended': [], 'possibly_sensitive': False, 'hashtags': [], 'qrtURL': None, 'allSameType': True, 'hasMedia': False, 'combinedMediaUrl': None, 'pollData': None, 'article': None, 'lang': 'en', 'replyingTo': None, 'replyingToID': None, 'retweetURL': 'https://twitter.com/i/status/1828569456231993456', 'date_epoch': 1724800147} testUser="https://twitter.com/jack" testUserSuspended="https://twitter.com/twitter"