diff --git a/twExtract/__init__.py b/twExtract/__init__.py index f1be1e5..fa3dae4 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -8,6 +8,9 @@ import urllib.parse import math bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw" v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" + +bearerTokens=[bearer,v2bearer] + guestToken=None guestTokenUses=0 pathregex = r"\w{1,15}\/(status|statuses)\/(\d{2,20})" @@ -36,6 +39,26 @@ class TwExtractError(Exception): def __str__(self): return self.msg +def cycleBearerTokenGet(url,headers): + global bearerTokens + rateLimitRemaining = None + for token in bearerTokens: + headers["Authorization"] = token + try: + tweet = requests.get(url, headers=headers) + try: + rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") + except: # for some reason the header is not always present + pass + if tweet.status_code == 429 and rateLimitRemaining is not None and int(rateLimitRemaining) > 0: # special case where the bearer token is rate limited but another one is not + # try another bearer token + print(f"Error 429 but {rateLimitRemaining} remaining") + continue + except Exception as e: + pass + return tweet + raise TwExtractError(400, "Extract error") + def getGuestToken(): global guestToken global guestTokenUses @@ -194,7 +217,7 @@ def extractStatusV2(url,workaroundTokens): csrfToken=str(uuid.uuid4()).replace('-', '') vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') vars['rest_ids'][0] = str(twid) - tweet = requests.get(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"}) + tweet = cycleBearerTokenGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"}) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}") @@ -252,7 +275,7 @@ def extractStatusV2Android(url,workaroundTokens): csrfToken=str(uuid.uuid4()).replace('-', '') vars = json.loads('{"referrer":"home","includeTweetImpression":true,"includeHasBirdwatchNotes":false,"isReaderMode":false,"includeEditPerspective":false,"includeEditControl":true,"focalTweetId":0,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}') vars['focalTweetId'] = int(twid) - tweet = requests.get(f"https://x.com/i/api/graphql/{androidGraphql_api}/ConversationTimelineV2?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(androidGraphqlFeatures)}", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"}) + tweet = cycleBearerTokenGet(f"https://x.com/i/api/graphql/{androidGraphql_api}/ConversationTimelineV2?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(androidGraphqlFeatures)}", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken}; ","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"}) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Android Token Rate limit remaining: {rateLimitRemaining}") @@ -309,7 +332,7 @@ def extractStatusV2TweetDetail(url,workaroundTokens): csrfToken=str(uuid.uuid4()).replace('-', '') vars = json.loads('{"focalTweetId":"0","with_rux_injections":false,"includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true,"withV2Timeline":true}') vars['focalTweetId'] = str(twid) - tweet = requests.get(f"https://x.com/i/api/graphql/{tweetDetailGraphql_api}/TweetDetail?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(tweetDetailGraphqlFeatures)}", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken};guest_id=v1:171580915998156785","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"}) + tweet = cycleBearerTokenGet(f"https://x.com/i/api/graphql/{tweetDetailGraphql_api}/TweetDetail?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(tweetDetailGraphqlFeatures)}", headers={"Authorization":bearer,"Cookie":f"auth_token={authToken}; ct0={csrfToken};guest_id=v1:171580915998156785","x-twitter-active-user":"yes","x-twitter-auth-type":"OAuth2Session","x-twitter-client-language":"en","x-csrf-token":csrfToken,"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0"}) try: rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining") print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}")