parallelize extractStatusV2
This commit is contained in:
@@ -38,7 +38,7 @@ def test_twextract_textTweetExtract():
|
|||||||
assert tweet["user"]["screen_name"]=="jack"
|
assert tweet["user"]["screen_name"]=="jack"
|
||||||
assert 'extended_entities' not in tweet
|
assert 'extended_entities' not in tweet
|
||||||
|
|
||||||
def test_twextract_extractV2(): # remove this when v2 is default
|
def test_twextract_extractV2():
|
||||||
tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens)
|
tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens)
|
||||||
|
|
||||||
def test_twextract_UserExtract():
|
def test_twextract_UserExtract():
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from oauthlib import oauth1
|
|||||||
import sys
|
import sys
|
||||||
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
|
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
|
||||||
import twUtils
|
import twUtils
|
||||||
|
import concurrent.futures
|
||||||
bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
|
bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
|
||||||
v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||||
androidBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F"
|
androidBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F"
|
||||||
@@ -50,6 +51,37 @@ class TwExtractError(Exception):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.msg
|
return self.msg
|
||||||
|
|
||||||
|
def parallel_token_request(twid, tokens, request_function):
    """Try ``request_function(twid, token)`` for each token, at most two at a time.

    Returns the result of the first call that completes without raising.
    A call counts as failed only if it raises; any returned value
    (including an error dictionary) is treated as a success.

    Args:
        twid: Identifier passed through unchanged as the first argument of
            ``request_function``.
        tokens: Sequence of tokens to try. May be empty.
        request_function: Callable invoked as ``request_function(twid, token)``.

    Returns:
        The value returned by the first successful call, or ``None`` when
        ``tokens`` is empty (there is nothing to try and nothing failed).

    Raises:
        TwExtractError: If every token was tried and every call raised. The
            message carries the text of the last failure that completed.
    """
    # ThreadPoolExecutor rejects max_workers=0 with ValueError, so the
    # "no tokens" case has to be handled before the pool is created.
    if not tokens:
        return None

    def try_token(token):
        # Convert exceptions into values so that one failing token never
        # aborts the as_completed() loop below.
        try:
            return {'success': True, 'result': request_function(twid, token)}
        except Exception as e:
            return {'success': False, 'error': str(e)}

    last_error = None
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, len(tokens))) as executor:
        futures = [executor.submit(try_token, token) for token in tokens]
        for future in concurrent.futures.as_completed(futures):
            outcome = future.result()
            if outcome['success']:
                # Drop requests that have not started yet. cancel() is a
                # no-op on running or finished futures; a request already
                # in flight is still awaited when the ``with`` block exits.
                for pending in futures:
                    pending.cancel()
                return outcome['result']
            last_error = outcome['error']

    # Every token was tried and every call raised.
    raise TwExtractError(400, f"All tokens failed. Last error: {last_error}")
|
||||||
|
|
||||||
def cycleBearerTokenGet(url,headers):
|
def cycleBearerTokenGet(url,headers):
|
||||||
global bearerTokens
|
global bearerTokens
|
||||||
rateLimitRemaining = None
|
rateLimitRemaining = None
|
||||||
@@ -237,51 +269,47 @@ def extractStatusV2(url,workaroundTokens):
|
|||||||
# get tweet
|
# get tweet
|
||||||
tokens = workaroundTokens
|
tokens = workaroundTokens
|
||||||
random.shuffle(tokens)
|
random.shuffle(tokens)
|
||||||
for authToken in tokens:
|
def request_with_token(twid, authToken):
|
||||||
|
vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}')
|
||||||
|
vars['rest_ids'][0] = str(twid)
|
||||||
|
tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken,btoken=v2bearer)
|
||||||
try:
|
try:
|
||||||
vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}')
|
rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining")
|
||||||
vars['rest_ids'][0] = str(twid)
|
print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}")
|
||||||
tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken)
|
except: # for some reason the header is not always present
|
||||||
try:
|
pass
|
||||||
rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining")
|
if tweet.status_code == 429:
|
||||||
print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}")
|
print("Rate limit reached for token (429)")
|
||||||
except: # for some reason the header is not always present
|
# try another token
|
||||||
pass
|
raise TwExtractError(400, "Extract error: rate limit reached")
|
||||||
if tweet.status_code == 429:
|
output = tweet.json()
|
||||||
print("Rate limit reached for token (429)")
|
|
||||||
# try another token
|
if "errors" in output:
|
||||||
|
print(f"Error in output: {json.dumps(output['errors'])}")
|
||||||
|
# try another token
|
||||||
|
raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors']))
|
||||||
|
entries=output['data']['tweet_results']
|
||||||
|
tweetEntry=None
|
||||||
|
for entry in entries:
|
||||||
|
if 'result' not in entry:
|
||||||
|
print("Tweet result not found in entry")
|
||||||
continue
|
continue
|
||||||
output = tweet.json()
|
result = entry['result']
|
||||||
|
if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults':
|
||||||
if "errors" in output:
|
result=result['tweet']
|
||||||
print(f"Error in output: {json.dumps(output['errors'])}")
|
elif '__typename' in result and result['__typename'] == 'TweetUnavailable':
|
||||||
# try another token
|
if 'reason' in result:
|
||||||
continue
|
return {'error':'Tweet unavailable: '+result['reason']}
|
||||||
entries=output['data']['tweet_results']
|
return {'error':'Tweet unavailable'}
|
||||||
tweetEntry=None
|
if 'rest_id' in result and result['rest_id'] == twid:
|
||||||
for entry in entries:
|
tweetEntry=result
|
||||||
if 'result' not in entry:
|
break
|
||||||
print("Tweet result not found in entry")
|
tweet=tweetEntry
|
||||||
continue
|
if tweet is None:
|
||||||
result = entry['result']
|
print("Tweet 404")
|
||||||
if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults':
|
return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'}
|
||||||
result=result['tweet']
|
|
||||||
elif '__typename' in result and result['__typename'] == 'TweetUnavailable':
|
|
||||||
if 'reason' in result:
|
|
||||||
return {'error':'Tweet unavailable: '+result['reason']}
|
|
||||||
return {'error':'Tweet unavailable'}
|
|
||||||
if 'rest_id' in result and result['rest_id'] == twid:
|
|
||||||
tweetEntry=result
|
|
||||||
break
|
|
||||||
tweet=tweetEntry
|
|
||||||
if tweet is None:
|
|
||||||
print("Tweet 404")
|
|
||||||
return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'}
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Exception in extractStatusV2: {str(e)}")
|
|
||||||
continue
|
|
||||||
return tweet
|
return tweet
|
||||||
raise TwExtractError(400, "Extract error")
|
return parallel_token_request(twid, tokens, request_with_token)
|
||||||
|
|
||||||
def extractStatusV2Android(url,workaroundTokens):
|
def extractStatusV2Android(url,workaroundTokens):
|
||||||
# get tweet ID
|
# get tweet ID
|
||||||
|
|||||||
Reference in New Issue
Block a user