Skip to content

Commit 4ac17cf

Browse files
committed
parallelize extractStatusV2
1 parent f4d1308 commit 4ac17cf

File tree

2 files changed

+71
-43
lines changed

2 files changed

+71
-43
lines changed

test_vx_extract.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_twextract_textTweetExtract():
3838
assert tweet["user"]["screen_name"]=="jack"
3939
assert 'extended_entities' not in tweet
4040

41-
def test_twextract_extractV2(): # remove this when v2 is default
41+
def test_twextract_extractV2():
4242
tweet = twExtract.extractStatusV2(testTextTweet,workaroundTokens=tokens)
4343

4444
def test_twextract_UserExtract():

twExtract/__init__.py

Lines changed: 70 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import sys
1010
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
1111
import twUtils
12+
import concurrent.futures
1213
bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
1314
v2bearer="Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
1415
androidBearer="Bearer AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F"
@@ -50,6 +51,37 @@ def __init__(self, code, message):
5051
def __str__(self):
5152
return self.msg
5253

54+
def parallel_token_request(twid, tokens, request_function):
    """Try ``request_function(twid, token)`` for several tokens concurrently.

    Every token is submitted to a thread pool of at most two workers, and the
    value from the first call that finishes without raising is returned.

    Args:
        twid: Identifier passed through unchanged as the first argument of
            ``request_function``.
        tokens: Sequence of tokens to try; each one is passed as the second
            argument of ``request_function``.
        request_function: Callable ``(twid, token)``. Any ``Exception`` it
            raises marks that token as failed.

    Returns:
        The value returned by the first successful call, or ``None`` when
        ``tokens`` is empty.

    Raises:
        TwExtractError: With code 400 when every token failed; the message
            carries the text of the last failure that was observed.
    """
    # ThreadPoolExecutor rejects max_workers=0, so an empty token list must
    # be handled before the pool is created.
    if not tokens:
        return None

    def try_token(token):
        # Convert exceptions into a (success, payload) pair so that
        # future.result() never raises in the collection loop below.
        try:
            return True, request_function(twid, token)
        except Exception as e:
            return False, str(e)

    last_error = None
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, len(tokens))) as executor:
        futures = [executor.submit(try_token, token) for token in tokens]
        for future in concurrent.futures.as_completed(futures):
            success, payload = future.result()
            if success:
                # Cancel whatever has not started yet; cancel() is a no-op
                # (returns False) for calls already running or finished.
                # Leaving the ``with`` block still waits for running calls.
                for pending in futures:
                    pending.cancel()
                return payload
            last_error = payload

    # tokens was non-empty and no call succeeded, so every token failed
    raise TwExtractError(400, f"All tokens failed. Last error: {last_error}")
84+
5385
def cycleBearerTokenGet(url,headers):
5486
global bearerTokens
5587
rateLimitRemaining = None
@@ -237,51 +269,47 @@ def extractStatusV2(url,workaroundTokens):
237269
# get tweet
238270
tokens = workaroundTokens
239271
random.shuffle(tokens)
240-
for authToken in tokens:
272+
def request_with_token(twid, authToken):
273+
vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}')
274+
vars['rest_ids'][0] = str(twid)
275+
tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken,btoken=v2bearer)
241276
try:
242-
vars = json.loads('{"includeTweetImpression":true,"includeHasBirdwatchNotes":false,"includeEditPerspective":false,"rest_ids":["x"],"includeEditControl":true,"includeCommunityTweetRelationship":true,"includeTweetVisibilityNudge":true}')
243-
vars['rest_ids'][0] = str(twid)
244-
tweet = twitterApiGet(f"https://x.com/i/api/graphql/{v2graphql_api}/TweetResultsByIdsQuery?variables={urllib.parse.quote(json.dumps(vars))}&features={urllib.parse.quote(v2Features)}",authToken=authToken)
245-
try:
246-
rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining")
247-
print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}")
248-
except: # for some reason the header is not always present
249-
pass
250-
if tweet.status_code == 429:
251-
print("Rate limit reached for token (429)")
252-
# try another token
253-
continue
254-
output = tweet.json()
255-
256-
if "errors" in output:
257-
print(f"Error in output: {json.dumps(output['errors'])}")
258-
# try another token
277+
rateLimitRemaining = tweet.headers.get("x-rate-limit-remaining")
278+
print(f"Twitter Token Rate limit remaining: {rateLimitRemaining}")
279+
except: # for some reason the header is not always present
280+
pass
281+
if tweet.status_code == 429:
282+
print("Rate limit reached for token (429)")
283+
# raise so the caller records this token as failed; another token may still succeed
284+
raise TwExtractError(400, "Extract error: rate limit reached")
285+
output = tweet.json()
286+
287+
if "errors" in output:
288+
print(f"Error in output: {json.dumps(output['errors'])}")
289+
# raise so the caller records this token as failed; another token may still succeed
290+
raise TwExtractError(400, "Extract error: errors in output - "+json.dumps(output['errors']))
291+
entries=output['data']['tweet_results']
292+
tweetEntry=None
293+
for entry in entries:
294+
if 'result' not in entry:
295+
print("Tweet result not found in entry")
259296
continue
260-
entries=output['data']['tweet_results']
261-
tweetEntry=None
262-
for entry in entries:
263-
if 'result' not in entry:
264-
print("Tweet result not found in entry")
265-
continue
266-
result = entry['result']
267-
if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults':
268-
result=result['tweet']
269-
elif '__typename' in result and result['__typename'] == 'TweetUnavailable':
270-
if 'reason' in result:
271-
return {'error':'Tweet unavailable: '+result['reason']}
272-
return {'error':'Tweet unavailable'}
273-
if 'rest_id' in result and result['rest_id'] == twid:
274-
tweetEntry=result
275-
break
276-
tweet=tweetEntry
277-
if tweet is None:
278-
print("Tweet 404")
279-
return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'}
280-
except Exception as e:
281-
print(f"Exception in extractStatusV2: {str(e)}")
282-
continue
297+
result = entry['result']
298+
if '__typename' in result and result['__typename'] == 'TweetWithVisibilityResults':
299+
result=result['tweet']
300+
elif '__typename' in result and result['__typename'] == 'TweetUnavailable':
301+
if 'reason' in result:
302+
return {'error':'Tweet unavailable: '+result['reason']}
303+
return {'error':'Tweet unavailable'}
304+
if 'rest_id' in result and result['rest_id'] == twid:
305+
tweetEntry=result
306+
break
307+
tweet=tweetEntry
308+
if tweet is None:
309+
print("Tweet 404")
310+
return {'error':'Tweet not found (404); May be due to invalid tweet, changes in Twitter\'s API, or a protected account.'}
283311
return tweet
284-
raise TwExtractError(400, "Extract error")
312+
return parallel_token_request(twid, tokens, request_with_token)
285313

286314
def extractStatusV2Android(url,workaroundTokens):
287315
# get tweet ID

0 commit comments

Comments
 (0)