diff --git a/twExtract/__init__.py b/twExtract/__init__.py
index baf5172..bb3f940 100644
--- a/twExtract/__init__.py
+++ b/twExtract/__init__.py
@@ -1,8 +1,33 @@
 import yt_dlp
 from yt_dlp.extractor import twitter
 import json
+import requests
+import re
+from . import twExtractError
 
-def extractStatus(url):
+# Guest token cached for the life of the process; filled in lazily by getGuestToken().
+guestToken=None
+# Matches "<name>/status/<id>" or "<name>/statuses/<id>"; group 2 is the numeric tweet ID.
+pathregex = r"\w{1,15}\/(status|statuses)\/(\d{2,20})"
+# Authorization header value shared by every api.twitter.com request in this module.
+bearer = "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
+# Per-request timeout in seconds, so a stalled connection cannot block the caller forever.
+requestTimeout = 10
+
+def getGuestToken():
+    """Return the cached guest token, requesting one from the API on first use.
+
+    The token is stored in the module-level ``guestToken`` so later calls skip
+    the network round trip. Request failures, a non-JSON body, or a response
+    without a ``guest_token`` key propagate as exceptions to the caller.
+    """
+    global guestToken
+    if guestToken is None:
+        r = requests.post("https://api.twitter.com/1.1/guest/activate.json", headers={"Authorization":bearer}, timeout=requestTimeout)
+        guestToken = r.json()["guest_token"]
+    return guestToken
+
+def extractStatus_fallback(url):
     twIE = twitter.TwitterIE()
     twIE.set_downloader(yt_dlp.YoutubeDL())
     twid = twIE._match_id(url)
@@ -16,6 +41,40 @@ def extractStatus(url):
         })
     return status
 
+
+def extractStatus(url):
+    """Return the status JSON for the tweet that *url* points at.
+
+    The tweet ID is parsed from the URL and the tweet is requested from the 1.1
+    API with a guest token. If the URL does not match ``pathregex``, a request
+    fails, or the API answers with an ``errors`` list, the result of
+    ``extractStatus_fallback(url)`` is returned instead, so any exception that
+    reaches the caller comes from the fallback.
+    """
+    try:
+        # get tweet ID
+        m = re.search(pathregex, url)
+        if m is not None:
+            twid = m.group(2)
+            # get guest token
+            token = getGuestToken()
+            # get tweet
+            tweet = requests.get("https://api.twitter.com/1.1/statuses/show/" + twid + ".json?tweet_mode=extended&cards_platform=Web-12&include_cards=1&include_reply_count=1&include_user_entities=0", headers={"Authorization":bearer, "x-guest-token":token}, timeout=requestTimeout)
+            output = tweet.json()
+            if "errors" in output:
+                # pick the first error and create a twExtractError
+                error = output["errors"][0]
+                raise twExtractError.TwExtractError(error["code"], error["message"])
+            return output
+    except Exception:
+        # Best effort by design: every failure on the API route (including the
+        # TwExtractError raised above) is dropped in favour of the fallback.
+        pass
+    # Reached for unmatched URLs and API failures alike; called outside the try
+    # so the fallback runs exactly once and its own errors propagate unchanged.
+    return extractStatus_fallback(url)
+
+
 def lambda_handler(event, context):
     if ("queryStringParameters" not in event):
         return {
diff --git a/twExtract/twExtractError.py b/twExtract/twExtractError.py
new file mode 100644
index 0000000..fba0328
--- /dev/null
+++ b/twExtract/twExtractError.py
@@ -0,0 +1,13 @@
+
+class TwExtractError(Exception):
+    """Error built from an entry of the API's ``errors`` list.
+
+    ``code`` holds the error code and ``msg`` the error message; ``msg`` is
+    also what ``str()`` returns.
+    """
+    def __init__(self, code, message):
+        self.code = code
+        self.msg = message
+
+    def __str__(self):
+        return self.msg
diff --git a/twitfix.py b/twitfix.py
index 302e004..6546236 100644
--- a/twitfix.py
+++ b/twitfix.py
@@ -4,7 +4,6 @@ from weakref import finalize
 from flask import Flask, render_template, request, redirect, abort, Response, send_from_directory, url_for, send_file, make_response, jsonify
 from flask_cors import CORS
 import textwrap
-from pkg_resources import ExtractionError
 import requests
 import re
 import os
@@ -206,8 +205,8 @@ def vnfFromCacheOrDL(video_link):
             vnf = link_to_vnf(video_link)
             addVnfToLinkCache(video_link, vnf)
             return vnf,None
-        except ExtractorError as exErr:
-            if 'HTTP Error 404' in exErr.msg:
+        except (ExtractorError, twExtract.twExtractError.TwExtractError) as exErr:
+            if 'HTTP Error 404' in exErr.msg or 'No status found with that ID' in exErr.msg:
                 exErr.msg=msgs.tweetNotFound
             elif 'suspended' in exErr.msg:
                 exErr.msg=msgs.tweetSuspended