From 0d85623c789250477651848edf824d09238ef6fd Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 8 Apr 2023 16:37:45 +0100 Subject: [PATCH 1/2] Add NSFW Tweet extract test --- test_vx.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_vx.py b/test_vx.py index b6a3e0e..61a9406 100644 --- a/test_vx.py +++ b/test_vx.py @@ -19,6 +19,10 @@ testQRTTweet="https://twitter.com/Twitter/status/1232823570046255104" testQrtCeptionTweet="https://twitter.com/CatherineShu/status/585253766271672320" testQrtVideoTweet="https://twitter.com/Twitter/status/1494436688554344449" +# I literally picked a random tweet that twitter marked as 'sensitive' without it being like, actually NSFW. +# Any better suggestions for a tweet to use are welcome +testNSFWTweet="https://twitter.com/kuyacoy/status/1581185279376838657" + textVNF_compare = {'tweet': 'https://twitter.com/jack/status/20', 'url': '', 'description': 'just setting up my twttr', 'screen_name': 'jack', 'type': 'Text', 'images': ['', '', '', '', ''], 'time': 'Tue Mar 21 20:50:14 +0000 2006', 'qrtURL': None, 'nsfw': False} videoVNF_compare={'tweet': 'https://twitter.com/Twitter/status/1263145271946551300', 'url': 'https://video.twimg.com/amplify_video/1263145212760805376/vid/1280x720/9jous8HM0_duxL0w.mp4?tag=13', 'description': 'Testing, testing...\n\nA new way to have a convo with exactly who you want. We’re starting with a small % globally, so keep your 👀 out to see it in action. https://t.co/pV53mvjAVT', 'thumbnail': 'http://pbs.twimg.com/media/EYeX7akWsAIP1_1.jpg', 'screen_name': 'Twitter', 'type': 'Video', 'images': ['', '', '', '', ''], 'time': 'Wed May 20 16:31:15 +0000 2020', 'qrtURL': None, 'nsfw': False,'verified': True, 'size': {'width': 1920, 'height': 1080}} testMedia_compare={'tweet': 'https://twitter.com/Twitter/status/1118295916874739714', 'url': '', 'description': 'On profile pages, we used to only show someone’s replies, not the original Tweet 🙄 Now we’re showing both so you can follow the conversation more easily! https://t.co/LSBEZYFqmY', 'thumbnail': 'https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg', 'screen_name': 'Twitter', 'type': 'Image', 'images': ['https://pbs.twimg.com/media/D4TS4xeX4AA02DI.jpg', '', '', '', '1'], 'time': 'Tue Apr 16 23:31:38 +0000 2019', 'qrtURL': None, 'nsfw': False, 'size': {}} @@ -102,6 +106,9 @@ def test_pollTweetExtract(): assert 'card' in tweet compareDict(testPoll_comparePoll,tweet['card']) +def test_NSFW_TweetExtract(): + tweet = twExtract.extractStatus(testNSFWTweet) # For now just test that there's no error + ## VNF conversion test ## def test_textTweetVNF(): From 0820a9d35448e52133ef00ce951f1ef5bc0d9793 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 8 Apr 2023 17:58:37 +0100 Subject: [PATCH 2/2] Workaround for Twitter API changes --- configHandler.py | 11 +++++++++-- requirements.txt | 5 +++-- serverless.yml | 4 ++++ test_vx.py | 5 +++++ twExtract/__init__.py | 36 ++++++++++++++++++++++++------------ 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/configHandler.py b/configHandler.py index 37f84dc..c0ae984 100644 --- a/configHandler.py +++ b/configHandler.py @@ -14,7 +14,13 @@ elif ('RUNNING_SERVERLESS' in os.environ and os.environ['RUNNING_SERVERLESS'] == "repo": os.environ["VXTWITTER_REPO"], "url": os.environ["VXTWITTER_URL"], "combination_method": os.environ["VXTWITTER_COMBINATION_METHOD"], # can either be 'local' or a URL to a server handling requests in the same format - "gifConvertAPI":os.environ["VXTWITTER_GIF_CONVERT_API"] + "gifConvertAPI":os.environ["VXTWITTER_GIF_CONVERT_API"], + "workaroundKeys":{ + "consumerKey":os.environ["VXTWITTER_WORKAROUND_CONSUMER_KEY"], + "consumerSecret":os.environ["VXTWITTER_WORKAROUND_CONSUMER_SECRET"], + "accessToken":os.environ["VXTWITTER_WORKAROUND_TOKEN"], + "accessTokenSecret":os.environ["VXTWITTER_WORKAROUND_TOKEN_SECRET"] + } } } else: @@ -31,7 +37,8 @@ else: "repo": "https://github.com/dylanpdx/BetterTwitFix", "url": "https://vxtwitter.com", "combination_method": "local", # can either be 'local' or a URL to a server handling requests in the same format - "gifConvertAPI":"" + "gifConvertAPI":"", + "workaroundKeys":{"consumerKey":"","consumerSecret":"","accessToken":"","accessTokenSecret":""} } } diff --git a/requirements.txt b/requirements.txt index 3f58c01..0b24e26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ -pymongo==4.3.3 +pymongo==4.3.3 boto3==1.26.104 requests==2.28.2 Pillow==9.4.0 Flask==2.2.3 Flask-Cors==3.0.10 -yt-dlp==2022.7.18 \ No newline at end of file +yt-dlp==2022.7.18 +twitter==1.19.6 \ No newline at end of file diff --git a/serverless.yml b/serverless.yml index 35bf26f..1e0a5cf 100644 --- a/serverless.yml +++ b/serverless.yml @@ -27,6 +27,10 @@ provider: VXTWITTER_URL: ${env:VXTWITTER_URL, 'https://vxtwitter.com'} VXTWITTER_COMBINATION_METHOD: ${env:VXTWITTER_COMBINATION_METHOD, 'local'} VXTWITTER_GIF_CONVERT_API: ${env:VXTWITTER_GIF_CONVERT_API, ''} + VXTWITTER_WORKAROUND_CONSUMER_KEY: ${env:VXTWITTER_WORKAROUND_CONSUMER_KEY, ''} + VXTWITTER_WORKAROUND_CONSUMER_SECRET: ${env:VXTWITTER_WORKAROUND_CONSUMER_SECRET, ''} + VXTWITTER_WORKAROUND_TOKEN: ${env:VXTWITTER_WORKAROUND_TOKEN, ''} + VXTWITTER_WORKAROUND_TOKEN_SECRET: ${env:VXTWITTER_WORKAROUND_TOKEN_SECRET, ''} package: patterns: diff --git a/test_vx.py b/test_vx.py index 61a9406..8064c7e 100644 --- a/test_vx.py +++ b/test_vx.py @@ -194,6 +194,11 @@ def test_embedFromCache(): resp = client.get(testMultiMediaTweet.replace("https://twitter.com",""),headers={"User-Agent":"test"}) assert resp.status_code==200 +def test_embedSuggestive(): + resp = client.get(testNSFWTweet.replace("https://twitter.com",""),headers={"User-Agent":"test"}) + assert resp.status_code==200 + assert "so i had a bot generate it for me" in str(resp.data) + def test_veryLongEmbed(): cache.clearCache() cache.setCache({'https://twitter.com/TEST/status/1234': diff --git a/twExtract/__init__.py b/twExtract/__init__.py index 82c6227..0c86b36 100644 --- a/twExtract/__init__.py +++ b/twExtract/__init__.py @@ -4,11 +4,24 @@ import json import requests import re from . import twExtractError +import twitter +from configHandler import config bearer="Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw" guestToken=None pathregex = r"\w{1,15}\/(status|statuses)\/(\d{2,20})" userregex = r"^https?:\/\/(?:www\.)?twitter\.com\/(?:#!\/)?@?([^/?#]*)(?:[?#/].*)?$" userIDregex = r"\/i\/user\/(\d+)" +try: + auth = twitter.oauth.OAuth( + config['config']['workaroundKeys']["accessToken"], + config['config']['workaroundKeys']["accessTokenSecret"], + config['config']['workaroundKeys']["consumerKey"], + config['config']['workaroundKeys']["consumerSecret"] + ) + api = twitter.Twitter(auth=auth) +except Exception as e: + api = None + def getGuestToken(): global guestToken if guestToken is None: @@ -17,18 +30,17 @@ def getGuestToken(): return guestToken def extractStatus_fallback(url): - twIE = twitter.TwitterIE() - twIE.set_downloader(yt_dlp.YoutubeDL()) - twid = twIE._match_id(url) - status = twIE._call_api( - 'statuses/show/%s.json' % twid, twid, { - 'cards_platform': 'Web-12', - 'include_cards': 1, - 'include_reply_count': 1, - 'include_user_entities': 0, - 'tweet_mode': 'extended', - }) - return status + if api is None: + raise twExtractError.TwExtractError(500, "Could not extract tweet.") + print(" ➤ [ I ] Using fallback method to extract tweet") + # get tweet ID + m = re.search(pathregex, url) + if m is None: + raise twExtractError.TwExtractError(400, "Invalid URL") + twid = m.group(2) + # get tweet + tweet = api.statuses.show(_id=twid, tweet_mode="extended") + return tweet def extractStatus(url):