Re-implement txt functionality
This commit is contained in:
parent
ee5c93fe6d
commit
165ae08300
5
api.md
5
api.md
@ -46,5 +46,6 @@ The following fields are returned:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Additionally, you can add the `include_txt` parameter. Setting it to 'true' will include an additional media URL, a link to a .txt containing basic tweet information.
|
Additionally, you can add the `include_txt` or `include_rtf` parameters. Setting either to 'true' will include an additional media URL: a link to a .txt or .rtf file containing basic tweet information.
|
||||||
`include_txt` can also be set to `ifnomedia`, which will only add a link to the .txt if there is no media URLs in the tweet.
|
Instead of 'true', either parameter can also be set to `ifnomedia`, which will only add the link if there are no media URLs in the tweet.
|
||||||
|
These features are added for archival of text tweets in certain software.
|
@ -7,4 +7,5 @@ Flask-Cors==4.0.0
|
|||||||
yt-dlp==2022.7.18
|
yt-dlp==2022.7.18
|
||||||
Werkzeug==2.3.7
|
Werkzeug==2.3.7
|
||||||
numerize==0.12
|
numerize==0.12
|
||||||
oauthlib==3.2.2
|
oauthlib==3.2.2
|
||||||
|
PyRTF3==0.47.5
|
38
test_api.py
Normal file
38
test_api.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from vx_testdata import *
|
||||||
|
import twitfix,twExtract
|
||||||
|
from flask.testing import FlaskClient
|
||||||
|
client = FlaskClient(twitfix.app)
|
||||||
|
|
||||||
|
def test_api_include_txt():
    """API request with ``include_txt=true`` lists a ``.txt`` link in ``mediaURLs``."""
    # Rewrite the tweet URL onto the API host so the request is handled as an API call.
    api_url = testTextTweet.replace("https://twitter.com","https://api.vxtwitter.com")
    resp = client.get(api_url+"?include_txt=true",headers={"User-Agent":"test"})
    payload = resp.get_json()
    assert resp.status_code==200
    media_urls = payload["mediaURLs"]
    assert any(".txt" in url for url in media_urls)
|
||||||
|
|
||||||
|
def test_api_include_rtf():
    """API request with ``include_rtf=true`` lists a ``.rtf`` link in ``mediaURLs``."""
    # Rewrite the tweet URL onto the API host so the request is handled as an API call.
    api_url = testTextTweet.replace("https://twitter.com","https://api.vxtwitter.com")
    resp = client.get(api_url+"?include_rtf=true",headers={"User-Agent":"test"})
    payload = resp.get_json()
    assert resp.status_code==200
    media_urls = payload["mediaURLs"]
    assert any(".rtf" in url for url in media_urls)
|
||||||
|
|
||||||
|
def test_api_include_txt_nomedia():
    """``include_txt=ifnomedia`` adds a ``.txt`` link for the text tweet but not for the media tweet."""
    # (tweet URL, whether a .txt link is expected in mediaURLs), checked in this order.
    cases = (
        (testTextTweet, True),
        (testMediaTweet, False),
    )
    for tweet_url, expect_link in cases:
        api_url = tweet_url.replace("https://twitter.com","https://api.vxtwitter.com")
        resp = client.get(api_url+"?include_txt=ifnomedia",headers={"User-Agent":"test"})
        payload = resp.get_json()
        assert resp.status_code==200
        has_link = any(".txt" in url for url in payload["mediaURLs"])
        assert has_link == expect_link
|
||||||
|
|
||||||
|
def test_api_include_rtf_nomedia():
    """``include_rtf=ifnomedia`` adds a ``.rtf`` link for the text tweet but not for the media tweet."""
    # (tweet URL, whether a .rtf link is expected in mediaURLs), checked in this order.
    cases = (
        (testTextTweet, True),
        (testMediaTweet, False),
    )
    for tweet_url, expect_link in cases:
        api_url = tweet_url.replace("https://twitter.com","https://api.vxtwitter.com")
        resp = client.get(api_url+"?include_rtf=ifnomedia",headers={"User-Agent":"test"})
        payload = resp.get_json()
        assert resp.status_code==200
        has_link = any(".rtf" in url for url in payload["mediaURLs"])
        assert has_link == expect_link
|
@ -173,4 +173,14 @@ def test_embed_stripLastUrl():
|
|||||||
|
|
||||||
def test_embed_no_username():
|
def test_embed_no_username():
|
||||||
resp = client.get(testMediaTweet.replace("/pdxdylan",""),headers={"User-Agent":"test"})
|
resp = client.get(testMediaTweet.replace("/pdxdylan",""),headers={"User-Agent":"test"})
|
||||||
assert resp.status_code==200
|
assert resp.status_code==200
|
||||||
|
|
||||||
|
def test_embed_txt():
    """Requesting a tweet path with a ``.txt`` suffix returns a body containing the tweet text."""
    resp = client.get(testTextTweet.replace("https://twitter.com","")+".txt",headers={"User-Agent":"test"})
    assert resp.status_code==200
    # Compare against the decoded body. str(resp.data) is the repr of a bytes
    # object, in which newlines and non-ASCII characters appear as escape
    # sequences and therefore never match the expected text.
    assert testTextTweet_compare["text"] in resp.get_data(as_text=True)
|
||||||
|
|
||||||
|
def test_embed_rtf():
    """Requesting a tweet path with a ``.rtf`` suffix returns a body containing the tweet text."""
    resp = client.get(testTextTweet.replace("https://twitter.com","")+".rtf",headers={"User-Agent":"test"})
    assert resp.status_code==200
    # Compare against the decoded body rather than str(resp.data), which is the
    # repr of a bytes object and shows special characters as escape sequences.
    # NOTE(review): this containment check assumes the fixture text appears
    # unescaped inside the generated RTF document - confirm for non-ASCII text.
    assert testTextTweet_compare["text"] in resp.get_data(as_text=True)
|
@ -3,10 +3,13 @@ import vx_testdata
|
|||||||
tokens = ""
|
tokens = ""
|
||||||
|
|
||||||
tokensList = tokens.split(",")
|
tokensList = tokens.split(",")
|
||||||
|
errorTokens = []
|
||||||
for token in tokensList:
|
for token in tokensList:
|
||||||
try:
|
try:
|
||||||
twExtract.extractStatusV2(vx_testdata.testNSFWTweet,workaroundTokens=[token])
|
twExtract.extractStatusV2(vx_testdata.testNSFWTweet,workaroundTokens=[token])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(str(e)+" "+token)
|
print(str(e)+" "+token)
|
||||||
pass
|
errorTokens.append(token)
|
||||||
|
pass
|
||||||
|
|
||||||
|
print("Error tokens: "+str(errorTokens))
|
39
twitfix.py
39
twitfix.py
@ -4,7 +4,7 @@ from flask_cors import CORS
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import combineImg
|
import combineImg
|
||||||
from io import BytesIO
|
from io import BytesIO, StringIO
|
||||||
import urllib
|
import urllib
|
||||||
import msgs
|
import msgs
|
||||||
import twExtract as twExtract
|
import twExtract as twExtract
|
||||||
@ -15,6 +15,10 @@ import vxlogging as log
|
|||||||
from utils import getTweetIdFromUrl, pathregex
|
from utils import getTweetIdFromUrl, pathregex
|
||||||
from vxApi import getApiResponse
|
from vxApi import getApiResponse
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
from PyRTF.Elements import Document
|
||||||
|
from PyRTF.document.section import Section
|
||||||
|
from PyRTF.document.paragraph import Paragraph
|
||||||
|
from utils import BytesIOWrapper
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
CORS(app)
|
CORS(app)
|
||||||
user_agent=""
|
user_agent=""
|
||||||
@ -108,11 +112,11 @@ def oembedend():
|
|||||||
provName = request.args.get("provider",None)
|
provName = request.args.get("provider",None)
|
||||||
return oEmbedGen(desc, user, link, ttype,providerName=provName)
|
return oEmbedGen(desc, user, link, ttype,providerName=provName)
|
||||||
|
|
||||||
def getTweetData(twitter_url):
|
def getTweetData(twitter_url,include_txt="false",include_rtf="false"):
|
||||||
cachedVNF = getVnfFromLinkCache(twitter_url)
|
cachedVNF = getVnfFromLinkCache(twitter_url)
|
||||||
if cachedVNF is not None:
|
if cachedVNF is not None:
|
||||||
return cachedVNF
|
return cachedVNF
|
||||||
|
|
||||||
try:
|
try:
|
||||||
rawTweetData = twExtract.extractStatusV2Anon(twitter_url)
|
rawTweetData = twExtract.extractStatusV2Anon(twitter_url)
|
||||||
except:
|
except:
|
||||||
@ -127,7 +131,7 @@ def getTweetData(twitter_url):
|
|||||||
|
|
||||||
if rawTweetData is None:
|
if rawTweetData is None:
|
||||||
return None
|
return None
|
||||||
tweetData = getApiResponse(rawTweetData)
|
tweetData = getApiResponse(rawTweetData,include_txt,include_rtf)
|
||||||
if tweetData is None:
|
if tweetData is None:
|
||||||
return None
|
return None
|
||||||
addVnfToLinkCache(twitter_url,tweetData)
|
addVnfToLinkCache(twitter_url,tweetData)
|
||||||
@ -154,8 +158,18 @@ def twitfix(sub_path):
|
|||||||
if match is None:
|
if match is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
twitter_url = f'https://twitter.com/i/status/{getTweetIdFromUrl(sub_path)}'
|
twitter_url = f'https://twitter.com/i/status/{getTweetIdFromUrl(sub_path)}'
|
||||||
|
isApiRequest=request.url.startswith("https://api.vx") or request.url.startswith("http://api.vx")
|
||||||
|
|
||||||
tweetData = getTweetData(twitter_url)
|
include_txt="false"
|
||||||
|
include_rtf="false"
|
||||||
|
|
||||||
|
if isApiRequest:
|
||||||
|
if "include_txt" in request.args:
|
||||||
|
include_txt = request.args.get("include_txt")
|
||||||
|
if "include_rtf" in request.args:
|
||||||
|
include_rtf = request.args.get("include_rtf")
|
||||||
|
|
||||||
|
tweetData = getTweetData(twitter_url,include_txt,include_rtf)
|
||||||
if tweetData is None:
|
if tweetData is None:
|
||||||
log.error("Tweet Data Get failed for "+twitter_url)
|
log.error("Tweet Data Get failed for "+twitter_url)
|
||||||
return message(msgs.failedToScan)
|
return message(msgs.failedToScan)
|
||||||
@ -175,6 +189,19 @@ def twitfix(sub_path):
|
|||||||
# remove the .mp4 from the end of the URL
|
# remove the .mp4 from the end of the URL
|
||||||
if requestUrlWithoutQuery.endswith(".mp4") or requestUrlWithoutQuery.endswith(".png"):
|
if requestUrlWithoutQuery.endswith(".mp4") or requestUrlWithoutQuery.endswith(".png"):
|
||||||
sub_path = sub_path[:-4]
|
sub_path = sub_path[:-4]
|
||||||
|
elif requestUrlWithoutQuery.endswith(".txt"):
|
||||||
|
return Response(tweetData['text'], mimetype='text/plain')
|
||||||
|
elif requestUrlWithoutQuery.endswith(".rtf"):
|
||||||
|
doc = Document()
|
||||||
|
section = Section()
|
||||||
|
doc.Sections.append(section)
|
||||||
|
p = Paragraph()
|
||||||
|
p.append(tweetData['text'])
|
||||||
|
section.append(p)
|
||||||
|
rtf = StringIO()
|
||||||
|
doc.write(rtf)
|
||||||
|
rtf.seek(0)
|
||||||
|
return send_file(BytesIOWrapper(rtf), mimetype='application/rtf', as_attachment=True, download_name=f'{tweetData["user_screen_name"]}_{tweetData["tweetID"]}.rtf')
|
||||||
|
|
||||||
embedIndex = -1
|
embedIndex = -1
|
||||||
# if url ends with /1, /2, /3, or /4, we'll use that as the index
|
# if url ends with /1, /2, /3, or /4, we'll use that as the index
|
||||||
@ -182,7 +209,7 @@ def twitfix(sub_path):
|
|||||||
embedIndex = int(sub_path[-1])-1
|
embedIndex = int(sub_path[-1])-1
|
||||||
sub_path = sub_path[:-2]
|
sub_path = sub_path[:-2]
|
||||||
|
|
||||||
if request.url.startswith("https://api.vx") or request.url.startswith("http://api.vx"): # Directly return the API response if the request is from the API
|
if isApiRequest: # Directly return the API response if the request is from the API
|
||||||
return tweetData
|
return tweetData
|
||||||
elif directEmbed: # direct embed
|
elif directEmbed: # direct embed
|
||||||
# direct embeds should always prioritize the main tweet, so don't check for qrt
|
# direct embeds should always prioritize the main tweet, so don't check for qrt
|
||||||
|
26
utils.py
26
utils.py
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import io
|
||||||
|
|
||||||
pathregex = re.compile("\\w{1,15}\\/(status|statuses)\\/(\\d{2,20})")
|
pathregex = re.compile("\\w{1,15}\\/(status|statuses)\\/(\\d{2,20})")
|
||||||
endTCOregex = re.compile("(^.*?) +https:\/\/t.co\/.*?$")
|
endTCOregex = re.compile("(^.*?) +https:\/\/t.co\/.*?$")
|
||||||
@ -16,4 +17,27 @@ def stripEndTCO(text):
|
|||||||
if match is not None:
|
if match is not None:
|
||||||
return match.group(1)
|
return match.group(1)
|
||||||
else:
|
else:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/55977438
|
||||||
|
class BytesIOWrapper(io.BufferedReader):
    """Wrap a buffered bytes stream over TextIOBase string stream.

    Reads are forwarded to the wrapped text stream and the returned ``str``
    is encoded before being handed to the caller, so a text stream such as
    ``io.StringIO`` can be passed to code that expects a binary file object.

    ``size`` arguments are forwarded to the text stream unchanged, so they
    count characters; with a multi-byte encoding the returned bytes object
    may be longer than ``size``.
    """

    def __init__(self, text_io_buffer, encoding=None, errors=None, **kwargs):
        """Wrap *text_io_buffer*.

        encoding/errors fall back to the wrapped stream's own attributes
        when present, then to ``'utf-8'`` / ``'strict'``.
        """
        super(BytesIOWrapper, self).__init__(text_io_buffer, **kwargs)
        # getattr keeps this usable with text streams lacking these attributes.
        self.encoding = encoding or getattr(text_io_buffer, "encoding", None) or 'utf-8'
        self.errors = errors or getattr(text_io_buffer, "errors", None) or 'strict'

    def _encoding_call(self, method_name, *args, **kwargs):
        """Call *method_name* on the wrapped text stream and encode its result."""
        raw_method = getattr(self.raw, method_name)
        val = raw_method(*args, **kwargs)
        return val.encode(self.encoding, errors=self.errors)

    def read(self, size=-1):
        """Read up to *size* characters (all remaining if negative) as bytes."""
        return self._encoding_call('read', size)

    def read1(self, size=-1):
        """Same as :meth:`read`.

        Text streams such as ``io.StringIO`` have no ``read1`` method, so
        forwarding the call by name would raise AttributeError.
        """
        return self._encoding_call('read', size)

    def peek(self, size=-1):
        """Return upcoming data as bytes without advancing the stream position.

        Text streams have no ``peek`` method, so it is emulated by reading
        from the wrapped stream and then seeking back to where it was.
        """
        position = self.raw.tell()
        try:
            return self._encoding_call('read', size)
        finally:
            # Restore the position even if reading or encoding fails.
            self.raw.seek(position)
|
14
vxApi.py
14
vxApi.py
@ -3,7 +3,7 @@ from datetime import datetime
|
|||||||
from configHandler import config
|
from configHandler import config
|
||||||
from utils import stripEndTCO
|
from utils import stripEndTCO
|
||||||
|
|
||||||
def getApiResponse(tweet,include_txt=False,include_zip=False):
|
def getApiResponse(tweet,include_txt=False,include_rtf=False):
|
||||||
tweetL = tweet["legacy"]
|
tweetL = tweet["legacy"]
|
||||||
if "user_result" in tweet["core"]:
|
if "user_result" in tweet["core"]:
|
||||||
userL = tweet["core"]["user_result"]["result"]["legacy"]
|
userL = tweet["core"]["user_result"]["result"]["legacy"]
|
||||||
@ -66,16 +66,16 @@ def getApiResponse(tweet,include_txt=False,include_zip=False):
|
|||||||
hashtags.append(i["text"])
|
hashtags.append(i["text"])
|
||||||
|
|
||||||
#include_txt = request.args.get("include_txt", "false")
|
#include_txt = request.args.get("include_txt", "false")
|
||||||
#include_zip = request.args.get("include_zip", "false") # for certain types of archival software (i.e Hydrus)
|
#include_rtf = request.args.get("include_rtf", "false") # for certain types of archival software (i.e Hydrus)
|
||||||
|
|
||||||
if include_txt == "true" or (include_txt == "ifnomedia" and len(media)==0):
|
if include_txt == True or include_txt == "true" or (include_txt == "ifnomedia" and len(media)==0):
|
||||||
txturl = config['config']['url']+"/"+userL["screen_name"]+"/status/"+tweet["rest_id"]+".txt"
|
txturl = config['config']['url']+"/"+userL["screen_name"]+"/status/"+tweet["rest_id"]+".txt"
|
||||||
media.append(txturl)
|
media.append(txturl)
|
||||||
media_extended.append({"url":txturl,"type":"txt"})
|
media_extended.append({"url":txturl,"type":"txt"})
|
||||||
if include_zip == "true" or (include_zip == "ifnomedia" and len(media)==0):
|
if include_rtf == True or include_rtf == "true" or (include_rtf == "ifnomedia" and len(media)==0):
|
||||||
zipurl = config['config']['url']+"/"+userL["screen_name"]+"/status/"+tweet["rest_id"]+".zip"
|
rtfurl = config['config']['url']+"/"+userL["screen_name"]+"/status/"+tweet["rest_id"]+".rtf"
|
||||||
media.append(zipurl)
|
media.append(rtfurl)
|
||||||
media_extended.append({"url":zipurl,"type":"zip"})
|
media_extended.append({"url":rtfurl,"type":"rtf"})
|
||||||
|
|
||||||
qrtURL = None
|
qrtURL = None
|
||||||
if 'quoted_status_id_str' in tweetL:
|
if 'quoted_status_id_str' in tweetL:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user