43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
import re
|
|
import io
|
|
|
|
# Matches "<1-15 word characters>/status/<digits>" or the "/statuses/"
# variant anywhere in a string; group 2 captures the 2-20 digit id.
pathregex = re.compile(r"\w{1,15}/(status|statuses)/(\d{2,20})")

# Matches a string containing one or more spaces followed by an
# "https://t.co/" link, with the match running to the end of the string.
# Group 1 captures everything before those spaces.  Written as a raw string
# so no backslash is interpreted by the string literal, and with the dot in
# "t.co" escaped so it only matches a literal ".".
endTCOregex = re.compile(r"(^.*?) +https://t\.co/.*?$")
|
|
|
|
def getTweetIdFromUrl(url):
    """Extract the tweet id from *url*.

    The module-level ``pathregex`` is searched for anywhere in *url*.

    Args:
        url: String to search.

    Returns:
        The second capture group of ``pathregex`` (the id digits, as a
        string) when the pattern is found, otherwise ``None``.
    """
    found = pathregex.search(url)
    return found.group(2) if found is not None else None
|
|
|
|
def stripEndTCO(text):
    """Drop a t.co link from the end of *text*.

    Args:
        text: String to clean up.

    Returns:
        The first capture group of the module-level ``endTCOregex`` when it
        matches, i.e. the part of *text* that precedes the link; otherwise
        *text* unchanged.
    """
    hit = endTCOregex.search(text)
    if hit is None:
        # No trailing t.co link: hand the input back untouched.
        return text
    return hit.group(1)
|
|
|
|
# https://stackoverflow.com/a/55977438
|
|
class BytesIOWrapper(io.BufferedReader):
    """Wrap a buffered bytes stream over TextIOBase string stream.

    Every read is delegated to the wrapped text stream and the string it
    returns is encoded with ``self.encoding`` / ``self.errors`` before being
    handed back.  ``size`` arguments are forwarded unchanged to the text
    stream, so they count characters rather than bytes: with a multi-byte
    encoding a call may return more bytes than ``size``.
    """

    def __init__(self, text_io_buffer, encoding=None, errors=None, **kwargs):
        """Wrap *text_io_buffer*.

        Args:
            text_io_buffer: Readable text stream to wrap.
            encoding: Encoding used for the produced bytes.  Falls back to
                the stream's own ``encoding`` attribute, then to ``'utf-8'``.
            errors: Encoding error handler.  Falls back to the stream's own
                ``errors`` attribute, then to ``'strict'``.
            **kwargs: Passed through to ``io.BufferedReader.__init__``.
        """
        super(BytesIOWrapper, self).__init__(text_io_buffer, **kwargs)
        # getattr keeps duck-typed streams without these attributes usable.
        self.encoding = (
            encoding or getattr(text_io_buffer, 'encoding', None) or 'utf-8'
        )
        self.errors = (
            errors or getattr(text_io_buffer, 'errors', None) or 'strict'
        )

    def _encoding_call(self, method_name, *args, **kwargs):
        """Call ``self.raw.<method_name>(*args, **kwargs)`` and encode the result."""
        raw_method = getattr(self.raw, method_name)
        val = raw_method(*args, **kwargs)
        return val.encode(self.encoding, errors=self.errors)

    def read(self, size=-1):
        """Read up to *size* characters from the text stream, encoded as bytes."""
        return self._encoding_call('read', size)

    def read1(self, size=-1):
        """Like :meth:`read`, using the stream's ``read1`` when it has one.

        Text streams (``io.TextIOBase``) define no ``read1()``, so a plain
        ``read()`` is used for them instead of raising ``AttributeError``.
        """
        method_name = 'read1' if hasattr(self.raw, 'read1') else 'read'
        return self._encoding_call(method_name, size)

    def peek(self, size=-1):
        """Return upcoming data as bytes without advancing the position.

        When the wrapped stream has its own ``peek`` it is used directly.
        Otherwise the call is emulated by reading and seeking back, which
        requires a seekable stream.

        Args:
            size: Number of characters to look ahead.  When it is ``None``,
                zero or negative, the emulation looks ahead
                ``io.DEFAULT_BUFFER_SIZE`` characters.

        Raises:
            io.UnsupportedOperation: The stream has no ``peek`` and is not
                seekable.
        """
        raw = self.raw
        if hasattr(raw, 'peek'):
            return self._encoding_call('peek', size)
        if not raw.seekable():
            raise io.UnsupportedOperation(
                'peek() requires a seekable text stream'
            )
        if size is None or size <= 0:
            size = io.DEFAULT_BUFFER_SIZE
        position = raw.tell()
        try:
            return self._encoding_call('read', size)
        finally:
            # Restore the position even if reading or encoding failed.
            raw.seek(position)