Commit 2d638de0 authored by Himanshu Dabas's avatar Himanshu Dabas Committed by GitHub

fix for deprecation of v1.1 endpoints (#944)

parent 421a155a
...@@ -309,3 +309,6 @@ def run_as_command(): ...@@ -309,3 +309,6 @@ def run_as_command():
sys.exit(0) sys.exit(0)
main() main()
if __name__ == '__main__':
main()
...@@ -81,3 +81,5 @@ class Config: ...@@ -81,3 +81,5 @@ class Config:
TranslateDest: str = "en" TranslateDest: str = "en"
Backoff_exponent: float = 3.0 Backoff_exponent: float = 3.0
Min_wait_time: int = 0 Min_wait_time: int = 0
Bearer_token: str = None
Guest_token: str = None
...@@ -4,32 +4,39 @@ from json import loads ...@@ -4,32 +4,39 @@ from json import loads
import logging as logme import logging as logme
class NoMoreTweetsException(Exception):
    """Raised when a search response contains no further tweets to scrape."""

    def __init__(self, msg):
        super().__init__(msg)
def Follow(response):
    """Parse a legacy follow-list HTML page.

    Returns a tuple of (follow entries, pagination cursor). When no cursor
    link is present, the raw ``find_all`` result is returned unchanged, as in
    the original implementation.
    """
    logme.debug(__name__ + ':Follow')
    parsed = BeautifulSoup(response, "html.parser")
    follow = parsed.find_all("td", "info fifty screenname")
    cursor = parsed.find_all("div", "w-button-more")
    matches = findall(r'cursor=(.*?)">', str(cursor))
    if matches:
        cursor = matches[0]
    else:
        logme.critical(__name__ + ':Follow:IndexError')
    return follow, cursor
def Mobile(response):
    """Parse a mobile-twitter HTML page.

    Returns a tuple of (tweet metadata spans, max_id pagination token). On a
    parse failure the raw ``find_all`` result is returned as ``max_id``.
    """
    logme.debug(__name__ + ':Mobile')
    dom = BeautifulSoup(response, "html.parser")
    tweets = dom.find_all("span", "metadata")
    max_id = dom.find_all("div", "w-button-more")
    try:
        max_id = findall(r'max_id=(.*?)">', str(max_id))[0]
    except Exception as err:
        logme.critical(__name__ + ':Mobile:' + str(err))
    return tweets, max_id
def MobileFav(response):
def MobileFav(response):
soup = BeautifulSoup(response, "html.parser") soup = BeautifulSoup(response, "html.parser")
tweets = soup.find_all("table", "tweet") tweets = soup.find_all("table", "tweet")
max_id = soup.find_all("div", "w-button-more") max_id = soup.find_all("div", "w-button-more")
...@@ -40,8 +47,9 @@ def MobileFav(response): ...@@ -40,8 +47,9 @@ def MobileFav(response):
return tweets, max_id return tweets, max_id
def profile(response): def profile(response):
logme.debug(__name__+':profile') logme.debug(__name__ + ':profile')
json_response = loads(response) json_response = loads(response)
html = json_response["items_html"] html = json_response["items_html"]
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
...@@ -49,10 +57,54 @@ def profile(response): ...@@ -49,10 +57,54 @@ def profile(response):
return feed, feed[-1]["data-item-id"] return feed, feed[-1]["data-item-id"]
def Json(response):
    """Parse a JSON timeline response containing rendered HTML.

    Returns a tuple of (tweet div nodes, ``min_position`` pagination token).
    """
    logme.debug(__name__ + ':Json')
    payload = loads(response)
    markup = BeautifulSoup(payload["items_html"], "html.parser")
    return markup.find_all("div", "tweet"), payload["min_position"]
def search_v2(response):
    """Parse a v2 adaptive-search JSON response.

    :param response: raw JSON body returned by the search endpoint.
    :return: tuple ``(feed, next_cursor)`` where ``feed`` is a list of tweet
        dicts (each augmented with a ``user_data`` key holding the author's
        user object) and ``next_cursor`` is the pagination cursor for the
        next request.
    :raises NoMoreTweetsException: when the response contains no tweets,
        i.e. scraping is finished.
    """
    response = loads(response)
    if not response['globalObjects']['tweets']:
        raise NoMoreTweetsException('No more data. finished scraping!!')

    feed = []
    for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']:
        # Entries whose id starts with 'sq-I-t-' are tweets; other entries
        # (cursors, modules) are skipped.
        if timeline_entry['entryId'].find('sq-I-t-') == 0:
            _id = timeline_entry['content']['item']['content']['tweet']['id']
            temp_obj = response['globalObjects']['tweets'][_id]
            temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
            feed.append(temp_obj)
    try:
        next_cursor = response['timeline']['instructions'][0]['addEntries']['entries'][-1]['content'][
            'operation']['cursor']['value']
    except KeyError:
        # After the first request the cursor moves to a replaceEntry
        # instruction at the end of the instruction list.
        next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
            'cursor']['value']
    return feed, next_cursor
...@@ -37,8 +37,9 @@ def Tweet(config, t): ...@@ -37,8 +37,9 @@ def Tweet(config, t):
logme.debug(__name__+':Tweet:notFormat') logme.debug(__name__+':Tweet:notFormat')
output = f"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} " output = f"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "
if t.retweet: # TODO: someone who is familiar with this code, needs to take a look at what this is <also see tweet.py>
output += "RT " # if t.retweet:
# output += "RT "
output += f"<{t.username}> {t.tweet}" output += f"<{t.username}> {t.tweet}"
......
...@@ -8,28 +8,40 @@ from fake_useragent import UserAgent ...@@ -8,28 +8,40 @@ from fake_useragent import UserAgent
import asyncio import asyncio
import concurrent.futures import concurrent.futures
import random import random
from json import loads from json import loads, dumps
from aiohttp_socks import ProxyConnector, ProxyType from aiohttp_socks import ProxyConnector, ProxyType
from urllib.parse import quote
from . import url from . import url
from .output import Tweets, Users from .output import Tweets, Users
from .user import inf from .token import TokenExpiryException
import logging as logme import logging as logme
httpproxy = None httpproxy = None
user_agent_list = [ user_agent_list = [
#'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36', # 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36', # ' Chrome/60.0.3112.113 Safari/537.36',
#'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36', # 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36', # ' Chrome/60.0.3112.90 Safari/537.36',
#'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36', # 'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36', # ' Chrome/60.0.3112.90 Safari/537.36',
#'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', # 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', # ' Chrome/60.0.3112.90 Safari/537.36',
#'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36', # 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36', # ' Chrome/44.0.2403.157 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/60.0.3112.113 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/57.0.2987.133 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/57.0.2987.133 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/55.0.2883.87 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)', 'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
...@@ -42,11 +54,19 @@ user_agent_list = [ ...@@ -42,11 +54,19 @@ user_agent_list = [
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)', 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)' 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET '
'CLR 3.5.30729)',
] ]
def dict_to_url(dct):
    """Serialize *dct* to JSON and percent-encode it for use as a URL query
    parameter — some endpoints expect their ``variables`` argument in this
    format."""
    encoded = dumps(dct)
    return quote(encoded)
def get_connector(config): def get_connector(config):
logme.debug(__name__+':get_connector') logme.debug(__name__ + ':get_connector')
_connector = None _connector = None
if config.Proxy_host: if config.Proxy_host:
if config.Proxy_host.lower() == "tor": if config.Proxy_host.lower() == "tor":
...@@ -73,82 +93,92 @@ def get_connector(config): ...@@ -73,82 +93,92 @@ def get_connector(config):
port=config.Proxy_port, port=config.Proxy_port,
rdns=True) rdns=True)
else: else:
logme.critical(__name__+':get_connector:proxy-port-type-error') logme.critical(__name__ + ':get_connector:proxy-port-type-error')
print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type") print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
sys.exit(1) sys.exit(1)
else: else:
if config.Proxy_port or config.Proxy_type: if config.Proxy_port or config.Proxy_type:
logme.critical(__name__+':get_connector:proxy-host-arg-error') logme.critical(__name__ + ':get_connector:proxy-host-arg-error')
print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type") print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
sys.exit(1) sys.exit(1)
return _connector return _connector
async def RequestUrl(config, init, headers=None):
    """Build the URL for the configured scrape mode and perform the request.

    :param config: twint Config object selecting the scrape mode
        (Profile / TwitterSearch / Following / Followers / Favorites).
    :param init: pagination token (cursor) for the next page, or the
        initial-request sentinel.
    :param headers: unused; kept for backward compatibility with older
        callers. (Was a mutable ``[]`` default — now ``None``.)
    :return: raw response body text.
    """
    logme.debug(__name__ + ':RequestUrl')
    _connector = get_connector(config)
    _serialQuery = ""
    params = []
    _url = ""
    _headers = {}

    if config.Profile:
        if config.Profile_full:
            logme.debug(__name__ + ':RequestUrl:Profile_full')
            _url = await url.MobileProfile(config.Username, init)
        else:
            logme.debug(__name__ + ':RequestUrl:notProfile_full')
            _url = await url.Profile(config.Username, init)
        _serialQuery = _url
    elif config.TwitterSearch:
        logme.debug(__name__ + ':RequestUrl:TwitterSearch')
        _url, params, _serialQuery = await url.Search(config, init)
        # The v2 search endpoint requires guest authentication headers.
        _headers = {"authorization": config.Bearer_token,
                    "x-guest-token": config.Guest_token}
    else:
        if config.Following:
            logme.debug(__name__ + ':RequestUrl:Following')
            _url = await url.Following(config.Username, init)
        elif config.Followers:
            logme.debug(__name__ + ':RequestUrl:Followers')
            _url = await url.Followers(config.Username, init)
        else:
            logme.debug(__name__ + ':RequestUrl:Favorites')
            _url = await url.Favorites(config.Username, init)
        _serialQuery = _url

    response = await Request(_url, params=params, connector=_connector, headers=_headers)

    if config.Debug:
        # Use a context manager so the log file handle is not leaked.
        with open("twint-request_urls.log", "a", encoding="utf-8") as log_file:
            print(_serialQuery, file=log_file)

    return response
def ForceNewTorIdentity(config):
    """Request a fresh Tor circuit via the local control port (SIGNAL NEWNYM).

    :param config: Config carrying ``Tor_control_port`` and
        ``Tor_control_password``.

    Any connection/protocol error is reported on stderr and logged, never
    raised — identity rotation is best-effort.
    """
    logme.debug(__name__ + ':ForceNewTorIdentity')
    try:
        tor_c = socket.create_connection(('127.0.0.1', config.Tor_control_port))
        try:
            tor_c.send('AUTHENTICATE "{}"\r\nSIGNAL NEWNYM\r\n'.format(config.Tor_control_password).encode())
            response = tor_c.recv(1024)
        finally:
            # The socket was previously leaked; always close it.
            tor_c.close()
        if response != b'250 OK\r\n250 OK\r\n':
            sys.stderr.write('Unexpected response from Tor control port: {}\n'.format(response))
            logme.critical(__name__ + ':ForceNewTorIdentity:unexpectedResponse')
    except Exception as e:
        logme.debug(__name__ + ':ForceNewTorIdentity:errorConnectingTor')
        sys.stderr.write('Error connecting to Tor control port: {}\n'.format(repr(e)))
        sys.stderr.write('If you want to rotate Tor ports automatically - enable Tor control port\n')
async def Request(_url, connector=None, params=None, headers=None):
    """Open a throwaway aiohttp session and GET *_url*.

    :param _url: target URL.
    :param connector: optional aiohttp connector (e.g. a proxy connector).
    :param params: optional query parameters.
    :param headers: optional request headers.
    :return: response body text (see :func:`Response`).
    """
    logme.debug(__name__ + ':Request:Connector')
    session = aiohttp.ClientSession(connector=connector, headers=headers)
    async with session:
        return await Response(session, _url, params)
async def Response(session, _url, params=None):
    """GET *_url* within a 120-second timeout and return the body text.

    Raises TokenExpiryException on HTTP 429 (rate limit exceeded), carrying
    the error message from the JSON error body, so callers can refresh the
    guest token.
    """
    logme.debug(__name__ + ':Response')
    with timeout(120):
        async with session.get(_url, ssl=True, params=params, proxy=httpproxy) as response:
            body = await response.text()
            # 429 implies Too many requests i.e. Rate Limit Exceeded
            if response.status == 429:
                raise TokenExpiryException(loads(body)['errors'][0]['message'])
            return body
async def RandomUserAgent(wa=None): async def RandomUserAgent(wa=None):
logme.debug(__name__+':RandomUserAgent') logme.debug(__name__ + ':RandomUserAgent')
try: try:
if wa: if wa:
return "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36" return "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
...@@ -156,43 +186,61 @@ async def RandomUserAgent(wa=None): ...@@ -156,43 +186,61 @@ async def RandomUserAgent(wa=None):
except: except:
return random.choice(user_agent_list) return random.choice(user_agent_list)
async def Username(_id, bearer_token, guest_token):
    """Resolve a numeric user id to a screen name via the GraphQL
    ``UserByRestId`` endpoint.

    :param _id: numeric twitter user id.
    :param bearer_token: authorization header value.
    :param guest_token: x-guest-token header value.
    :return: the user's screen name.
    """
    logme.debug(__name__ + ':Username')
    variables = {'userId': _id, 'withHighlightedLabel': False}
    request_url = "https://api.twitter.com/graphql/B9FuNQVmyx32rdbIPEZKag/UserByRestId?variables={}".format(
        dict_to_url(variables))
    auth_headers = {
        'authorization': bearer_token,
        'x-guest-token': guest_token,
    }
    payload = loads(await Request(request_url, headers=auth_headers))
    return payload['data']['user']['legacy']['screen_name']
async def Tweet(url, config, conn):
    """Fetch a single tweet page and hand the parsed tweet nodes to
    ``Tweets`` for output. Errors are logged, never raised."""
    logme.debug(__name__ + ':Tweet')
    try:
        page = await Request(url)
        nodes = BeautifulSoup(page, "html.parser").find_all("div", "tweet")
        await Tweets(nodes, config, conn, url)
    except Exception as err:
        logme.critical(__name__ + ':Tweet:' + str(err))
async def User(username, config, conn, bearer_token, guest_token, user_id=False):
    """Look up a user by screen name via the GraphQL ``UserByScreenName``
    endpoint.

    :param username: screen name to resolve.
    :param config: twint Config object.
    :param conn: database connection handed through to ``Users``.
    :param bearer_token: authorization header value.
    :param guest_token: x-guest-token header value.
    :param user_id: when truthy, return only the user's ``rest_id`` instead
        of forwarding the full record to ``Users``.
    :raises: re-raises any request/parse error after logging it.
    """
    logme.debug(__name__ + ':User')
    variables = {'screen_name': username, 'withHighlightedLabel': False}
    request_url = 'https://api.twitter.com/graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName?variables={}' \
        .format(dict_to_url(variables))
    auth_headers = {
        'authorization': bearer_token,
        'x-guest-token': guest_token,
    }
    try:
        raw = await Request(request_url, headers=auth_headers)
        record = loads(raw)
        if user_id:
            return record['data']['user']['rest_id']
        await Users(record, config, conn)
    except Exception as err:
        logme.critical(__name__ + ':User:' + str(err))
        raise
def Limit(Limit, count):
    """Return True once *count* has reached the configured limit.

    Returns None (falsy) when no limit is set or the limit is not yet
    reached, matching the original implicit-return behavior.
    """
    logme.debug(__name__ + ':Limit')
    if Limit is None:
        return None
    if count >= int(Limit):
        return True
async def Multi(feed, config, conn): async def Multi(feed, config, conn):
logme.debug(__name__+':Multi') logme.debug(__name__ + ':Multi')
count = 0 count = 0
try: try:
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
...@@ -201,27 +249,27 @@ async def Multi(feed, config, conn): ...@@ -201,27 +249,27 @@ async def Multi(feed, config, conn):
for tweet in feed: for tweet in feed:
count += 1 count += 1
if config.Favorites or config.Profile_full: if config.Favorites or config.Profile_full:
logme.debug(__name__+':Multi:Favorites-profileFull') logme.debug(__name__ + ':Multi:Favorites-profileFull')
link = tweet.find("a")["href"] link = tweet.find("a")["href"]
url = f"https://twitter.com{link}&lang=en" url = f"https://twitter.com{link}&lang=en"
elif config.User_full: elif config.User_full:
logme.debug(__name__+':Multi:userFull') logme.debug(__name__ + ':Multi:userFull')
username = tweet.find("a")["name"] username = tweet.find("a")["name"]
url = f"http://twitter.com/{username}?lang=en" url = f"http://twitter.com/{username}?lang=en"
else: else:
logme.debug(__name__+':Multi:else-url') logme.debug(__name__ + ':Multi:else-url')
link = tweet.find("a", "tweet-timestamp js-permalink js-nav js-tooltip")["href"] link = tweet.find("a", "tweet-timestamp js-permalink js-nav js-tooltip")["href"]
url = f"https://twitter.com{link}?lang=en" url = f"https://twitter.com{link}?lang=en"
if config.User_full: if config.User_full:
logme.debug(__name__+':Multi:user-full-Run') logme.debug(__name__ + ':Multi:user-full-Run')
futures.append(loop.run_in_executor(executor, await User(url, futures.append(loop.run_in_executor(executor, await User(url,
config, conn))) config, conn)))
else: else:
logme.debug(__name__+':Multi:notUser-full-Run') logme.debug(__name__ + ':Multi:notUser-full-Run')
futures.append(loop.run_in_executor(executor, await Tweet(url, futures.append(loop.run_in_executor(executor, await Tweet(url,
config, conn))) config, conn)))
logme.debug(__name__+':Multi:asyncioGather') logme.debug(__name__ + ':Multi:asyncioGather')
await asyncio.gather(*futures) await asyncio.gather(*futures)
except Exception as e: except Exception as e:
# TODO: fix error not error # TODO: fix error not error
......
...@@ -17,19 +17,22 @@ author_list.pop() ...@@ -17,19 +17,22 @@ author_list.pop()
# used by Pandas # used by Pandas
_follows_object = {} _follows_object = {}
def _formatDateTime(datetimestamp):
    """Convert a ``YYYY-MM-DD HH:MM:SS`` or ``YYYY-MM-DD`` string to a unix
    timestamp (int). Raises ValueError if neither format matches."""
    try:
        parsed = datetime.strptime(datetimestamp, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Fall back to the date-only form used by --since/--until.
        parsed = datetime.strptime(datetimestamp, "%Y-%m-%d")
    return int(parsed.timestamp())
def _clean_follow_list():
    """Reset the module-level follow cache used by the Pandas integration."""
    logme.debug(__name__ + ':clean_follow_list')
    global _follows_object
    # Rebind (rather than .clear()) so stale external references keep the
    # old dict, exactly as before.
    _follows_object = {}
def clean_lists(): def clean_lists():
logme.debug(__name__+':clean_lists') logme.debug(__name__ + ':clean_lists')
global follows_list global follows_list
global tweets_list global tweets_list
global users_list global users_list
...@@ -37,10 +40,11 @@ def clean_lists(): ...@@ -37,10 +40,11 @@ def clean_lists():
tweets_list = [] tweets_list = []
users_list = [] users_list = []
def datecheck(datetimestamp, config): def datecheck(datetimestamp, config):
logme.debug(__name__+':datecheck') logme.debug(__name__ + ':datecheck')
if config.Since: if config.Since:
logme.debug(__name__+':datecheck:SinceTrue') logme.debug(__name__ + ':datecheck:SinceTrue')
d = _formatDateTime(datetimestamp) d = _formatDateTime(datetimestamp)
s = _formatDateTime(config.Since) s = _formatDateTime(config.Since)
...@@ -48,44 +52,49 @@ def datecheck(datetimestamp, config): ...@@ -48,44 +52,49 @@ def datecheck(datetimestamp, config):
if d < s: if d < s:
return False return False
if config.Until: if config.Until:
logme.debug(__name__+':datecheck:UntilTrue') logme.debug(__name__ + ':datecheck:UntilTrue')
d = _formatDateTime(datetimestamp) d = _formatDateTime(datetimestamp)
s = _formatDateTime(config.Until) s = _formatDateTime(config.Until)
if d > s: if d > s:
return False return False
logme.debug(__name__+':datecheck:dateRangeFalse') logme.debug(__name__ + ':datecheck:dateRangeFalse')
return True return True
# TODO In this method we need to delete the quoted tweets, because twitter also sends the quoted tweets in the
# `tweets` list along with the other tweets
def is_tweet(tw):
    """Return True if *tw* looks like a real tweet node, i.e. it carries a
    ``data-item-id`` attribute; withheld/placeholder nodes lack it.

    :param tw: a parsed tweet node (subscriptable, e.g. a bs4 Tag).
    :return: True when the attribute is present, False otherwise.
    """
    try:
        tw["data-item-id"]
        logme.debug(__name__ + ':is_tweet:True')
        return True
    # Previously a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit; narrow to the lookup failures that can actually occur.
    except (KeyError, TypeError):
        logme.critical(__name__ + ':is_tweet:False')
        return False
def _output(obj, output, config, **extra): def _output(obj, output, config, **extra):
logme.debug(__name__+':_output') logme.debug(__name__ + ':_output')
if config.Lowercase: if config.Lowercase:
if isinstance(obj, str): if isinstance(obj, str):
logme.debug(__name__+':_output:Lowercase:username') logme.debug(__name__ + ':_output:Lowercase:username')
obj = obj.lower() obj = obj.lower()
elif obj.__class__.__name__ == "user": elif obj.__class__.__name__ == "user":
logme.debug(__name__+':_output:Lowercase:user') logme.debug(__name__ + ':_output:Lowercase:user')
pass pass
elif obj.__class__.__name__ == "tweet": elif obj.__class__.__name__ == "tweet":
logme.debug(__name__+':_output:Lowercase:tweet') logme.debug(__name__ + ':_output:Lowercase:tweet')
obj.username = obj.username.lower() obj.username = obj.username.lower()
author_list.update({obj.username}) author_list.update({obj.username})
for i in range(len(obj.mentions)): for i in range(len(obj.mentions)):
obj.mentions[i] = obj.mentions[i].lower() obj.mentions[i] = obj.mentions[i].lower()
for i in range(len(obj.hashtags)): for i in range(len(obj.hashtags)):
obj.hashtags[i] = obj.hashtags[i].lower() obj.hashtags[i] = obj.hashtags[i].lower()
for i in range(len(obj.cashtags)): # TODO : dont know what cashtags are, <also modify in tweet.py>
obj.cashtags[i] = obj.cashtags[i].lower() # for i in range(len(obj.cashtags)):
# obj.cashtags[i] = obj.cashtags[i].lower()
else: else:
logme.info('_output:Lowercase:hiddenTweetFound') logme.info('_output:Lowercase:hiddenTweetFound')
print("[x] Hidden tweet found, account suspended due to violation of TOS") print("[x] Hidden tweet found, account suspended due to violation of TOS")
...@@ -94,36 +103,36 @@ def _output(obj, output, config, **extra): ...@@ -94,36 +103,36 @@ def _output(obj, output, config, **extra):
if config.Store_csv: if config.Store_csv:
try: try:
write.Csv(obj, config) write.Csv(obj, config)
logme.debug(__name__+':_output:CSV') logme.debug(__name__ + ':_output:CSV')
except Exception as e: except Exception as e:
logme.critical(__name__+':_output:CSV:Error:' + str(e)) logme.critical(__name__ + ':_output:CSV:Error:' + str(e))
print(str(e) + " [x] output._output") print(str(e) + " [x] output._output")
elif config.Store_json: elif config.Store_json:
write.Json(obj, config) write.Json(obj, config)
logme.debug(__name__+':_output:JSON') logme.debug(__name__ + ':_output:JSON')
else: else:
write.Text(output, config.Output) write.Text(output, config.Output)
logme.debug(__name__+':_output:Text') logme.debug(__name__ + ':_output:Text')
if config.Elasticsearch: if config.Elasticsearch:
logme.debug(__name__+':_output:Elasticsearch') logme.debug(__name__ + ':_output:Elasticsearch')
print("", end=".", flush=True) print("", end=".", flush=True)
else: else:
if not config.Hide_output: if not config.Hide_output:
try: try:
print(output.replace('\n', ' ')) print(output.replace('\n', ' '))
except UnicodeEncodeError: except UnicodeEncodeError:
logme.critical(__name__+':_output:UnicodeEncodeError') logme.critical(__name__ + ':_output:UnicodeEncodeError')
print("unicode error [x] output._output") print("unicode error [x] output._output")
async def checkData(tweet, config, conn): async def checkData(tweet, config, conn):
logme.debug(__name__+':checkData') logme.debug(__name__ + ':checkData')
copyright = tweet.find("div", "StreamItemContent--withheld")
if copyright is None and is_tweet(tweet):
tweet = Tweet(tweet, config) tweet = Tweet(tweet, config)
if not tweet.datestamp: if not tweet.datestamp:
logme.critical(__name__+':checkData:hiddenTweetFound') logme.critical(__name__ + ':checkData:hiddenTweetFound')
print("[x] Hidden tweet found, account suspended due to violation of TOS") print("[x] Hidden tweet found, account suspended due to violation of TOS")
return return
...@@ -131,56 +140,58 @@ async def checkData(tweet, config, conn): ...@@ -131,56 +140,58 @@ async def checkData(tweet, config, conn):
output = format.Tweet(config, tweet) output = format.Tweet(config, tweet)
if config.Database: if config.Database:
logme.debug(__name__+':checkData:Database') logme.debug(__name__ + ':checkData:Database')
db.tweets(conn, tweet, config) db.tweets(conn, tweet, config)
if config.Pandas: if config.Pandas:
logme.debug(__name__+':checkData:Pandas') logme.debug(__name__ + ':checkData:Pandas')
panda.update(tweet, config) panda.update(tweet, config)
if config.Store_object: if config.Store_object:
logme.debug(__name__+':checkData:Store_object') logme.debug(__name__ + ':checkData:Store_object')
if hasattr(config.Store_object_tweets_list, 'append'): if hasattr(config.Store_object_tweets_list, 'append'):
config.Store_object_tweets_list.append(tweet) config.Store_object_tweets_list.append(tweet)
else: else:
tweets_list.append(tweet) tweets_list.append(tweet)
if config.Elasticsearch: if config.Elasticsearch:
logme.debug(__name__+':checkData:Elasticsearch') logme.debug(__name__ + ':checkData:Elasticsearch')
elasticsearch.Tweet(tweet, config) elasticsearch.Tweet(tweet, config)
_output(tweet, output, config) _output(tweet, output, config)
else: # else:
logme.critical(__name__+':checkData:copyrightedTweet') # logme.critical(__name__+':checkData:copyrightedTweet')
async def Tweets(tweets, config, conn, url=''):
    """Route scraped tweet data to ``checkData`` according to scrape mode.

    Favorites/Profile_full/Location receive a list of nodes and keep only
    the one whose id matches the id embedded in *url*; TwitterSearch passes
    the payload straight through; otherwise a single node is forwarded when
    it belongs to the target user or retweets are enabled.
    """
    logme.debug(__name__ + ':Tweets')
    if config.Favorites or config.Profile_full or config.Location:
        logme.debug(__name__ + ':Tweets:fav+full+loc')
        # The wanted tweet id is the last path segment of the URL.
        target_id = url.split('?')[0].split('/')[-1]
        for tw in tweets:
            if tw['data-item-id'] == target_id:
                await checkData(tw, config, conn)
    elif config.TwitterSearch:
        logme.debug(__name__ + ':Tweets:TwitterSearch')
        await checkData(tweets, config, conn)
    else:
        logme.debug(__name__ + ':Tweets:else')
        if int(tweets["data-user-id"]) == config.User_id or config.Retweets:
            await checkData(tweets, config, conn)
async def Users(u, config, conn): async def Users(u, config, conn):
logme.debug(__name__+':User') logme.debug(__name__ + ':User')
global users_list global users_list
user = User(u) user = User(u)
output = format.User(config.Format, user) output = format.User(config.Format, user)
if config.Database: if config.Database:
logme.debug(__name__+':User:Database') logme.debug(__name__ + ':User:Database')
db.user(conn, config, user) db.user(conn, config, user)
if config.Elasticsearch: if config.Elasticsearch:
logme.debug(__name__+':User:Elasticsearch') logme.debug(__name__ + ':User:Elasticsearch')
_save_date = user.join_date _save_date = user.join_date
_save_time = user.join_time _save_time = user.join_time
user.join_date = str(datetime.strptime(user.join_date, "%d %b %Y")).split()[0] user.join_date = str(datetime.strptime(user.join_date, "%d %b %Y")).split()[0]
...@@ -190,7 +201,7 @@ async def Users(u, config, conn): ...@@ -190,7 +201,7 @@ async def Users(u, config, conn):
user.join_time = _save_time user.join_time = _save_time
if config.Store_object: if config.Store_object:
logme.debug(__name__+':User:Store_object') logme.debug(__name__ + ':User:Store_object')
if hasattr(config.Store_object_follow_list, 'append'): if hasattr(config.Store_object_follow_list, 'append'):
config.Store_object_follow_list.append(user) config.Store_object_follow_list.append(user)
...@@ -200,23 +211,24 @@ async def Users(u, config, conn): ...@@ -200,23 +211,24 @@ async def Users(u, config, conn):
users_list.append(user) # twint.user.user users_list.append(user) # twint.user.user
if config.Pandas: if config.Pandas:
logme.debug(__name__+':User:Pandas+user') logme.debug(__name__ + ':User:Pandas+user')
panda.update(user, config) panda.update(user, config)
_output(user, output, config) _output(user, output, config)
async def Username(username, config, conn): async def Username(username, config, conn):
logme.debug(__name__+':Username') logme.debug(__name__ + ':Username')
global _follows_object global _follows_object
global follows_list global follows_list
follow_var = config.Following*"following" + config.Followers*"followers" follow_var = config.Following * "following" + config.Followers * "followers"
if config.Database: if config.Database:
logme.debug(__name__+':Username:Database') logme.debug(__name__ + ':Username:Database')
db.follow(conn, config.Username, config.Followers, username) db.follow(conn, config.Username, config.Followers, username)
if config.Elasticsearch: if config.Elasticsearch:
logme.debug(__name__+':Username:Elasticsearch') logme.debug(__name__ + ':Username:Elasticsearch')
elasticsearch.Follow(username, config) elasticsearch.Follow(username, config)
if config.Store_object: if config.Store_object:
...@@ -226,13 +238,13 @@ async def Username(username, config, conn): ...@@ -226,13 +238,13 @@ async def Username(username, config, conn):
follows_list.append(username) # twint.user.user follows_list.append(username) # twint.user.user
if config.Pandas: if config.Pandas:
logme.debug(__name__+':Username:object+pandas') logme.debug(__name__ + ':Username:object+pandas')
try: try:
_ = _follows_object[config.Username][follow_var] _ = _follows_object[config.Username][follow_var]
except KeyError: except KeyError:
_follows_object.update({config.Username: {follow_var: []}}) _follows_object.update({config.Username: {follow_var: []}})
_follows_object[config.Username][follow_var].append(username) _follows_object[config.Username][follow_var].append(username)
if config.Pandas_au: if config.Pandas_au:
logme.debug(__name__+':Username:object+pandas+au') logme.debug(__name__ + ':Username:object+pandas+au')
panda.update(_follows_object[config.Username], config) panda.update(_follows_object[config.Username], config)
_output(username, username, config) _output(username, username, config)
import sys, os, time, datetime import sys, os, datetime
from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop
from . import datelock, feed, get, output, verbose, storage from . import datelock, feed, get, output, verbose, storage
from .token import TokenExpiryException
from . import token
from .storage import db from .storage import db
from .feed import NoMoreTweetsException
import logging as logme import logging as logme
import time import time
bearer = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs' \
'%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
class Twint: class Twint:
def __init__(self, config): def __init__(self, config):
logme.debug(__name__+':Twint:__init__') logme.debug(__name__ + ':Twint:__init__')
if config.Resume is not None and (config.TwitterSearch or config.Followers or config.Following): if config.Resume is not None and (config.TwitterSearch or config.Followers or config.Following):
logme.debug(__name__+':Twint:__init__:Resume') logme.debug(__name__ + ':Twint:__init__:Resume')
self.init = self.get_resume(config.Resume) self.init = self.get_resume(config.Resume)
else: else:
self.init = '-1' self.init = '-1'
...@@ -21,16 +28,21 @@ class Twint: ...@@ -21,16 +28,21 @@ class Twint:
self.count = 0 self.count = 0
self.user_agent = "" self.user_agent = ""
self.config = config self.config = config
self.config.Bearer_token = bearer
# TODO might have to make some adjustments for it to work with multi-treading
# USAGE : to get a new guest token simply do `self.token.refresh()`
self.token = token.Token(config)
self.token.refresh()
self.conn = db.Conn(config.Database) self.conn = db.Conn(config.Database)
self.d = datelock.Set(self.config.Until, self.config.Since) self.d = datelock.Set(self.config.Until, self.config.Since)
verbose.Elastic(config.Elasticsearch) verbose.Elastic(config.Elasticsearch)
if self.config.Store_object: if self.config.Store_object:
logme.debug(__name__+':Twint:__init__:clean_follow_list') logme.debug(__name__ + ':Twint:__init__:clean_follow_list')
output._clean_follow_list() output._clean_follow_list()
if self.config.Pandas_clean: if self.config.Pandas_clean:
logme.debug(__name__+':Twint:__init__:pandas_clean') logme.debug(__name__ + ':Twint:__init__:pandas_clean')
storage.panda.clean() storage.panda.clean()
def get_resume(self, resumeFile): def get_resume(self, resumeFile):
...@@ -41,10 +53,17 @@ class Twint: ...@@ -41,10 +53,17 @@ class Twint:
return _init return _init
async def Feed(self): async def Feed(self):
logme.debug(__name__+':Twint:Feed') logme.debug(__name__ + ':Twint:Feed')
consecutive_errors_count = 0 consecutive_errors_count = 0
while True: while True:
# this will receive a JSON string, parse it into a `dict` and do the required stuff
try:
response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)])
except TokenExpiryException as e:
logme.debug(__name__ + 'Twint:Feed:' + str(e))
self.token.refresh()
response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)]) response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)])
if self.config.Debug: if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8")) print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
...@@ -75,29 +94,36 @@ class Twint: ...@@ -75,29 +94,36 @@ class Twint:
else: else:
self.feed, self.init = feed.profile(response) self.feed, self.init = feed.profile(response)
elif self.config.TwitterSearch: elif self.config.TwitterSearch:
self.feed, self.init = feed.Json(response) try:
self.feed, self.init = feed.search_v2(response)
except NoMoreTweetsException as e:
logme.debug(__name__ + ':Twint:Feed:' + str(e))
print(e, 'is it though? because sometimes twitter lie.')
break break
except TimeoutError as e: except TimeoutError as e:
if self.config.Proxy_host.lower() == "tor": if self.config.Proxy_host.lower() == "tor":
print("[?] Timed out, changing Tor identity...") print("[?] Timed out, changing Tor identity...")
if self.config.Tor_control_password is None: if self.config.Tor_control_password is None:
logme.critical(__name__+':Twint:Feed:tor-password') logme.critical(__name__ + ':Twint:Feed:tor-password')
sys.stderr.write("Error: config.Tor_control_password must be set for proxy autorotation!\r\n") sys.stderr.write("Error: config.Tor_control_password must be set for proxy autorotation!\r\n")
sys.stderr.write("Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors-controller-interface-directly\r\n") sys.stderr.write(
"Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors-controller-interface-directly\r\n")
break break
else: else:
get.ForceNewTorIdentity(self.config) get.ForceNewTorIdentity(self.config)
continue continue
else: else:
logme.critical(__name__+':Twint:Feed:' + str(e)) logme.critical(__name__ + ':Twint:Feed:' + str(e))
print(str(e)) print(str(e))
break break
except Exception as e: except Exception as e:
if self.config.Profile or self.config.Favorites: if self.config.Profile or self.config.Favorites:
print("[!] Twitter does not return more data, scrape stops here.") print("[!] Twitter does not return more data, scrape stops here.")
break break
logme.critical(__name__+':Twint:Feed:noData' + str(e))
logme.critical(__name__ + ':Twint:Feed:noData' + str(e))
# Sometimes Twitter says there is no data. But it's a lie. # Sometimes Twitter says there is no data. But it's a lie.
# raise
consecutive_errors_count += 1 consecutive_errors_count += 1
if consecutive_errors_count < self.config.Retries_count: if consecutive_errors_count < self.config.Retries_count:
# skip to the next iteration if wait time does not satisfy limit constraints # skip to the next iteration if wait time does not satisfy limit constraints
...@@ -111,9 +137,10 @@ class Twint: ...@@ -111,9 +137,10 @@ class Twint:
time.sleep(delay) time.sleep(delay)
self.user_agent = await get.RandomUserAgent(wa=True) self.user_agent = await get.RandomUserAgent(wa=True)
continue continue
logme.critical(__name__+':Twint:Feed:Tweets_known_error:' + str(e)) logme.critical(__name__ + ':Twint:Feed:Tweets_known_error:' + str(e))
sys.stderr.write(str(e) + " [x] run.Feed") sys.stderr.write(str(e) + " [x] run.Feed")
sys.stderr.write("[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!") sys.stderr.write(
"[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!")
break break
if self.config.Resume: if self.config.Resume:
print(self.init, file=open(self.config.Resume, "a", encoding="utf-8")) print(self.init, file=open(self.config.Resume, "a", encoding="utf-8"))
...@@ -121,17 +148,17 @@ class Twint: ...@@ -121,17 +148,17 @@ class Twint:
async def follow(self): async def follow(self):
await self.Feed() await self.Feed()
if self.config.User_full: if self.config.User_full:
logme.debug(__name__+':Twint:follow:userFull') logme.debug(__name__ + ':Twint:follow:userFull')
self.count += await get.Multi(self.feed, self.config, self.conn) self.count += await get.Multi(self.feed, self.config, self.conn)
else: else:
logme.debug(__name__+':Twint:follow:notUserFull') logme.debug(__name__ + ':Twint:follow:notUserFull')
for user in self.feed: for user in self.feed:
self.count += 1 self.count += 1
username = user.find("a")["name"] username = user.find("a")["name"]
await output.Username(username, self.config, self.conn) await output.Username(username, self.config, self.conn)
async def favorite(self): async def favorite(self):
logme.debug(__name__+':Twint:favorite') logme.debug(__name__ + ':Twint:favorite')
await self.Feed() await self.Feed()
favorited_tweets_list = [] favorited_tweets_list = []
for tweet in self.feed: for tweet in self.feed:
...@@ -182,21 +209,22 @@ class Twint: ...@@ -182,21 +209,22 @@ class Twint:
async def profile(self): async def profile(self):
await self.Feed() await self.Feed()
if self.config.Profile_full: if self.config.Profile_full:
logme.debug(__name__+':Twint:profileFull') logme.debug(__name__ + ':Twint:profileFull')
self.count += await get.Multi(self.feed, self.config, self.conn) self.count += await get.Multi(self.feed, self.config, self.conn)
else: else:
logme.debug(__name__+':Twint:notProfileFull') logme.debug(__name__ + ':Twint:notProfileFull')
for tweet in self.feed: for tweet in self.feed:
self.count += 1 self.count += 1
await output.Tweets(tweet, self.config, self.conn) await output.Tweets(tweet, self.config, self.conn)
async def tweets(self): async def tweets(self):
await self.Feed() await self.Feed()
# TODO : need to take care of this later
if self.config.Location: if self.config.Location:
logme.debug(__name__+':Twint:tweets:location') logme.debug(__name__ + ':Twint:tweets:location')
self.count += await get.Multi(self.feed, self.config, self.conn) self.count += await get.Multi(self.feed, self.config, self.conn)
else: else:
logme.debug(__name__+':Twint:tweets:notLocation') logme.debug(__name__ + ':Twint:tweets:notLocation')
for tweet in self.feed: for tweet in self.feed:
self.count += 1 self.count += 1
await output.Tweets(tweet, self.config, self.conn) await output.Tweets(tweet, self.config, self.conn)
...@@ -217,75 +245,82 @@ class Twint: ...@@ -217,75 +245,82 @@ class Twint:
self.user_agent = await get.RandomUserAgent() self.user_agent = await get.RandomUserAgent()
if self.config.User_id is not None and self.config.Username is None: if self.config.User_id is not None and self.config.Username is None:
logme.debug(__name__+':Twint:main:user_id') logme.debug(__name__ + ':Twint:main:user_id')
self.config.Username = await get.Username(self.config.User_id) self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token,
self.config.Guest_token)
if self.config.Username is not None and self.config.User_id is None: if self.config.Username is not None and self.config.User_id is None:
logme.debug(__name__+':Twint:main:username') logme.debug(__name__ + ':Twint:main:username')
url = f"https://twitter.com/{self.config.Username}?lang=en"
self.config.User_id = await get.User(url, self.config, self.conn, True) self.config.User_id = await get.User(self.config.Username, self.config, self.conn,
self.config.Bearer_token,
self.config.Guest_token, True)
if self.config.User_id is None: if self.config.User_id is None:
raise ValueError("Cannot find twitter account with name = " + self.config.Username) raise ValueError("Cannot find twitter account with name = " + self.config.Username)
# TODO : will need to modify it to work with the new endpoints
if self.config.TwitterSearch and self.config.Since and self.config.Until: if self.config.TwitterSearch and self.config.Since and self.config.Until:
logme.debug(__name__+':Twint:main:search+since+until') logme.debug(__name__ + ':Twint:main:search+since+until')
while self.d._since < self.d._until: while self.d._since < self.d._until:
self.config.Since = str(self.d._since) self.config.Since = str(self.d._since)
self.config.Until = str(self.d._until) self.config.Until = str(self.d._until)
if len(self.feed) > 0: if len(self.feed) > 0:
await self.tweets() await self.tweets()
else: else:
logme.debug(__name__+':Twint:main:gettingNewTweets') logme.debug(__name__ + ':Twint:main:gettingNewTweets')
break break
if get.Limit(self.config.Limit, self.count): if get.Limit(self.config.Limit, self.count):
break break
else: else:
logme.debug(__name__+':Twint:main:not-search+since+until') logme.debug(__name__ + ':Twint:main:not-search+since+until')
while True: while True:
if len(self.feed) > 0: if len(self.feed) > 0:
if self.config.Followers or self.config.Following: if self.config.Followers or self.config.Following:
logme.debug(__name__+':Twint:main:follow') logme.debug(__name__ + ':Twint:main:follow')
await self.follow() await self.follow()
elif self.config.Favorites: elif self.config.Favorites:
logme.debug(__name__+':Twint:main:favorites') logme.debug(__name__ + ':Twint:main:favorites')
await self.favorite() await self.favorite()
elif self.config.Profile: elif self.config.Profile:
logme.debug(__name__+':Twint:main:profile') logme.debug(__name__ + ':Twint:main:profile')
await self.profile() await self.profile()
elif self.config.TwitterSearch: elif self.config.TwitterSearch:
logme.debug(__name__+':Twint:main:twitter-search') logme.debug(__name__ + ':Twint:main:twitter-search')
await self.tweets() await self.tweets()
else: else:
logme.debug(__name__+':Twint:main:no-more-tweets') logme.debug(__name__ + ':Twint:main:no-more-tweets')
break break
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2') # logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
if get.Limit(self.config.Limit, self.count): if get.Limit(self.config.Limit, self.count):
logme.debug(__name__+':Twint:main:reachedLimit') logme.debug(__name__ + ':Twint:main:reachedLimit')
break break
if self.config.Count: if self.config.Count:
verbose.Count(self.count, self.config) verbose.Count(self.count, self.config)
def run(config, callback=None): def run(config, callback=None):
logme.debug(__name__+':run') logme.debug(__name__ + ':run')
try: try:
get_event_loop() get_event_loop()
except RuntimeError as e: except RuntimeError as e:
if "no current event loop" in str(e): if "no current event loop" in str(e):
set_event_loop(new_event_loop()) set_event_loop(new_event_loop())
else: else:
logme.exception(__name__+':Lookup:Unexpected exception while handling an expected RuntimeError.') logme.exception(__name__ + ':run:Unexpected exception while handling an expected RuntimeError.')
raise raise
except Exception as e: except Exception as e:
logme.exception(__name__+':Lookup:Unexpected exception occured while attempting to get or create a new event loop.') logme.exception(
__name__ + ':run:Unexpected exception occurred while attempting to get or create a new event loop.')
raise raise
get_event_loop().run_until_complete(Twint(config).main(callback)) get_event_loop().run_until_complete(Twint(config).main(callback))
def Favorites(config): def Favorites(config):
logme.debug(__name__+':Favorites') logme.debug(__name__ + ':Favorites')
config.Favorites = True config.Favorites = True
config.Following = False config.Following = False
config.Followers = False config.Followers = False
...@@ -296,8 +331,9 @@ def Favorites(config): ...@@ -296,8 +331,9 @@ def Favorites(config):
if config.Pandas_au: if config.Pandas_au:
storage.panda._autoget("tweet") storage.panda._autoget("tweet")
def Followers(config): def Followers(config):
logme.debug(__name__+':Followers') logme.debug(__name__ + ':Followers')
config.Followers = True config.Followers = True
config.Following = False config.Following = False
config.Profile = False config.Profile = False
...@@ -310,11 +346,12 @@ def Followers(config): ...@@ -310,11 +346,12 @@ def Followers(config):
if config.User_full: if config.User_full:
storage.panda._autoget("user") storage.panda._autoget("user")
if config.Pandas_clean and not config.Store_object: if config.Pandas_clean and not config.Store_object:
#storage.panda.clean() # storage.panda.clean()
output._clean_follow_list() output._clean_follow_list()
def Following(config): def Following(config):
logme.debug(__name__+':Following') logme.debug(__name__ + ':Following')
config.Following = True config.Following = True
config.Followers = False config.Followers = False
config.Profile = False config.Profile = False
...@@ -327,11 +364,12 @@ def Following(config): ...@@ -327,11 +364,12 @@ def Following(config):
if config.User_full: if config.User_full:
storage.panda._autoget("user") storage.panda._autoget("user")
if config.Pandas_clean and not config.Store_object: if config.Pandas_clean and not config.Store_object:
#storage.panda.clean() # storage.panda.clean()
output._clean_follow_list() output._clean_follow_list()
def Lookup(config): def Lookup(config):
logme.debug(__name__+':Lookup') logme.debug(__name__ + ':Lookup')
try: try:
get_event_loop() get_event_loop()
...@@ -339,15 +377,16 @@ def Lookup(config): ...@@ -339,15 +377,16 @@ def Lookup(config):
if "no current event loop" in str(e): if "no current event loop" in str(e):
set_event_loop(new_event_loop()) set_event_loop(new_event_loop())
else: else:
logme.exception(__name__+':Lookup:Unexpected exception while handling an expected RuntimeError.') logme.exception(__name__ + ':Lookup:Unexpected exception while handling an expected RuntimeError.')
raise raise
except Exception as e: except Exception as e:
logme.exception(__name__+':Lookup:Unexpected exception occured while attempting to get or create a new event loop.') logme.exception(
__name__ + ':Lookup:Unexpected exception occured while attempting to get or create a new event loop.')
raise raise
try: try:
if config.User_id is not None: if config.User_id is not None:
logme.debug(__name__+':Twint:Lookup:user_id') logme.debug(__name__ + ':Twint:Lookup:user_id')
config.Username = get_event_loop().run_until_complete(get.Username(config.User_id)) config.Username = get_event_loop().run_until_complete(get.Username(config.User_id))
url = f"https://mobile.twitter.com/{config.Username}?prefetchTimestamp=" + str(int(time.time() * 1000)) url = f"https://mobile.twitter.com/{config.Username}?prefetchTimestamp=" + str(int(time.time() * 1000))
...@@ -357,15 +396,16 @@ def Lookup(config): ...@@ -357,15 +396,16 @@ def Lookup(config):
storage.panda._autoget("user") storage.panda._autoget("user")
except RuntimeError as e: except RuntimeError as e:
if "no current event loop" in str(e): if "no current event loop" in str(e):
logme.exception(__name__+':Lookup:Previous attempt to to create an event loop failed.') logme.exception(__name__ + ':Lookup:Previous attempt to to create an event loop failed.')
raise raise
except Exception as e: except Exception as e:
logme.exception(__name__+':Lookup:Unexpected exception occured.') logme.exception(__name__ + ':Lookup:Unexpected exception occured.')
raise raise
def Profile(config): def Profile(config):
logme.debug(__name__+':Profile') logme.debug(__name__ + ':Profile')
config.Profile = True config.Profile = True
config.Favorites = False config.Favorites = False
config.Following = False config.Following = False
...@@ -375,8 +415,9 @@ def Profile(config): ...@@ -375,8 +415,9 @@ def Profile(config):
if config.Pandas_au: if config.Pandas_au:
storage.panda._autoget("tweet") storage.panda._autoget("tweet")
def Search(config, callback=None): def Search(config, callback=None):
logme.debug(__name__+':Search') logme.debug(__name__ + ':Search')
config.TwitterSearch = True config.TwitterSearch = True
config.Favorites = False config.Favorites = False
config.Following = False config.Following = False
......
import re
import time
import requests
import logging as logme
class TokenExpiryException(Exception):
def __init__(self, msg):
super().__init__(msg)
class Token:
def __init__(self, config):
self._session = requests.Session()
self.config = config
self._retries = 5
self._timeout = 10
self.url = 'https://twitter.com'
def _request(self):
for attempt in range(self._retries + 1):
# The request is newly prepared on each retry because of potential cookie updates.
req = self._session.prepare_request(requests.Request('GET', self.url))
logme.debug(f'Retrieving {req.url}')
try:
r = self._session.send(req, allow_redirects=True, timeout=self._timeout)
except requests.exceptions.RequestException as exc:
if attempt < self._retries:
retrying = ', retrying'
level = logme.WARNING
else:
retrying = ''
level = logme.ERROR
logme.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
else:
success, msg = (True, None)
msg = f': {msg}' if msg else ''
if success:
logme.debug(f'{req.url} retrieved successfully{msg}')
return r
if attempt < self._retries:
# TODO : might wanna tweak this back-off timer
sleep_time = 2.0 * 2 ** attempt
logme.info(f'Waiting {sleep_time:.0f} seconds')
time.sleep(sleep_time)
else:
msg = f'{self._retries + 1} requests to {self.url} failed, giving up.'
logme.fatal(msg)
self.config.Guest_token = None
raise RefreshTokenException(msg)
def refresh(self):
logme.debug('Retrieving guest token')
res = self._request()
match = re.search(r'\("gt=(\d+);', res.text)
if match:
logme.debug('Found guest token in HTML')
self.config.Guest_token = str(match.group(1))
else:
self.config.Guest_token = None
raise RefreshTokenException('Could not find the Guest token in HTML')
from time import strftime, localtime from time import strftime, localtime
from datetime import datetime from datetime import datetime, timezone
import json import json
import logging as logme import logging as logme
...@@ -9,6 +9,7 @@ from googletransx import Translator ...@@ -9,6 +9,7 @@ from googletransx import Translator
# - https://github.com/x0rzkov/py-googletrans#basic-usage # - https://github.com/x0rzkov/py-googletrans#basic-usage
translator = Translator() translator = Translator()
class tweet: class tweet:
"""Define Tweet class """Define Tweet class
""" """
...@@ -17,52 +18,63 @@ class tweet: ...@@ -17,52 +18,63 @@ class tweet:
def __init__(self): def __init__(self):
pass pass
def utc_to_local(utc_dt):
return utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=None)
def getMentions(tw): def getMentions(tw):
"""Extract ment from tweet """Extract mentions from tweet
""" """
logme.debug(__name__+':getMentions') logme.debug(__name__ + ':getMentions')
mentions = []
try: try:
mentions = tw["data-mentions"].split(" ") for mention in tw['entities']['user_mentions']:
except: mentions.append(mention['screen_name'])
except KeyError:
mentions = [] mentions = []
return mentions return mentions
def getQuoteURL(tw): def getQuoteURL(tw):
"""Extract quote from tweet """Extract quote from tweet
""" """
logme.debug(__name__+':getQuoteURL') logme.debug(__name__ + ':getQuoteURL')
base_twitter = "https://twitter.com" base_twitter = "https://twitter.com"
quote_url = "" quote_url = ""
try: try:
quote = tw.find("div","QuoteTweet-innerContainer") quote = tw.find("div", "QuoteTweet-innerContainer")
quote_url = base_twitter + quote.get("href") quote_url = base_twitter + quote.get("href")
except: except:
quote_url = "" quote_url = ""
return quote_url return quote_url
def getText(tw):
"""Replace some text
"""
logme.debug(__name__+':getText')
text = tw.find("p", "tweet-text").text
text = text.replace("http", " http")
text = text.replace("pic.twitter", " pic.twitter")
return text # def getText(tw):
# """Replace some text
# """
# logme.debug(__name__ + ':getText')
# text = tw.find("p", "tweet-text").text
# text = text.replace("http", " http")
# text = text.replace("pic.twitter", " pic.twitter")
#
# return text
def getStat(tw, _type): def getStat(tw, _type):
"""Get stats about Tweet """Get stats about Tweet
""" """
logme.debug(__name__+':getStat') logme.debug(__name__ + ':getStat')
st = f"ProfileTweet-action--{_type} u-hiddenVisually" st = f"ProfileTweet-action--{_type} u-hiddenVisually"
return tw.find("span", st).find("span")["data-tweet-stat-count"] return tw.find("span", st).find("span")["data-tweet-stat-count"]
def getRetweet(tw, _config): def getRetweet(tw, _config):
"""Get Retweet """Get Retweet
""" """
logme.debug(__name__+':getRetweet') logme.debug(__name__ + ':getRetweet')
if _config.Profile: if _config.Profile:
if int(tw["data-user-id"]) != _config.User_id: if int(tw["data-user-id"]) != _config.User_id:
return _config.User_id, _config.Username return _config.User_id, _config.Username
...@@ -74,60 +86,156 @@ def getRetweet(tw, _config): ...@@ -74,60 +86,156 @@ def getRetweet(tw, _config):
return _rt_id, _rt_username return _rt_id, _rt_username
return '', '' return '', ''
def getThumbnail(tw):
"""Get Thumbnail # def getThumbnail(tw):
""" # """Get Thumbnail
divs = tw.find_all("div","PlayableMedia-player") # """
thumb = "" # divs = tw.find_all("div", "PlayableMedia-player")
for div in divs: # thumb = ""
thumb = div.attrs["style"].split("url('")[-1] # for div in divs:
thumb = thumb.replace("')","") # thumb = div.attrs["style"].split("url('")[-1]
return thumb # thumb = thumb.replace("')", "")
# return thumb
# def Tweet(tw, config):
# """Create Tweet object
# """
# logme.debug(__name__+':Tweet')
# t = tweet()
# t.id = int(tw["data-item-id"])
# t.id_str = tw["data-item-id"]
# t.conversation_id = tw["data-conversation-id"]
# t.datetime = int(tw.find("span", "_timestamp")["data-time-ms"])
# t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime/1000.0))
# t.timestamp = strftime("%H:%M:%S", localtime(t.datetime/1000.0))
# t.user_id = int(tw["data-user-id"])
# t.user_id_str = tw["data-user-id"]
# t.username = tw["data-screen-name"]
# t.name = tw["data-name"]
# t.place = tw.find("a","js-geo-pivot-link").text.strip() if tw.find("a","js-geo-pivot-link") else ""
# t.timezone = strftime("%z", localtime())
# for img in tw.findAll("img", "Emoji Emoji--forText"):
# img.replaceWith(img["alt"])
# t.mentions = getMentions(tw)
# t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
# t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
# t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
# t.thumbnail = getThumbnail(tw)
# t.tweet = getText(tw)
# t.lang = tw.find('p', 'tweet-text')['lang']
# t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
# t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")]
# t.replies_count = getStat(tw, "reply")
# t.retweets_count = getStat(tw, "retweet")
# t.likes_count = getStat(tw, "favorite")
# t.link = f"https://twitter.com/{t.username}/status/{t.id}"
# t.user_rt_id, t.user_rt = getRetweet(tw, config)
# t.retweet = True if t.user_rt else False
# t.retweet_id = ''
# t.retweet_date = ''
# if not config.Profile:
# t.retweet_id = tw['data-retweet-id'] if t.user_rt else ''
# t.retweet_date = datetime.fromtimestamp(((int(t.retweet_id) >> 22) + 1288834974657)/1000.0).strftime("%Y-%m-%d %H:%M:%S") if t.user_rt else ''
# t.quote_url = getQuoteURL(tw)
# t.near = config.Near if config.Near else ""
# t.geo = config.Geo if config.Geo else ""
# t.source = config.Source if config.Source else ""
# t.reply_to = [{'user_id': t['id_str'], 'username': t['screen_name']} for t in json.loads(tw["data-reply-to-users-json"])]
# t.translate = ''
# t.trans_src = ''
# t.trans_dest = ''
# if config.Translate == True:
# try:
# ts = translator.translate(text=t.tweet, dest=config.TranslateDest)
# t.translate = ts.text
# t.trans_src = ts.src
# t.trans_dest = ts.dest
# # ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
# except ValueError as e:
# raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet))
# logme.debug(__name__+':Tweet:translator.translate:'+str(e))
# return t
def Tweet(tw, config): def Tweet(tw, config):
"""Create Tweet object """Create Tweet object
""" """
logme.debug(__name__+':Tweet') logme.debug(__name__ + ':Tweet')
t = tweet() t = tweet()
t.id = int(tw["data-item-id"]) t.id = int(tw['id_str'])
t.id_str = tw["data-item-id"] t.id_str = tw["id_str"]
t.conversation_id = tw["data-conversation-id"] t.conversation_id = tw["conversation_id_str"]
t.datetime = int(tw.find("span", "_timestamp")["data-time-ms"])
t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime/1000.0)) # parsing date to user-friendly format
t.timestamp = strftime("%H:%M:%S", localtime(t.datetime/1000.0)) _dt = tw['created_at']
t.user_id = int(tw["data-user-id"]) _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
t.user_id_str = tw["data-user-id"] _dt = utc_to_local(_dt)
t.username = tw["data-screen-name"] t.datetime = str(_dt.strftime('%d-%m-%Y %H:%M:%S %Z'))
t.name = tw["data-name"] # date is of the format year,
t.place = tw.find("a","js-geo-pivot-link").text.strip() if tw.find("a","js-geo-pivot-link") else "" t.datestamp = _dt.strftime('%d-%m-%Y')
t.timestamp = _dt.strftime('%H:%M:%S')
t.user_id = int(tw["user_id_str"])
t.user_id_str = tw["user_id_str"]
t.username = tw["user_data"]['screen_name']
t.name = tw["user_data"]['name']
t.place = tw['geo'] if tw['geo'] else ""
t.timezone = strftime("%z", localtime()) t.timezone = strftime("%z", localtime())
for img in tw.findAll("img", "Emoji Emoji--forText"): # for img in tw.findAll("img", "Emoji Emoji--forText"):
img.replaceWith(img["alt"]) # img.replaceWith(img["alt"])
t.mentions = getMentions(tw) try:
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")] t.mentions = [_mention['screen_name'] for _mention in tw['entities']['user_mentions']]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")] except KeyError:
t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0 t.mentions = []
t.thumbnail = getThumbnail(tw) try:
t.tweet = getText(tw) t.urls = [_url['expanded_url'] for _url in tw['entities']['urls']]
t.lang = tw.find('p', 'tweet-text')['lang'] except KeyError:
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")] t.urls = []
t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")] try:
t.replies_count = getStat(tw, "reply") t.photos = [_img['media_url_https'] for _img in tw['entities']['media'] if _img['type'] == 'photo' and
t.retweets_count = getStat(tw, "retweet") _img['expanded_url'].find('/photo/') != -1]
t.likes_count = getStat(tw, "favorite") except KeyError:
t.photos = []
try:
t.video = 1 if len(tw['extended_entities']['media']) else 0
except KeyError:
t.video = 0
try:
t.thumbnail = tw['extended_entities']['media'][0]['media_url_https']
except KeyError:
t.thumbnail = ''
t.tweet = tw['full_text']
t.lang = tw['lang']
try:
t.hashtags = [hashtag['text'] for hashtag in tw['entities']['hashtags']]
except KeyError:
t.hashtags = []
# don't know what this is
# t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")]
t.replies_count = tw['reply_count']
t.retweets_count = tw['retweet_count']
t.likes_count = tw['favorite_count']
t.link = f"https://twitter.com/{t.username}/status/{t.id}" t.link = f"https://twitter.com/{t.username}/status/{t.id}"
t.user_rt_id, t.user_rt = getRetweet(tw, config) # TODO: someone who is familiar with this code, needs to take a look at what this is
t.retweet = True if t.user_rt else False # t.user_rt_id, t.user_rt = getRetweet(tw, config)
t.retweet_id = '' # t.retweet = True if t.user_rt else False
t.retweet_date = '' # t.retweet_id = ''
if not config.Profile: # t.retweet_date = ''
t.retweet_id = tw['data-retweet-id'] if t.user_rt else '' # if not config.Profile:
t.retweet_date = datetime.fromtimestamp(((int(t.retweet_id) >> 22) + 1288834974657)/1000.0).strftime("%Y-%m-%d %H:%M:%S") if t.user_rt else '' # t.retweet_id = tw['data-retweet-id'] if t.user_rt else ''
t.quote_url = getQuoteURL(tw) # t.retweet_date = datetime.fromtimestamp(((int(t.retweet_id) >> 22) + 1288834974657) / 1000.0).strftime(
# "%Y-%m-%d %H:%M:%S") if t.user_rt else ''
try:
t.quote_url = tw['quoted_status_permalink']['expanded'] if tw['is_quote_status'] else ''
except KeyError:
# means that the quoted tweet have been deleted
t.quote_url = 0
t.near = config.Near if config.Near else "" t.near = config.Near if config.Near else ""
t.geo = config.Geo if config.Geo else "" t.geo = config.Geo if config.Geo else ""
t.source = config.Source if config.Source else "" t.source = config.Source if config.Source else ""
t.reply_to = [{'user_id': t['id_str'], 'username': t['screen_name']} for t in json.loads(tw["data-reply-to-users-json"])] # TODO: check this whether we need the list of all the users to whom this tweet is a reply or we only need
# the immediately above user id
t.reply_to = {'user_id': tw['in_reply_to_user_id_str'], 'username': tw['in_reply_to_screen_name']}
t.translate = '' t.translate = ''
t.trans_src = '' t.trans_src = ''
t.trans_dest = '' t.trans_dest = ''
...@@ -140,5 +248,5 @@ def Tweet(tw, config): ...@@ -140,5 +248,5 @@ def Tweet(tw, config):
# ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31 # ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
except ValueError as e: except ValueError as e:
raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet)) raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet))
logme.debug(__name__+':Tweet:translator.translate:'+str(e)) logme.debug(__name__ + ':Tweet:translator.translate:' + str(e))
return t return t
import datetime import datetime
from sys import platform from sys import platform
import logging as logme import logging as logme
from urllib.parse import urlencode
from urllib.parse import quote
mobile = "https://mobile.twitter.com" mobile = "https://mobile.twitter.com"
base = "https://twitter.com/i" # base = "https://twitter.com/i"
base = "https://api.twitter.com/2/search/adaptive.json"
def _sanitizeQuery(base,params):
def _sanitizeQuery(_url, params):
_serialQuery = "" _serialQuery = ""
for p in params: _serialQuery = urlencode(params, quote_via=quote)
_serialQuery += p[0]+"="+p[1]+"&" _serialQuery = _url + "?" + _serialQuery
_serialQuery = base + "?" + _serialQuery[:-1].replace(":", "%3A").replace(" ", "%20")
return _serialQuery return _serialQuery
def _formatDate(date): def _formatDate(date):
if "win" in platform: if "win" in platform:
return f'\"{date.split()[0]}\"' return f'\"{date.split()[0]}\"'
...@@ -20,8 +24,9 @@ def _formatDate(date): ...@@ -20,8 +24,9 @@ def _formatDate(date):
except ValueError: except ValueError:
return int(datetime.datetime.strptime(date, "%Y-%m-%d").timestamp()) return int(datetime.datetime.strptime(date, "%Y-%m-%d").timestamp())
async def Favorites(username, init): async def Favorites(username, init):
logme.debug(__name__+':Favorites') logme.debug(__name__ + ':Favorites')
url = f"{mobile}/{username}/favorites?lang=en" url = f"{mobile}/{username}/favorites?lang=en"
if init != '-1': if init != '-1':
...@@ -29,8 +34,9 @@ async def Favorites(username, init): ...@@ -29,8 +34,9 @@ async def Favorites(username, init):
return url return url
async def Followers(username, init): async def Followers(username, init):
logme.debug(__name__+':Followers') logme.debug(__name__ + ':Followers')
url = f"{mobile}/{username}/followers?lang=en" url = f"{mobile}/{username}/followers?lang=en"
if init != '-1': if init != '-1':
...@@ -38,8 +44,9 @@ async def Followers(username, init): ...@@ -38,8 +44,9 @@ async def Followers(username, init):
return url return url
async def Following(username, init): async def Following(username, init):
logme.debug(__name__+':Following') logme.debug(__name__ + ':Following')
url = f"{mobile}/{username}/following?lang=en" url = f"{mobile}/{username}/following?lang=en"
if init != '-1': if init != '-1':
...@@ -47,8 +54,9 @@ async def Following(username, init): ...@@ -47,8 +54,9 @@ async def Following(username, init):
return url return url
async def MobileProfile(username, init): async def MobileProfile(username, init):
logme.debug(__name__+':MobileProfile') logme.debug(__name__ + ':MobileProfile')
url = f"{mobile}/{username}?lang=en" url = f"{mobile}/{username}?lang=en"
if init != '-1': if init != '-1':
...@@ -56,8 +64,9 @@ async def MobileProfile(username, init): ...@@ -56,8 +64,9 @@ async def MobileProfile(username, init):
return url return url
async def Profile(username, init): async def Profile(username, init):
logme.debug(__name__+':Profile') logme.debug(__name__ + ':Profile')
url = f"{base}/profiles/show/{username}/timeline/tweets?include_" url = f"{base}/profiles/show/{username}/timeline/tweets?include_"
url += "available_features=1&lang=en&include_entities=1" url += "available_features=1&lang=en&include_entities=1"
url += "&include_new_items_bar=true" url += "&include_new_items_bar=true"
...@@ -67,17 +76,38 @@ async def Profile(username, init): ...@@ -67,17 +76,38 @@ async def Profile(username, init):
return url return url
async def Search(config, init): async def Search(config, init):
logme.debug(__name__+':Search') logme.debug(__name__ + ':Search')
url = f"{base}/search/timeline" url = base
tweet_count = 100
q = "" q = ""
params = [ params = [
('vertical', 'default'), # ('include_blocking', '1'),
('src', 'unkn'), # ('include_blocked_by', '1'),
('include_available_features', '1'), # ('include_followed_by', '1'),
('include_entities', '1'), # ('include_want_retweets', '1'),
('max_position', str(init)), # ('include_mute_edge', '1'),
('reset_error_state', 'false'), # ('include_can_dm', '1'),
('include_can_media_tag', '1'),
# ('skip_status', '1'),
# ('include_cards', '1'),
('include_ext_alt_text', 'true'),
('include_quote_count', 'true'),
('include_reply_count', '1'),
('tweet_mode', 'extended'),
('include_entities', 'true'),
('include_user_entities', 'true'),
('include_ext_media_availability', 'true'),
('send_error_codes', 'true'),
('simple_quoted_tweet', 'true'),
('count', tweet_count),
# ('query_source', 'typed_query'),
# ('pc', '1'),
('cursor', str(init)),
('spelling_corrections', '1'),
('ext', 'mediaStats%2ChighlightedLabel'),
('tweet_search_mode', 'live'), # this can be handled better, maybe take an argument and set it then
] ]
if not config.Popular_tweets: if not config.Popular_tweets:
params.append(('f', 'tweets')) params.append(('f', 'tweets'))
...@@ -92,7 +122,8 @@ async def Search(config, init): ...@@ -92,7 +122,8 @@ async def Search(config, init):
config.Geo = config.Geo.replace(" ", "") config.Geo = config.Geo.replace(" ", "")
q += f" geocode:{config.Geo}" q += f" geocode:{config.Geo}"
if config.Search: if config.Search:
q += f" {config.Search}"
q += f"{config.Search}"
if config.Year: if config.Year:
q += f" until:{config.Year}-1-1" q += f" until:{config.Year}-1-1"
if config.Since: if config.Since:
...@@ -120,6 +151,7 @@ async def Search(config, init): ...@@ -120,6 +151,7 @@ async def Search(config, init):
q += " filter:media" q += " filter:media"
if config.Replies: if config.Replies:
q += " filter:replies" q += " filter:replies"
# although this filter can still be used, but I found it broken in my preliminary testing, needs more testing
if config.Native_retweets: if config.Native_retweets:
q += " filter:nativeretweets" q += " filter:nativeretweets"
if config.Min_likes: if config.Min_likes:
...@@ -144,3 +176,43 @@ async def Search(config, init): ...@@ -144,3 +176,43 @@ async def Search(config, init):
params.append(("q", q)) params.append(("q", q))
_serialQuery = _sanitizeQuery(url, params) _serialQuery = _sanitizeQuery(url, params)
return url, params, _serialQuery return url, params, _serialQuery
# TODO: possibly unused — verify call sites before removing
async def SearchProfile(config, init=None):
    """Build the api.twitter.com/2 profile-timeline request.

    :param config: run configuration (currently unused here; kept for interface parity
                   with the other URL builders in this module)
    :param init: pagination cursor from a previous response; appended when given
    :return: (url_template, params, serialized_query) tuple
    """
    logme.debug(__name__ + ':SearchProfile')
    _url = 'https://api.twitter.com/2/timeline/profile/{}.json?'
    params = [
        ('include_profile_interstitial_type', '1'),
        ('include_blocking', '1'),
        ('include_blocked_by', '1'),
        ('include_followed_by', '1'),
        ('include_want_retweets', '1'),
        ('include_mute_edge', '1'),
        ('include_can_dm', '1'),
        ('include_can_media_tag', '1'),
        ('skip_status', '1'),
        ('cards_platform', 'Web - 12'),
        ('include_cards', '1'),
        ('include_ext_alt_text', 'true'),
        ('include_quote_count', 'true'),
        ('include_reply_count', '1'),
        ('tweet_mode', 'extended'),
        ('include_entities', 'true'),
        ('include_user_entities', 'true'),
        ('include_ext_media_color', 'true'),
        ('include_ext_media_availability', 'true'),
        ('send_error_codes', 'true'),
        ('simple_quoted_tweet', 'true'),
        ('include_tweet_replies', 'false'),
        ('count', '50'),
        # NOTE(review): hardcoded user id — should come from config/profile lookup; confirm intent
        ('userId', '1934388686'),
        # comma-separated extension list; percent-encoding is done by _sanitizeQuery
        ('ext', 'mediaStats,highlightedLabel'),
    ]
    if init:
        params.append(('cursor', init))
    _serialQuery = _sanitizeQuery(_url, params)
    return _url, params, _serialQuery
import datetime
import logging as logme import logging as logme
class user:
class User:
type = "user" type = "user"
def __init__(self): def __init__(self):
pass pass
def inf(ur, _type):
    """Extract one profile attribute from a parsed profile page.

    :param ur: BeautifulSoup document of the (mobile) profile page
    :param _type: one of "id", "name", "username", "private"
    :return: requested value; "" when the id is absent, 1/0 for "private"
    """
    logme.debug(__name__ + ':inf')
    # The profile container's class differs between normal, not-following,
    # and protected layouts — try each in turn.
    group = (ur.find("div", "profile")
             or ur.find("div", "user-actions btn-group not-following")
             or ur.find("div", "user-actions btn-group not-following protected"))
    if group is None:
        # Previously this fell through and died later with an opaque
        # NameError/AttributeError; fail with a clear message instead.
        logme.critical(__name__ + ':inf:no profile container found')
        raise ValueError("unable to locate profile container in page")
    if _type == "id":
        screen_name = group.find("span", "screen-name").text
        ret = ur.find("a", {"data-screenname": screen_name})
        ret = ret.get('data-mentioned-user-id') if ret is not None else None
        ret = "" if ret is None else ret
    elif _type == "name":
        # fullname may contain trailing badge text on following lines
        ret = group.find("div", "fullname").text.split('\n')[0]
    elif _type == "username":
        ret = group.find("span", "screen-name").text
    elif _type == "private":
        ret = 1 if group.find("div", "protected") else 0
    return ret
def card(ur, _type):
    """Extract a profile-card field ("bio", "location", or "url"); "" when absent.

    :param ur: BeautifulSoup document of the profile page
    :param _type: field selector
    :return: field text, or "" when the element is missing
    """
    logme.debug(__name__ + ':card')
    if _type == "bio":
        try:
            ret = ur.find("div", "bio").text.replace("\n", " ").strip()
        except AttributeError:
            # no bio div on the page (find returned None)
            ret = ""
    elif _type == "location":
        try:
            ret = ur.find("div", "location").text
        except AttributeError:
            ret = ""
    elif _type == "url":
        try:
            ret = ur.find("link")["href"]
        except (TypeError, KeyError):
            # link tag absent, or present without an href attribute
            ret = ""
    return ret
def join(ur):
    """Return [time, date] split from the profile join-date tooltip, ["", ""] if absent.

    :param ur: BeautifulSoup document of the profile page
    """
    logme.debug(__name__ + ':join')
    try:
        jd = ur.find("span", "ProfileHeaderCard-joinDateText js-tooltip u-dir")["title"]
        return jd.split(" - ")
    except (TypeError, KeyError):
        # span missing (find returned None) or it has no title attribute
        return ["", ""]
def convertToInt(x):
    """Parse a display count like "1,234", "1.5k", "2m", "3b" into an int.

    Returns 0 when the value cannot be parsed.
    """
    # original logged a typo'd name ('contertToInt'); fixed
    logme.debug(__name__ + ':convertToInt')
    multipliers = {
        "k": 1000,
        "m": 1000000,
        "b": 1000000000,
    }
    # plain integer, possibly with thousands separators
    try:
        return int(str(x).replace(',', ''))
    except ValueError:
        pass
    # suffixed form: numeric prefix scaled by k/m/b multiplier
    try:
        value = float(str(x)[:-1])
        return int(value * multipliers[str(x)[-1:].lower()])
    except (ValueError, KeyError):
        pass
    return 0
def stat(ur, _type):
    """Return the named counter (e.g. "tweets", "following", "followers") from the
    profile-stats table, or 0 when that counter is not present.

    :param ur: BeautifulSoup document of the profile page
    :param _type: lowercase counter label
    """
    logme.debug(__name__ + ':stat')
    stats = ur.find('table', 'profile-stats')
    counters = {}
    for cell in stats.find_all('td', 'stat'):
        # cell text looks like "1,234 Tweets"; strip separators then split value/label
        value, label = cell.text.replace('\n', '').replace(',', '').split(' ')[:2]
        counters[label.lower()] = int(value)
    try:
        return counters[_type]
    except KeyError:
        # BUGFIX: a dict lookup raises KeyError, not AttributeError as previously caught,
        # so a missing counter used to crash instead of returning 0.
        return 0
def media(ur):
    """Return the media count from the photo-rail heading, or 0 when the rail is absent.

    :param ur: BeautifulSoup document of the profile page
    """
    logme.debug(__name__ + ':media')
    try:
        # heading text looks like "123 Photos and videos"; take the leading count
        media_count = ur.find("a", "PhotoRail-headingWithCount js-nav").text.strip().split(" ")[0]
    except AttributeError:
        # photo rail missing (no media posted, or page layout changed)
        return 0
    return convertToInt(media_count)
def verified(ur):
    """Return 1 if the profile carries the "Verified Account" badge, else 0.

    :param ur: BeautifulSoup document of the profile page
    """
    logme.debug(__name__ + ':verified')
    try:
        alt_text = ur.find("img", {"alt": "Verified Account"})['alt']
    except (TypeError, KeyError):
        # badge image missing (find returned None) or it lacks an alt attribute
        return 0
    return 1 if "Verified Account" in alt_text else 0
# ur object must be a json from the endpoint https://api.twitter.com/graphql
def User(ur): def User(ur):
logme.debug(__name__+':User') logme.debug(__name__ + ':User')
u = user() if 'data' not in ur and 'user' not in ur['data']:
for img in ur.findAll("img", "Emoji Emoji--forText"): msg = 'malformed json! cannot be parsed to get user data'
img.replaceWith(img["alt"]) logme.fatal(msg)
u.id = inf(ur, "id") raise KeyError(msg)
u.name = inf(ur, "name") _usr = User()
u.username = inf(ur, "username") _usr.id = ur['data']['user']['rest_id']
u.bio = card(ur, "bio") _usr.name = ur['data']['user']['rest_id']['legacy']['name']
u.location = card(ur, "location") _usr.username = ur['data']['user']['rest_id']['legacy']['screen_name']
u.url = card(ur, "url") _usr.bio = ur['data']['user']['rest_id']['legacy']['description']
u.join_date = join(ur)[1] _usr.location = ur['data']['user']['rest_id']['legacy']['location']
u.join_time = join(ur)[0] _usr.url = ur['data']['user']['rest_id']['legacy']['screen_name']['url']
u.tweets = stat(ur, "tweets") # parsing date to user-friendly format
u.following = stat(ur, "following") _dt = ur['data']['user']['rest_id']['legacy']['created_at']
u.followers = stat(ur, "followers") _dt = datetime.datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
u.likes = "" # stat(ur, "favorites") # date is of the format year,
u.media_count = "" # media(ur) _usr.join_date = _dt.strftime('%d-%m-%Y')
u.is_private = inf(ur, "private") _usr.join_time = _dt.strftime('%H:%M:%S %Z')
u.is_verified = verified(ur)
u.avatar = ur.find("img", {"alt": u.name})["src"] # :type `int`
#u.background_image = ur.find('div',{'class':'ProfileCanopy-headerBg'}).find('img').get('src') _usr.tweets = int(ur['data']['user']['rest_id']['legacy']['statuses_count'])
return u _usr.following = int(ur['data']['user']['rest_id']['legacy']['friends_count'])
_usr.followers = int(ur['data']['user']['rest_id']['legacy']['followers_count'])
_usr.likes = int(ur['data']['user']['rest_id']['legacy']['favourites_count'])
_usr.media_count = int(ur['data']['user']['rest_id']['legacy']['media_count'])
_usr.is_private = ur['data']['user']['rest_id']['legacy']['protected']
_usr.is_verified = ur['data']['user']['rest_id']['legacy']['verified']
_usr.avatar = ur['data']['user']['rest_id']['legacy']['profile_image_url_https']
_usr.background_image = ur['data']['user']['rest_id']['legacy']['profile_banner_url']
# TODO : future implementation
# legacy_extended_profile is also available in some cases which can be used to get DOB of user
return _usr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment