Commit 26f6f11f authored by Raphael Beer's avatar Raphael Beer

Refactor: profile test, linting

parent 8f91a6a8
import aiohttp
import argparse import argparse
import asyncio import asyncio
import daemon
import json import json
import os import os
import re
import urllib.parse
import sys
import time import time
from aiohttp import web from aiohttp import web
from bs4 import BeautifulSoup import daemon
from log import * from db import connect, Database
from db import connect from log import log, add_file_handler, set_log_level, shutdown_logging
from twitter_session import TwitterSession from twitter_session import TwitterSession
log_file = None LOG_FILE = None
debug_file = None DEBUG_FILE = None
db = None DB: Database
routes = web.RouteTableDef() routes = web.RouteTableDef()
...@@ -54,12 +49,12 @@ async def api(request): ...@@ -54,12 +49,12 @@ async def api(request):
session = TwitterSession.guest_sessions[TwitterSession.test_index % len(TwitterSession.guest_sessions)] session = TwitterSession.guest_sessions[TwitterSession.test_index % len(TwitterSession.guest_sessions)]
TwitterSession.test_index += 1 TwitterSession.test_index += 1
result = await session.test(screen_name) result = await session.test(screen_name)
db.write_result(result) DB.write_result(result)
log.debug(json.dumps(result) + '\n') log.debug('\n %s', json.dumps(result))
if (args.cors_allow is not None): if args.cors_allow is not None:
return web.json_response(result, headers={"Access-Control-Allow-Origin": args.cors_allow}) return web.json_response(result, headers={"Access-Control-Allow-Origin": args.cors_allow})
else:
return web.json_response(result) return web.json_response(result)
async def login_accounts(accounts, cookie_dir=None): async def login_accounts(accounts, cookie_dir=None):
if cookie_dir is not None and not os.path.isdir(cookie_dir): if cookie_dir is not None and not os.path.isdir(cookie_dir):
...@@ -67,12 +62,12 @@ async def login_accounts(accounts, cookie_dir=None): ...@@ -67,12 +62,12 @@ async def login_accounts(accounts, cookie_dir=None):
coroutines = [] coroutines = []
for acc in accounts: for acc in accounts:
session = TwitterSession() session = TwitterSession()
coroutines.append(session.login(*acc, cookie_dir=cookie_dir)) coroutines.append(session.login(**acc, cookie_dir=cookie_dir))
TwitterSession.account_sessions.append(session) TwitterSession.account_sessions.append(session)
await asyncio.gather(*coroutines) await asyncio.gather(*coroutines)
async def login_guests(): async def login_guests():
for i in range(0, guest_session_pool_size): for _ in range(0, guest_session_pool_size):
session = TwitterSession() session = TwitterSession()
TwitterSession.guest_sessions.append(session) TwitterSession.guest_sessions.append(session)
await asyncio.gather(*[s.login() for s in TwitterSession.guest_sessions]) await asyncio.gather(*[s.login() for s in TwitterSession.guest_sessions])
...@@ -80,7 +75,7 @@ async def login_guests(): ...@@ -80,7 +75,7 @@ async def login_guests():
def ensure_dir(path): def ensure_dir(path):
if os.path.isdir(path) is False: if os.path.isdir(path) is False:
log.info('Creating directory %s' % path) log.info('Creating directory %s', path)
os.mkdir(path) os.mkdir(path)
parser = argparse.ArgumentParser(description='Twitter Shadowban Tester') parser = argparse.ArgumentParser(description='Twitter Shadowban Tester')
...@@ -93,8 +88,8 @@ parser.add_argument('--port', type=int, default=8080, help='port which to listen ...@@ -93,8 +88,8 @@ parser.add_argument('--port', type=int, default=8080, help='port which to listen
parser.add_argument('--host', type=str, default='127.0.0.1', help='hostname/ip which to listen on (default:127.0.0.1)') parser.add_argument('--host', type=str, default='127.0.0.1', help='hostname/ip which to listen on (default:127.0.0.1)')
parser.add_argument('--mongo-host', type=str, default='localhost', help='hostname or IP of mongoDB service to connect to (default: localhost)') parser.add_argument('--mongo-host', type=str, default='localhost', help='hostname or IP of mongoDB service to connect to (default: localhost)')
parser.add_argument('--mongo-port', type=int, default=27017, help='port of mongoDB service to connect to (default: 27017)') parser.add_argument('--mongo-port', type=int, default=27017, help='port of mongoDB service to connect to (default: 27017)')
parser.add_argument('--mongo-db', type=str, default='tester', help='name of mongo database to use (default: tester)') parser.add_argument('--mongo-DB', type=str, default='tester', help='name of mongo database to use (default: tester)')
parser.add_argument('--mongo-username', type=str, default=None, help='user with read/write permissions to --mongo-db') parser.add_argument('--mongo-username', type=str, default=None, help='user with read/write permissions to --mongo-DB')
parser.add_argument('--mongo-password', type=str, default=None, help='password for --mongo-username') parser.add_argument('--mongo-password', type=str, default=None, help='password for --mongo-username')
parser.add_argument('--twitter-auth-key', type=str, default=None, help='auth key for twitter guest session', required=True) parser.add_argument('--twitter-auth-key', type=str, default=None, help='auth key for twitter guest session', required=True)
parser.add_argument('--cors-allow', type=str, default=None, help='value for Access-Control-Allow-Origin header') parser.add_argument('--cors-allow', type=str, default=None, help='value for Access-Control-Allow-Origin header')
...@@ -104,10 +99,10 @@ args = parser.parse_args() ...@@ -104,10 +99,10 @@ args = parser.parse_args()
TwitterSession.twitter_auth_key = args.twitter_auth_key TwitterSession.twitter_auth_key = args.twitter_auth_key
guest_session_pool_size = args.guest_sessions guest_session_pool_size = args.guest_sessions
if (args.cors_allow is None): if args.cors_allow is None:
log.warning('[CORS] Running without CORS headers') log.warning('[CORS] Running without CORS headers')
else: else:
log.info('[CORS] Allowing requests from: ' + args.cors_allow) log.info('[CORS] Allowing requests from: %s', args.cors_allow)
ensure_dir(args.cookie_dir) ensure_dir(args.cookie_dir)
...@@ -117,9 +112,9 @@ add_file_handler(args.log) ...@@ -117,9 +112,9 @@ add_file_handler(args.log)
try: try:
with open(args.account_file, "r") as f: with open(args.account_file, "r") as f:
accounts = json.loads(f.read()) TwitterSession.accounts = json.loads(f.read())
except: except:
accounts = [] pass
if args.debug is True: if args.debug is True:
set_log_level('debug') set_log_level('debug')
...@@ -127,20 +122,20 @@ else: ...@@ -127,20 +122,20 @@ else:
set_log_level('info') set_log_level('info')
async def shut_down(app): async def shut_down(app):
log.info("Closing %s guest sessions" % len(TwitterSession.guest_sessions)) log.info("Closing %d guest sessions", len(TwitterSession.guest_sessions))
for session in TwitterSession.guest_sessions: for session in TwitterSession.guest_sessions:
await session.close() await session.close()
async def clean_up(app): async def clean_up(app):
global db global DB
log.info("Closing database connection") log.info("Closing database connection")
db.close() DB.close()
shutdown_logging() shutdown_logging()
def run(): def run():
global db global DB
db = connect( DB = connect(
host=args.mongo_host, host=args.mongo_host,
port=args.mongo_port, port=args.mongo_port,
username=args.mongo_username, username=args.mongo_username,
...@@ -148,7 +143,7 @@ def run(): ...@@ -148,7 +143,7 @@ def run():
) )
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.run_until_complete(login_accounts(accounts, args.cookie_dir)) loop.run_until_complete(login_accounts(TwitterSession.accounts, args.cookie_dir))
loop.run_until_complete(login_guests()) loop.run_until_complete(login_guests())
app = web.Application() app = web.Application()
......
import copy import copy
import traceback
import sys
from time import sleep from time import sleep
from pymongo import MongoClient, errors as MongoErrors from pymongo import MongoClient
from log import log from log import log
...@@ -12,8 +10,8 @@ class Database: ...@@ -12,8 +10,8 @@ class Database:
RESULTS_COLLECTION = 'results' RESULTS_COLLECTION = 'results'
RATELIMIT_COLLECTION = 'rate-limits' RATELIMIT_COLLECTION = 'rate-limits'
log.info('Connecting to ' + host + ':' + str(port)) log.info('Connecting to %s:%d', host, port)
log.info('Using Database `' + db + '`') log.info('Using Database `%s`', db)
# client and DB # client and DB
self.client = MongoClient(host, port, serverSelectionTimeoutMS=3, username=username, password=password) self.client = MongoClient(host, port, serverSelectionTimeoutMS=3, username=username, password=password)
self.db = self.client[db] self.db = self.client[db]
......
import sys
# Count amount of "possibly_sensitive_editable" and "possibly_sensitive" # Count amount of "possibly_sensitive_editable" and "possibly_sensitive"
# flagged tweets in user's timeline # flagged tweets in user's timeline
async def count_sensitives(session, user_id): async def count_sensitives(session, user_id):
......
from tests.typeahead import test as test_typeahead from tests.typeahead import test as test_typeahead
from tests.ghostban import test as test_ghost_ban from tests.ghostban import test as test_ghost_ban
from tests.reply_deboosting import test as test_reply_deboosting from tests.reply_deboosting import test as test_reply_deboosting
from tests.profile import test as test_profile
__all__ = ['test_typeahead', 'test_ghost_ban', 'test_reply_deboosting'] __all__ = ['test_typeahead', 'test_ghost_ban', 'test_reply_deboosting', 'test_profile']
import traceback
from log import log from log import log
async def test(session, user_id): async def test(session, user_id):
......
from typing import Any
from features import count_sensitives
from log import log
from twitter_session import TwitterSession, UnexpectedApiError
from util import is_error, is_generic_error
async def test(session: TwitterSession, username: str) -> tuple[str, dict[str, Any]]:
    """Fetch a user's raw profile and condense it into the reported fields.

    Args:
        session: Session used to request the raw profile; it is also handed
            to ``count_sensitives`` for profiles that can be tested further.
        username: Screen name to look up.

    Returns:
        ``(user_id, profile)``. ``user_id`` is the stringified ``id`` from the
        raw profile, or ``''`` when the response has none. ``profile`` always
        contains ``screen_name``, ``exists`` and ``has_tweets``;
        ``restriction``, ``protected``, ``suspended`` and ``sensitives`` are
        only present when they apply.

    Raises:
        UnexpectedApiError: The response lists an error whose code is neither
            50 nor 63.
    """
    profile: dict[str, Any] = {}
    profile_raw = await session.profile_raw(username)
    log.info('Testing %s', username)

    # Codes 50 and 63 are evaluated below (as "exists" / "suspended");
    # any other error code aborts the test.
    if is_generic_error(profile_raw, [50, 63]):
        log.debug("Other error:%s", username)
        raise UnexpectedApiError

    try:
        user_id = str(profile_raw["id"])
    except KeyError:
        user_id = ''

    try:
        profile["screen_name"] = profile_raw["screen_name"]
    except KeyError:
        # Fall back to the name we were asked about.
        profile["screen_name"] = username

    # An empty interstitial type is treated as "no restriction" and omitted.
    try:
        restriction = profile_raw["profile_interstitial_type"]
    except KeyError:
        restriction = ""
    if restriction != "":
        profile["restriction"] = restriction

    try:
        profile["protected"] = profile_raw["protected"]
    except KeyError:
        pass

    profile["exists"] = not is_error(profile_raw, 50)

    suspended = is_error(profile_raw, 63)
    if suspended:
        profile["suspended"] = suspended

    try:
        profile["has_tweets"] = int(profile_raw["statuses_count"]) > 0
    except KeyError:
        profile["has_tweets"] = False

    # Only look at the timeline when there is one to look at. For missing,
    # suspended, protected or tweet-less accounts user_id may be '' and the
    # extra request cannot yield anything useful; the caller stops after the
    # profile for exactly these cases as well.
    # NOTE(review): "sensitives" is therefore absent for such profiles, as it
    # was before the profile test was extracted - confirm no consumer relies
    # on the key always being present.
    testable = (
        profile["exists"]
        and not profile.get("suspended", False)
        and not profile.get("protected", False)
        and profile["has_tweets"]
    )
    if testable:
        profile["sensitives"] = await count_sensitives(session, user_id)

    return user_id, profile
import aiohttp from typing import Any
import time import time
import urllib import urllib, urllib.parse
import os
from log import log
from statistics import count_sensitives
from util import get_nested import aiohttp
from bs4 import BeautifulSoup
from yarl import URL
from log import log
from tests import * from tests import *
# from ghostban import test as test_ghost_ban from util import get_nested, is_error
# from reply_deboosting import test as test_reply_deboosting
# from typeahead import test as test_typeahead
class UnexpectedApiError(Exception): class UnexpectedApiError(Exception):
pass pass
class TwitterSession: class TwitterSession:
twitter_auth_key = None twitter_auth_key = ''
account_sessions = [] account_sessions = []
account_index = 0 account_index = 0
guest_sessions = [] guest_sessions = []
test_index = 0 test_index = 0
accounts = []
def __init__(self): def __init__(self):
self._guest_token = None self._guest_token = ''
self._csrf_token = None self._csrf_token = ''
# aiohttp ClientSession # aiohttp ClientSession
self._session = None self._session = None
...@@ -46,8 +46,8 @@ class TwitterSession: ...@@ -46,8 +46,8 @@ class TwitterSession:
self.reset_headers() self.reset_headers()
def set_csrf_header(self): def set_csrf_header(self):
cookies = self._session.cookie_jar.filter_cookies('https://twitter.com/') cookies = self._session.cookie_jar.filter_cookies(URL('https://twitter.com/'))
for key, cookie in cookies.items(): for _, cookie in cookies.items():
if cookie.key == 'ct0': if cookie.key == 'ct0':
self._headers['X-Csrf-Token'] = cookie.value self._headers['X-Csrf-Token'] = cookie.value
...@@ -57,9 +57,9 @@ class TwitterSession: ...@@ -57,9 +57,9 @@ class TwitterSession:
response = await r.json() response = await r.json()
guest_token = response.get("guest_token", None) guest_token = response.get("guest_token", None)
if guest_token is None: if guest_token is None:
log.debug("Failed to fetch guest token") log.error("Failed to fetch guest token")
log.debug(str(response)) log.error(str(response))
log.debug(str(self._headers)) log.error(str(self._headers))
return guest_token return guest_token
def reset_headers(self): def reset_headers(self):
...@@ -106,6 +106,8 @@ class TwitterSession: ...@@ -106,6 +106,8 @@ class TwitterSession:
cookie_file = os.path.join(cookie_dir, username) cookie_file = os.path.join(cookie_dir, username)
if os.path.isfile(cookie_file): if os.path.isfile(cookie_file):
log.info("Use cookie file for %s" % username) log.info("Use cookie file for %s" % username)
# satisfy linter; https://github.com/aio-libs/aiohttp/issues/4043#issuecomment-529085744
assert isinstance(self._session.cookie_jar, aiohttp.CookieJar)
self._session.cookie_jar.load(cookie_file) self._session.cookie_jar.load(cookie_file)
login_required = False login_required = False
...@@ -136,6 +138,8 @@ class TwitterSession: ...@@ -136,6 +138,8 @@ class TwitterSession:
self.username = username self.username = username
if cookie_file is not None and store_cookies: if cookie_file is not None and store_cookies:
# satisfy linter; https://github.com/aio-libs/aiohttp/issues/4043#issuecomment-529085744
assert isinstance(self._session.cookie_jar, aiohttp.CookieJar)
self._session.cookie_jar.save(cookie_file) self._session.cookie_jar.save(cookie_file)
else: else:
...@@ -174,79 +178,21 @@ class TwitterSession: ...@@ -174,79 +178,21 @@ class TwitterSession:
async def get_profile_tweets_raw(self, user_id): async def get_profile_tweets_raw(self, user_id):
return await self.get("https://api.twitter.com/2/timeline/profile/" + str(user_id) +".json?include_tweet_replies=1&include_want_retweets=0&include_reply_count=1&count=1000") return await self.get("https://api.twitter.com/2/timeline/profile/" + str(user_id) +".json?include_tweet_replies=1&include_want_retweets=0&include_reply_count=1&count=1000")
async def tweet_raw(self, tweet_id, count=20, cursor=None, retry_csrf=True): async def tweet_raw(self, tweet_id, count=20, cursor=None):
if cursor is None: if cursor is None:
cursor = "" cursor = ""
else: else:
cursor = "&cursor=" + urllib.parse.quote(cursor) cursor = "&cursor=" + urllib.parse.quote(cursor)
return await self.get("https://api.twitter.com/2/timeline/conversation/" + tweet_id + ".json?include_reply_count=1&send_error_codes=true&count="+str(count)+ cursor) return await self.get("https://api.twitter.com/2/timeline/conversation/" + tweet_id + ".json?include_reply_count=1&send_error_codes=true&count="+str(count)+ cursor)
@classmethod
def flatten_timeline(cls, timeline_items):
    """Collect tweet ids from timeline entries, in entry order.

    An entry contributes either its own tweet id (found under
    content.item.content.tweet.id) or, if it is a timeline module, the ids
    of the module's items (item.content.tweet.id). Entries matching neither
    shape are ignored.
    """
    collected = []
    for entry in timeline_items:
        # Entry that is a single tweet.
        if get_nested(entry, ["content", "item", "content", "tweet", "id"]) is not None:
            collected.append(entry["content"]["item"]["content"]["tweet"]["id"])
            continue
        # Anything that is not a module either is skipped.
        if get_nested(entry, ["content", "timelineModule", "items"]) is None:
            continue
        for module_item in entry["content"]["timelineModule"]["items"]:
            module_tweet_id = get_nested(module_item, ["item", "content", "tweet", "id"])
            if module_tweet_id is not None:
                collected.append(module_tweet_id)
    return collected
@classmethod
def get_ordered_tweet_ids(cls, obj, filtered=True):
    """Return the tweet ids of a timeline response, highest sortIndex first.

    The entries of the first "addEntries" instruction are sorted in place
    and flattened. With ``filtered`` set, only ids that are also present in
    ``obj["globalObjects"]["tweets"]`` are kept. A response without a usable
    "addEntries" instruction yields an empty list.
    """
    try:
        instructions = obj["timeline"]["instructions"]
        with_entries = [ins for ins in instructions if "addEntries" in ins]
        entries = with_entries[0]["addEntries"]["entries"]
    except (IndexError, KeyError):
        return []

    # Negated key: descending by numeric sortIndex.
    entries.sort(key=lambda entry: -int(entry["sortIndex"]))
    ordered_ids = cls.flatten_timeline(entries)
    if not filtered:
        return list(ordered_ids)
    return [tweet_id for tweet_id in ordered_ids if tweet_id in obj["globalObjects"]["tweets"]]
async def test(self, username): async def test(self, username):
result = {"timestamp": time.time()} result: dict[str, Any] = {"timestamp": time.time()}
profile = {} user_id, profile = await test_profile(self, username)
profile_raw = await self.profile_raw(username)
log.info('Testing ' + str(username))
if is_another_error(profile_raw, [50, 63]):
log.debug("Other error:" + str(username))
raise UnexpectedApiError
try:
user_id = str(profile_raw["id"])
except KeyError:
user_id = None
try:
profile["screen_name"] = profile_raw["screen_name"]
except KeyError:
profile["screen_name"] = username
try:
profile["restriction"] = profile_raw["profile_interstitial_type"]
except KeyError:
pass
if profile.get("restriction", None) == "":
del profile["restriction"]
try:
profile["protected"] = profile_raw["protected"]
except KeyError:
pass
profile["exists"] = not is_error(profile_raw, 50)
suspended = is_error(profile_raw, 63)
if suspended:
profile["suspended"] = suspended
try:
profile["has_tweets"] = int(profile_raw["statuses_count"]) > 0
except KeyError:
profile["has_tweets"] = False
result["profile"] = profile result["profile"] = profile
if not profile["exists"] or profile.get("suspended", False) or profile.get("protected", False) or not profile.get('has_tweets'): if not profile["exists"] or profile.get("suspended", False) or profile.get("protected", False) or not profile.get('has_tweets'):
return result return result
result["profile"]["sensitives"] = await count_sensitives(self, user_id)
result["tests"] = {} result["tests"] = {}
search_raw = await self.search_raw("from:@" + username) search_raw = await self.search_raw("from:@" + username)
...@@ -254,7 +200,7 @@ class TwitterSession: ...@@ -254,7 +200,7 @@ class TwitterSession:
result["tests"]["search"] = False result["tests"]["search"] = False
try: try:
tweets = search_raw["globalObjects"]["tweets"] tweets = search_raw["globalObjects"]["tweets"]
for tweet_id, tweet in sorted(tweets.items(), key=lambda t: t[1]["id"], reverse=True): for tweet_id, _ in sorted(tweets.items(), key=lambda t: t[1]["id"], reverse=True):
result["tests"]["search"] = str(tweet_id) result["tests"]["search"] = str(tweet_id)
break break
...@@ -280,20 +226,13 @@ class TwitterSession: ...@@ -280,20 +226,13 @@ class TwitterSession:
async def close(self): async def close(self):
await self._session.close() await self._session.close()
@classmethod # unused
def next_session(): def next_session():
def key(s): def key(s):
remaining_time = s.reset - time.time() remaining_time = s.reset - time.time()
if s.remaining <= 3 and remaining_time > 0: if s.remaining <= 3 and remaining_time > 0:
return 900 return 900
return remaining_time return remaining_time
sessions = sorted([s for s in TwitterSession.account_sessions if not s.locked], key=key) sessions = sorted([s for s in TwitterSession.account_sessions if not s.locked], key=key)
if len(sessions) > 0: if len(sessions) > 0:
return sessions[0] return sessions[0]
def is_error(result, code=None):
    """Tell whether an API response contains a matching error.

    Args:
        result: Response object offering ``.get``; errors, if any, are a
            list of dicts stored under ``"errors"``.
        code: Error code to look for. ``None`` (default) matches any error.

    Returns:
        ``True`` if ``result["errors"]`` is a list holding an error with the
        given code (or, for ``code=None``, holding anything at all), else
        ``False``.
    """
    errors = result.get("errors", None)
    if not isinstance(errors, list):
        return False
    if code is None:
        # The former len(result["errors"] > 0) compared the list itself with
        # 0 and raised TypeError; the length is what has to be compared.
        return len(errors) > 0
    return any(error.get("code", None) == code for error in errors)
def is_another_error(result, codes):
    """Tell whether the response lists an error whose code is not in ``codes``.

    Responses whose ``"errors"`` entry is missing or not a list never count.
    An error without a ``"code"`` is compared as ``None``.
    """
    error_list = result.get("errors", None)
    if not isinstance(error_list, list):
        return False
    for error in error_list:
        if error.get("code", None) not in codes:
            return True
    return False
...@@ -5,3 +5,28 @@ def get_nested(obj, path, default=None): ...@@ -5,3 +5,28 @@ def get_nested(obj, path, default=None):
obj = obj[p] obj = obj[p]
return obj return obj
def is_error(result, code=None):
    """Report whether an API response carries a matching error.

    Args:
        result: Response object offering ``.get``; errors, if present, are a
            list of dicts under the ``"errors"`` key.
        code: Error code to match. With ``None`` (the default) any entry in
            the error list counts.

    Returns:
        ``True`` when ``result["errors"]`` is a list containing an error with
        ``code`` - or, for ``code=None``, when that list is non-empty.
        ``False`` in every other case, including a missing or non-list
        ``"errors"`` value.
    """
    errors = result.get("errors", None)
    if not isinstance(errors, list):
        return False
    if code is not None:
        return any(err.get("code", None) == code for err in errors)
    # Fix: the closing parenthesis of len() used to sit after "> 0", so a
    # list was compared with an int (TypeError) instead of its length.
    return len(errors) > 0
def is_generic_error(result, codes):
    """Report whether the response lists any error outside of ``codes``.

    Args:
        result: Response object offering ``.get``.
        codes: Container of error codes that are *not* considered generic.

    Returns:
        ``True`` if ``result["errors"]`` is a list and at least one entry's
        ``"code"`` (``None`` when absent) is not in ``codes``; ``False``
        otherwise.
    """
    errors = result.get("errors", None)
    if not isinstance(errors, list):
        return False
    return any(err.get("code", None) not in codes for err in errors)
def flatten_timeline(timeline_items):
    """Flatten timeline entries into a list of tweet ids, keeping entry order.

    Two entry shapes are recognised:
      * a single tweet, whose id sits at content.item.content.tweet.id;
      * a timeline module, whose items each may hold an id at
        item.content.tweet.id (items without one are dropped).
    Entries of any other shape contribute nothing.
    """
    tweet_ids = []
    for entry in timeline_items:
        if get_nested(entry, ["content", "item", "content", "tweet", "id"]) is not None:
            tweet_ids.append(entry["content"]["item"]["content"]["tweet"]["id"])
            continue
        if get_nested(entry, ["content", "timelineModule", "items"]) is None:
            continue
        # Module entry: gather the ids of its items.
        module_items = entry["content"]["timelineModule"]["items"]
        for module_item in module_items:
            nested_id = get_nested(module_item, ["item", "content", "tweet", "id"])
            if nested_id is not None:
                tweet_ids.append(nested_id)
    return tweet_ids
def get_ordered_tweet_ids(obj, filtered=True):
    """List a timeline response's tweet ids, ordered by descending sortIndex.

    Args:
        obj: Timeline response. The entries of its first "addEntries"
            instruction are used and are sorted in place.
        filtered: When true, keep only ids that also appear in
            ``obj["globalObjects"]["tweets"]``.

    Returns:
        The ordered tweet ids; an empty list if the response has no usable
        "addEntries" instruction.
    """
    try:
        candidates = [
            instruction
            for instruction in obj["timeline"]["instructions"]
            if "addEntries" in instruction
        ]
        entries = candidates[0]["addEntries"]["entries"]
    except (IndexError, KeyError):
        return []

    def descending_sort_index(entry):
        # Negate so that the ascending sort yields the largest index first.
        return -int(entry["sortIndex"])

    entries.sort(key=descending_sort_index)
    ordered = flatten_timeline(entries)
    if not filtered:
        return list(ordered)
    return [tweet_id for tweet_id in ordered if tweet_id in obj["globalObjects"]["tweets"]]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment