Commit 26f6f11f authored by Raphael Beer

Refactor: profile test, linting

parent 8f91a6a8
import aiohttp
import argparse
import asyncio
import daemon
import json
import os
import re
import urllib.parse
import sys
import time
from aiohttp import web
from bs4 import BeautifulSoup
import daemon
from log import *
from db import connect
from db import connect, Database
from log import log, add_file_handler, set_log_level, shutdown_logging
from twitter_session import TwitterSession
log_file = None
debug_file = None
db = None
LOG_FILE = None
DEBUG_FILE = None
DB: Database
routes = web.RouteTableDef()
......@@ -54,12 +49,12 @@ async def api(request):
session = TwitterSession.guest_sessions[TwitterSession.test_index % len(TwitterSession.guest_sessions)]
TwitterSession.test_index += 1
result = await session.test(screen_name)
db.write_result(result)
log.debug(json.dumps(result) + '\n')
if (args.cors_allow is not None):
DB.write_result(result)
log.debug('\n %s', json.dumps(result))
if args.cors_allow is not None:
return web.json_response(result, headers={"Access-Control-Allow-Origin": args.cors_allow})
else:
return web.json_response(result)
return web.json_response(result)
async def login_accounts(accounts, cookie_dir=None):
if cookie_dir is not None and not os.path.isdir(cookie_dir):
......@@ -67,12 +62,12 @@ async def login_accounts(accounts, cookie_dir=None):
coroutines = []
for acc in accounts:
session = TwitterSession()
coroutines.append(session.login(*acc, cookie_dir=cookie_dir))
coroutines.append(session.login(**acc, cookie_dir=cookie_dir))
TwitterSession.account_sessions.append(session)
await asyncio.gather(*coroutines)
async def login_guests():
    """Fill the guest session pool and log all guest sessions in concurrently.

    Appends ``guest_session_pool_size`` fresh ``TwitterSession`` objects to
    ``TwitterSession.guest_sessions``, then awaits ``login()`` on every
    session in that list (including any that were already in it).
    """
    for _ in range(guest_session_pool_size):
        TwitterSession.guest_sessions.append(TwitterSession())
    await asyncio.gather(*[s.login() for s in TwitterSession.guest_sessions])
......@@ -80,7 +75,7 @@ async def login_guests():
def ensure_dir(path):
    """Create directory *path* if it is not already a directory.

    Only the last path component is created (``os.mkdir``), so a missing
    parent directory still raises ``FileNotFoundError``.
    """
    if not os.path.isdir(path):
        log.info('Creating directory %s', path)
        os.mkdir(path)
parser = argparse.ArgumentParser(description='Twitter Shadowban Tester')
......@@ -93,8 +88,8 @@ parser.add_argument('--port', type=int, default=8080, help='port which to listen
parser.add_argument('--host', type=str, default='127.0.0.1', help='hostname/ip which to listen on (default:127.0.0.1)')
parser.add_argument('--mongo-host', type=str, default='localhost', help='hostname or IP of mongoDB service to connect to (default: localhost)')
parser.add_argument('--mongo-port', type=int, default=27017, help='port of mongoDB service to connect to (default: 27017)')
parser.add_argument('--mongo-db', type=str, default='tester', help='name of mongo database to use (default: tester)')
parser.add_argument('--mongo-username', type=str, default=None, help='user with read/write permissions to --mongo-db')
parser.add_argument('--mongo-DB', type=str, default='tester', help='name of mongo database to use (default: tester)')
parser.add_argument('--mongo-username', type=str, default=None, help='user with read/write permissions to --mongo-DB')
parser.add_argument('--mongo-password', type=str, default=None, help='password for --mongo-username')
parser.add_argument('--twitter-auth-key', type=str, default=None, help='auth key for twitter guest session', required=True)
parser.add_argument('--cors-allow', type=str, default=None, help='value for Access-Control-Allow-Origin header')
......@@ -104,10 +99,10 @@ args = parser.parse_args()
TwitterSession.twitter_auth_key = args.twitter_auth_key
guest_session_pool_size = args.guest_sessions
if (args.cors_allow is None):
if args.cors_allow is None:
log.warning('[CORS] Running without CORS headers')
else:
log.info('[CORS] Allowing requests from: ' + args.cors_allow)
log.info('[CORS] Allowing requests from: %s', args.cors_allow)
ensure_dir(args.cookie_dir)
......@@ -117,9 +112,9 @@ add_file_handler(args.log)
# Load the account list on a best-effort basis: when the file cannot be read
# or parsed, TwitterSession.accounts simply keeps its current value.
try:
    with open(args.account_file, "r") as f:
        TwitterSession.accounts = json.load(f)
except Exception as err:
    # Deliberately non-fatal, but no longer silent.
    log.warning('Could not load accounts from %s: %s', args.account_file, err)
if args.debug is True:
set_log_level('debug')
......@@ -127,20 +122,20 @@ else:
set_log_level('info')
async def shut_down(app):
    """Close every guest session, one after another.

    ``app`` is accepted but not used; presumably this coroutine is registered
    as an aiohttp shutdown handler (the registration is not visible here).
    """
    log.info("Closing %d guest sessions", len(TwitterSession.guest_sessions))
    for session in TwitterSession.guest_sessions:
        await session.close()
async def clean_up(app):
    """Close the database connection and shut down logging.

    ``app`` is accepted but not used. ``DB`` is the module-level database
    handle; it is only read here, so no ``global`` statement is needed.
    """
    log.info("Closing database connection")
    DB.close()
    shutdown_logging()
def run():
global db
db = connect(
global DB
DB = connect(
host=args.mongo_host,
port=args.mongo_port,
username=args.mongo_username,
......@@ -148,7 +143,7 @@ def run():
)
loop = asyncio.get_event_loop()
loop.run_until_complete(login_accounts(accounts, args.cookie_dir))
loop.run_until_complete(login_accounts(TwitterSession.accounts, args.cookie_dir))
loop.run_until_complete(login_guests())
app = web.Application()
......
import copy
import traceback
import sys
from time import sleep
from pymongo import MongoClient, errors as MongoErrors
from pymongo import MongoClient
from log import log
......@@ -12,8 +10,8 @@ class Database:
RESULTS_COLLECTION = 'results'
RATELIMIT_COLLECTION = 'rate-limits'
log.info('Connecting to ' + host + ':' + str(port))
log.info('Using Database `' + db + '`')
log.info('Connecting to %s:%d', host, port)
log.info('Using Database `%s`', db)
# client and DB
self.client = MongoClient(host, port, serverSelectionTimeoutMS=3, username=username, password=password)
self.db = self.client[db]
......
import sys
# Count amount of "possibly_sensitive_editable" and "possibly_sensitive"
# flagged tweets in user's timeline
async def count_sensitives(session, user_id):
......
from tests.typeahead import test as test_typeahead
from tests.ghostban import test as test_ghost_ban
from tests.reply_deboosting import test as test_reply_deboosting
from tests.profile import test as test_profile
__all__ = ['test_typeahead', 'test_ghost_ban', 'test_reply_deboosting']
__all__ = ['test_typeahead', 'test_ghost_ban', 'test_reply_deboosting', 'test_profile']
import traceback
from log import log
async def test(session, user_id):
......
from typing import Any
from features import count_sensitives
from log import log
from twitter_session import TwitterSession, UnexpectedApiError
from util import is_error, is_generic_error
async def test(session: TwitterSession, username: str) -> tuple[str, dict[str, Any]]:
    """Fetch the raw profile for *username* and condense it into a summary.

    Returns:
        ``(user_id, profile)``. ``user_id`` is the stringified ``id`` of the
        raw profile, or ``''`` when the raw profile has no ``id``. ``profile``
        holds, in this order: ``screen_name``, optional ``restriction``,
        optional ``protected``, ``exists``, optional ``suspended``,
        ``has_tweets`` and ``sensitives``.

    Raises:
        UnexpectedApiError: when ``is_generic_error`` reports an error code
            other than 50 or 63 in the raw profile.
    """
    raw = await session.profile_raw(username)
    log.info('Testing ' + str(username))

    if is_generic_error(raw, [50, 63]):
        log.debug("Other error:" + str(username))
        raise UnexpectedApiError

    user_id = str(raw["id"]) if "id" in raw else ''

    # Keys are inserted in a fixed order so the resulting dict is laid out
    # the same way on every run.
    profile: dict[str, Any] = {}
    profile["screen_name"] = raw["screen_name"] if "screen_name" in raw else username

    # An empty interstitial type is treated like an absent one.
    restriction = raw.get("profile_interstitial_type", "")
    if restriction != "":
        profile["restriction"] = restriction

    if "protected" in raw:
        profile["protected"] = raw["protected"]

    # Error code 50 marks the profile as not existing, 63 as suspended.
    profile["exists"] = not is_error(raw, 50)
    suspended = is_error(raw, 63)
    if suspended:
        profile["suspended"] = suspended

    profile["has_tweets"] = "statuses_count" in raw and int(raw["statuses_count"]) > 0

    profile["sensitives"] = await count_sensitives(session, user_id)
    return user_id, profile
import aiohttp
from typing import Any
import time
import urllib
from log import log
from statistics import count_sensitives
import urllib, urllib.parse
import os
from util import get_nested
import aiohttp
from bs4 import BeautifulSoup
from yarl import URL
from log import log
from tests import *
# from ghostban import test as test_ghost_ban
# from reply_deboosting import test as test_reply_deboosting
# from typeahead import test as test_typeahead
from util import get_nested, is_error
class UnexpectedApiError(Exception):
    """Signals an API error response that the tests do not know how to handle.

    Raised by the profile test when ``is_generic_error`` flags the raw profile
    response, i.e. it carries an error code other than 50 or 63.
    """
class TwitterSession:
twitter_auth_key = None
twitter_auth_key = ''
account_sessions = []
account_index = 0
guest_sessions = []
test_index = 0
accounts = []
def __init__(self):
self._guest_token = None
self._csrf_token = None
self._guest_token = ''
self._csrf_token = ''
# aiohttp ClientSession
self._session = None
......@@ -46,8 +46,8 @@ class TwitterSession:
self.reset_headers()
def set_csrf_header(self):
cookies = self._session.cookie_jar.filter_cookies('https://twitter.com/')
for key, cookie in cookies.items():
cookies = self._session.cookie_jar.filter_cookies(URL('https://twitter.com/'))
for _, cookie in cookies.items():
if cookie.key == 'ct0':
self._headers['X-Csrf-Token'] = cookie.value
......@@ -57,9 +57,9 @@ class TwitterSession:
response = await r.json()
guest_token = response.get("guest_token", None)
if guest_token is None:
log.debug("Failed to fetch guest token")
log.debug(str(response))
log.debug(str(self._headers))
log.error("Failed to fetch guest token")
log.error(str(response))
log.error(str(self._headers))
return guest_token
def reset_headers(self):
......@@ -106,6 +106,8 @@ class TwitterSession:
cookie_file = os.path.join(cookie_dir, username)
if os.path.isfile(cookie_file):
log.info("Use cookie file for %s" % username)
# satisfy linter; https://github.com/aio-libs/aiohttp/issues/4043#issuecomment-529085744
assert isinstance(self._session.cookie_jar, aiohttp.CookieJar)
self._session.cookie_jar.load(cookie_file)
login_required = False
......@@ -136,6 +138,8 @@ class TwitterSession:
self.username = username
if cookie_file is not None and store_cookies:
# satisfy linter; https://github.com/aio-libs/aiohttp/issues/4043#issuecomment-529085744
assert isinstance(self._session.cookie_jar, aiohttp.CookieJar)
self._session.cookie_jar.save(cookie_file)
else:
......@@ -174,79 +178,21 @@ class TwitterSession:
async def get_profile_tweets_raw(self, user_id):
return await self.get("https://api.twitter.com/2/timeline/profile/" + str(user_id) +".json?include_tweet_replies=1&include_want_retweets=0&include_reply_count=1&count=1000")
async def tweet_raw(self, tweet_id, count=20, cursor=None, retry_csrf=True):
async def tweet_raw(self, tweet_id, count=20, cursor=None):
if cursor is None:
cursor = ""
else:
cursor = "&cursor=" + urllib.parse.quote(cursor)
return await self.get("https://api.twitter.com/2/timeline/conversation/" + tweet_id + ".json?include_reply_count=1&send_error_codes=true&count="+str(count)+ cursor)
@classmethod
def flatten_timeline(cls, timeline_items):
result = []
for item in timeline_items:
if get_nested(item, ["content", "item", "content", "tweet", "id"]) is not None:
result.append(item["content"]["item"]["content"]["tweet"]["id"])
elif get_nested(item, ["content", "timelineModule", "items"]) is not None:
timeline_items = item["content"]["timelineModule"]["items"]
titems = [get_nested(x, ["item", "content", "tweet", "id"]) for x in timeline_items]
result += [x for x in titems if x is not None]
return result
@classmethod
def get_ordered_tweet_ids(cls, obj, filtered=True):
try:
entries = [x for x in obj["timeline"]["instructions"] if "addEntries" in x][0]["addEntries"]["entries"]
except (IndexError, KeyError):
return []
entries.sort(key=lambda x: -int(x["sortIndex"]))
flat = cls.flatten_timeline(entries)
return [x for x in flat if not filtered or x in obj["globalObjects"]["tweets"]]
async def test(self, username):
result = {"timestamp": time.time()}
profile = {}
profile_raw = await self.profile_raw(username)
log.info('Testing ' + str(username))
if is_another_error(profile_raw, [50, 63]):
log.debug("Other error:" + str(username))
raise UnexpectedApiError
try:
user_id = str(profile_raw["id"])
except KeyError:
user_id = None
try:
profile["screen_name"] = profile_raw["screen_name"]
except KeyError:
profile["screen_name"] = username
try:
profile["restriction"] = profile_raw["profile_interstitial_type"]
except KeyError:
pass
if profile.get("restriction", None) == "":
del profile["restriction"]
try:
profile["protected"] = profile_raw["protected"]
except KeyError:
pass
profile["exists"] = not is_error(profile_raw, 50)
suspended = is_error(profile_raw, 63)
if suspended:
profile["suspended"] = suspended
try:
profile["has_tweets"] = int(profile_raw["statuses_count"]) > 0
except KeyError:
profile["has_tweets"] = False
result: dict[str, Any] = {"timestamp": time.time()}
user_id, profile = await test_profile(self, username)
result["profile"] = profile
if not profile["exists"] or profile.get("suspended", False) or profile.get("protected", False) or not profile.get('has_tweets'):
return result
result["profile"]["sensitives"] = await count_sensitives(self, user_id)
result["tests"] = {}
search_raw = await self.search_raw("from:@" + username)
......@@ -254,7 +200,7 @@ class TwitterSession:
result["tests"]["search"] = False
try:
tweets = search_raw["globalObjects"]["tweets"]
for tweet_id, tweet in sorted(tweets.items(), key=lambda t: t[1]["id"], reverse=True):
for tweet_id, _ in sorted(tweets.items(), key=lambda t: t[1]["id"], reverse=True):
result["tests"]["search"] = str(tweet_id)
break
......@@ -280,20 +226,13 @@ class TwitterSession:
async def close(self):
await self._session.close()
@classmethod
def next_session():
def key(s):
remaining_time = s.reset - time.time()
if s.remaining <= 3 and remaining_time > 0:
return 900
return remaining_time
sessions = sorted([s for s in TwitterSession.account_sessions if not s.locked], key=key)
if len(sessions) > 0:
return sessions[0]
def is_error(result, code=None):
return isinstance(result.get("errors", None), list) and (len([x for x in result["errors"] if x.get("code", None) == code]) > 0 or code is None and len(result["errors"] > 0))
def is_another_error(result, codes):
return isinstance(result.get("errors", None), list) and len([x for x in result["errors"] if x.get("code", None) not in codes]) > 0
# unused
def next_session():
    """Return the unlocked account session with the smallest wait key.

    The key of a session is the number of seconds until its ``reset`` time,
    except that a session with ``remaining <= 3`` whose reset still lies in
    the future is ranked with a fixed key of 900 (presumably to push nearly
    exhausted sessions to the back; confirm against the rate-limit logic).

    Returns ``None`` when there is no unlocked account session.
    """
    def wait_key(candidate):
        seconds_left = candidate.reset - time.time()
        nearly_exhausted = candidate.remaining <= 3 and seconds_left > 0
        return 900 if nearly_exhausted else seconds_left

    unlocked = [s for s in TwitterSession.account_sessions if not s.locked]
    if not unlocked:
        return None
    # min() yields the first of several equal-keyed sessions, the same one a
    # stable sort would place at index 0.
    return min(unlocked, key=wait_key)
......@@ -5,3 +5,28 @@ def get_nested(obj, path, default=None):
obj = obj[p]
return obj
def is_error(result, code=None):
    """Tell whether an API response carries an error.

    Args:
        result: response mapping; its ``"errors"`` entry is inspected.
        code: when given, only an error whose ``"code"`` equals it counts;
            when ``None``, any entry in the error list counts.

    Returns:
        ``True`` if ``result["errors"]`` is a list containing a matching
        error, ``False`` otherwise (including when ``"errors"`` is missing or
        is not a list).
    """
    errors = result.get("errors", None)
    if not isinstance(errors, list):
        return False
    if code is None:
        # Any error at all counts. The previous form evaluated
        # len(errors > 0), which raises TypeError (list compared to int).
        return len(errors) > 0
    return any(err.get("code", None) == code for err in errors)
def is_generic_error(result, codes):
    """Tell whether a response carries an error outside the expected *codes*.

    Returns ``True`` if ``result["errors"]`` is a list and at least one entry
    has a ``"code"`` (``None`` when absent) that is not in *codes*; ``False``
    otherwise.
    """
    errors = result.get("errors", None)
    if not isinstance(errors, list):
        return False
    unexpected = [err for err in errors if err.get("code", None) not in codes]
    return bool(unexpected)
def flatten_timeline(timeline_items):
    """Collect tweet ids from timeline entries, in entry order.

    An entry contributes either its own tweet id (found at
    ``content.item.content.tweet.id``) or, failing that, the ids of the items
    in its ``content.timelineModule.items`` list (each at
    ``item.content.tweet.id``). Entries and module items without an id are
    skipped.
    """
    tweet_ids = []
    for entry in timeline_items:
        own_id = get_nested(entry, ["content", "item", "content", "tweet", "id"])
        if own_id is not None:
            tweet_ids.append(own_id)
            continue

        module_items = get_nested(entry, ["content", "timelineModule", "items"])
        if module_items is None:
            continue

        for module_item in module_items:
            nested_id = get_nested(module_item, ["item", "content", "tweet", "id"])
            if nested_id is not None:
                tweet_ids.append(nested_id)
    return tweet_ids
def get_ordered_tweet_ids(obj, filtered=True):
    """Return the timeline's tweet ids ordered by descending ``sortIndex``.

    The entries come from the first ``addEntries`` instruction in
    ``obj["timeline"]["instructions"]``; if that cannot be found, an empty
    list is returned. The entries list is sorted in place. With *filtered*
    true, only ids present in ``obj["globalObjects"]["tweets"]`` are kept.
    """
    try:
        instructions = obj["timeline"]["instructions"]
        add_entries = [ins for ins in instructions if "addEntries" in ins]
        entries = add_entries[0]["addEntries"]["entries"]
    except (IndexError, KeyError):
        return []

    # reverse=True keeps equal-keyed entries in their original order, the same
    # as sorting ascending on the negated index.
    entries.sort(key=lambda entry: int(entry["sortIndex"]), reverse=True)

    ordered_ids = flatten_timeline(entries)
    if not filtered:
        return list(ordered_ids)
    # The tweets lookup stays inside the comprehension so it is only touched
    # when there is at least one id to check.
    return [tid for tid in ordered_ids if tid in obj["globalObjects"]["tweets"]]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment