Commit 10b20d1a authored by minamotorin, committed by GitHub

Merge pull request #2 from minamotorin/twintproject/twint#1335

Twintproject/twint#1335
parents e643a479 c31661d6
@@ -12,6 +12,18 @@ This problem doesn't happen recently.
 - [twintproject/twint#1061](https://github.com/twintproject/twint/issues/1061)
 - [twintproject/twint#1114](https://github.com/twintproject/twint/issues/1114)
+
+### json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
+
+The fix is **not complete**:
+`twint.run.Profile` works, but `twint.run.db` does not,
+so [`test.py`](./test.py) raises an error.
+I suspect this is because the fields of the result table are not exactly the same as the traditional ones.
+
+#### Related
+
+- [twintproject/twint#1335](https://github.com/twintproject/twint/issues/1335)
+
 # TWINT - Twitter Intelligence Tool
 ![2](https://i.imgur.com/iaH3s7z.png)
 ![3](https://i.imgur.com/hVeCrqL.png)
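For reference, a minimal usage sketch of the path the note above reports as working. This is not part of the commit: the username, limit, and database filename are placeholder values.

```python
import twint

# Placeholder configuration; "some_user" and the limit are illustrative values.
c = twint.Config()
c.Username = "some_user"
c.Limit = 20
c.Store_object = True  # keep results in memory instead of writing to a database

# According to the note above, this path works with the fix ...
twint.run.Profile(c)
print(len(twint.output.tweets_list), "tweets collected")

# ... while storing to a database (e.g. c.Database = "tweets.db") reportedly
# still fails, because the result table fields differ from the traditional ones.
```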
@@ -59,9 +59,12 @@ def _get_cursor(response):
         next_cursor = response['timeline']['instructions'][0]['addEntries']['entries'][-1]['content'][
             'operation']['cursor']['value']
     except KeyError:
-        # this is needed because after the first request location of cursor is changed
-        next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
-            'cursor']['value']
+        try:
+            # this is needed because after the first request location of cursor is changed
+            next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
+                'cursor']['value']
+        except KeyError:
+            next_cursor = response['timeline']['instructions'][0]['entries'][-1]['content']['value']
     return next_cursor
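To illustrate the new fallback, here is a small stand-alone sketch with a hand-made dictionary that only mirrors the keys the added branch reads; it is not a real Twitter payload.

```python
# Hand-made stand-in for the GraphQL timeline response; the real payload has
# many more fields, this only mirrors the keys the new fallback reads.
response = {
    'timeline': {
        'instructions': [
            {'entries': [
                {'content': {'value': 'TWEET-1'}},           # tweet entry (shape simplified)
                {'content': {'value': 'CURSOR-BOTTOM-123'}},  # last entry carries the cursor
            ]}
        ]
    }
}

# Same lookup as the new innermost branch of _get_cursor
next_cursor = response['timeline']['instructions'][0]['entries'][-1]['content']['value']
print(next_cursor)  # -> CURSOR-BOTTOM-123
```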
@@ -77,46 +80,54 @@ def Json(response):
 def parse_tweets(config, response):
     logme.debug(__name__ + ':parse_tweets')
     response = loads(response)
-    if len(response['globalObjects']['tweets']) == 0:
-        msg = 'No more data!'
-        raise NoMoreTweetsException(msg)
     feed = []
+    if 'globalObjects' in response:
+        if len(response['globalObjects']['tweets']) == 0:
+            msg = 'No more data!'
+            raise NoMoreTweetsException(msg)
-    for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']:
-        # this will handle the cases when the timeline entry is a tweet
-        if (config.TwitterSearch or config.Profile) and (timeline_entry['entryId'].startswith('sq-I-t-') or
-                                                         timeline_entry['entryId'].startswith('tweet-')):
-            if 'tweet' in timeline_entry['content']['item']['content']:
-                _id = timeline_entry['content']['item']['content']['tweet']['id']
-                # skip the ads
-                if 'promotedMetadata' in timeline_entry['content']['item']['content']['tweet']:
-                    continue
-            elif 'tombstone' in timeline_entry['content']['item']['content'] and 'tweet' in \
-                    timeline_entry['content']['item']['content']['tombstone']:
-                _id = timeline_entry['content']['item']['content']['tombstone']['tweet']['id']
-            else:
-                _id = None
-            if _id is None:
-                raise ValueError('Unable to find ID of tweet in timeline.')
-            try:
-                temp_obj = response['globalObjects']['tweets'][_id]
-            except KeyError:
-                logme.info('encountered a deleted tweet with id {}'.format(_id))
-                config.deleted.append(_id)
-                continue
-            temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
-            if 'retweeted_status_id_str' in temp_obj:
-                rt_id = temp_obj['retweeted_status_id_str']
-                _dt = response['globalObjects']['tweets'][rt_id]['created_at']
-                _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
-                _dt = utc_to_local(_dt)
-                _dt = str(_dt.strftime(Tweet_formats['datetime']))
-                temp_obj['retweet_data'] = {
-                    'user_rt_id': response['globalObjects']['tweets'][rt_id]['user_id_str'],
-                    'user_rt': response['globalObjects']['tweets'][rt_id]['full_text'],
-                    'retweet_id': rt_id,
-                    'retweet_date': _dt,
-                }
-            feed.append(temp_obj)
+        for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']:
+            # this will handle the cases when the timeline entry is a tweet
+            if (config.TwitterSearch or config.Profile) and (timeline_entry['entryId'].startswith('sq-I-t-') or
+                                                             timeline_entry['entryId'].startswith('tweet-')):
+                if 'tweet' in timeline_entry['content']['item']['content']:
+                    _id = timeline_entry['content']['item']['content']['tweet']['id']
+                    # skip the ads
+                    if 'promotedMetadata' in timeline_entry['content']['item']['content']['tweet']:
+                        continue
+                elif 'tombstone' in timeline_entry['content']['item']['content'] and 'tweet' in \
+                        timeline_entry['content']['item']['content']['tombstone']:
+                    _id = timeline_entry['content']['item']['content']['tombstone']['tweet']['id']
+                else:
+                    _id = None
+                if _id is None:
+                    raise ValueError('Unable to find ID of tweet in timeline.')
+                try:
+                    temp_obj = response['globalObjects']['tweets'][_id]
+                except KeyError:
+                    logme.info('encountered a deleted tweet with id {}'.format(_id))
+                    config.deleted.append(_id)
+                    continue
+                temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
+                if 'retweeted_status_id_str' in temp_obj:
+                    rt_id = temp_obj['retweeted_status_id_str']
+                    _dt = response['globalObjects']['tweets'][rt_id]['created_at']
+                    _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
+                    _dt = utc_to_local(_dt)
+                    _dt = str(_dt.strftime(Tweet_formats['datetime']))
+                    temp_obj['retweet_data'] = {
+                        'user_rt_id': response['globalObjects']['tweets'][rt_id]['user_id_str'],
+                        'user_rt': response['globalObjects']['tweets'][rt_id]['full_text'],
+                        'retweet_id': rt_id,
+                        'retweet_date': _dt,
+                    }
+                feed.append(temp_obj)
+    else:
+        response = response['data']['user']['result']['timeline']
+        for timeline_entry in response['timeline']['instructions'][0]['entries']:
+            if timeline_entry['content'].get('itemContent'):
+                temp_obj = timeline_entry['content']['itemContent']['tweet_results']['result']['legacy']
+                temp_obj['user_data'] = timeline_entry['content']['itemContent']['tweet_results']['result']['core']['user_results']['result']['legacy']
+                feed.append(temp_obj)
     next_cursor = _get_cursor(response)
     return feed, next_cursor
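A stand-alone sketch of the response shape the new `else` branch expects; all values are invented, only the nesting mirrors the keys used in the diff above.

```python
# Hand-made example of a GraphQL UserTweetsAndReplies-style response;
# values are invented, only the key nesting matters.
tweet_result = {
    'legacy': {'id_str': '1', 'full_text': 'hello world'},
    'core': {'user_results': {'result': {'legacy': {'screen_name': 'some_user'}}}},
}
entry = {'content': {'itemContent': {'tweet_results': {'result': tweet_result}}}}
response = {
    'data': {'user': {'result': {'timeline': {
        'timeline': {'instructions': [{'entries': [entry]}]},
    }}}},
}

# Same extraction logic as the new else-branch of parse_tweets
feed = []
timeline = response['data']['user']['result']['timeline']
for timeline_entry in timeline['timeline']['instructions'][0]['entries']:
    if timeline_entry['content'].get('itemContent'):
        temp_obj = timeline_entry['content']['itemContent']['tweet_results']['result']['legacy']
        temp_obj['user_data'] = timeline_entry['content']['itemContent']['tweet_results']['result']['core']['user_results']['result']['legacy']
        feed.append(temp_obj)

print(feed)  # [{'id_str': '1', 'full_text': 'hello world', 'user_data': {...}}]
```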
 import datetime
+import json
 from sys import platform
 import logging as logme
 from urllib.parse import urlencode
@@ -168,37 +169,27 @@ async def Search(config, init):
 def SearchProfile(config, init=None):
     logme.debug(__name__ + ':SearchProfile')
-    _url = 'https://api.twitter.com/2/timeline/profile/{user_id}.json'.format(user_id=config.User_id)
+    _url = 'https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies'
     tweet_count = 100
-    params = [
-        # some of the fields are not required, need to test which ones aren't required
-        ('include_profile_interstitial_type', '1'),
-        ('include_blocking', '1'),
-        ('include_blocked_by', '1'),
-        ('include_followed_by', '1'),
-        ('include_want_retweets', '1'),
-        ('include_mute_edge', '1'),
-        ('include_can_dm', '1'),
-        ('include_can_media_tag', '1'),
-        ('skip_status', '1'),
-        ('cards_platform', 'Web-12'),
-        ('include_cards', '1'),
-        ('include_ext_alt_text', 'true'),
-        ('include_quote_count', 'true'),
-        ('include_reply_count', '1'),
-        ('tweet_mode', 'extended'),
-        ('include_entities', 'true'),
-        ('include_user_entities', 'true'),
-        ('include_ext_media_color', 'true'),
-        ('include_ext_media_availability', 'true'),
-        ('send_error_codes', 'true'),
-        ('simple_quoted_tweet', 'true'),
-        ('include_tweet_replies', 'true'),
-        ('count', tweet_count),
-        ('ext', 'mediaStats%2ChighlightedLabel'),
-    ]
+    variables = {
+        "userId": config.User_id,
+        "count": tweet_count,
+        "includePromotedContent": True,
+        "withCommunity": True,
+        "withSuperFollowsUserFields": True,
+        "withBirdwatchPivots": False,
+        "withDownvotePerspective": False,
+        "withReactionsMetadata": False,
+        "withReactionsPerspective": False,
+        "withSuperFollowsTweetFields": True,
+        "withVoice": True,
+        "withV2Timeline": False,
+        "__fs_interactive_text": False,
+        "__fs_dont_mention_me_view_api_enabled": False,
+    }
     if type(init) == str:
-        params.append(('cursor', str(init)))
+        variables['cursor'] = init
+    params = [('variables', json.dumps(variables, separators=(',',':')))]
     _serialQuery = _sanitizeQuery(_url, params)
-    return _url, params, _serialQuery
+    return _serialQuery, [], _serialQuery
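To show what the new `variables` parameter turns into, here is a small sketch using only the standard library; the user id is a placeholder and the exact quoting applied by twint's `_sanitizeQuery` may differ slightly.

```python
import json
from urllib.parse import urlencode, quote

# Reduced set of GraphQL variables; "123456" is a placeholder user id.
variables = {
    "userId": "123456",
    "count": 100,
    "includePromotedContent": True,
    "withV2Timeline": False,
}

_url = 'https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies'
# Compact JSON, exactly as in the diff: no spaces after ',' or ':'
params = [('variables', json.dumps(variables, separators=(',', ':')))]

# Roughly what the serialized query looks like; twint's _sanitizeQuery may
# apply slightly different quoting rules.
print(_url + '?' + urlencode(params, quote_via=quote))
```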