Commit 10b20d1a authored by minamotorin's avatar minamotorin Committed by GitHub

Merge pull request #2 from minamotorin/twintproject/twint#1335

Twintproject/twint#1335
parents e643a479 c31661d6
......@@ -12,6 +12,18 @@ This problem doesn't happen recently.
- [twintproject/twint#1061](https://github.com/twintproject/twint/issues/1061)
- [twintproject/twint#1114](https://github.com/twintproject/twint/issues/1114)
### json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The fix is **not complete**.
`twint.run.Profile` works, but `twint.run.db` does not.
As a result, running [`test.py`](./test.py) still raises an error.
I think this is because the fields of the returned result table are no longer exactly the same as the ones the previous API returned.
#### Related
- [twintproject/twint#1335](https://github.com/twintproject/twint/issues/1335)
# TWINT - Twitter Intelligence Tool
![2](https://i.imgur.com/iaH3s7z.png)
![3](https://i.imgur.com/hVeCrqL.png)
......
......@@ -59,9 +59,12 @@ def _get_cursor(response):
next_cursor = response['timeline']['instructions'][0]['addEntries']['entries'][-1]['content'][
'operation']['cursor']['value']
except KeyError:
# this is needed because after the first request location of cursor is changed
next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
'cursor']['value']
try:
# this is needed because after the first request location of cursor is changed
next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
'cursor']['value']
except KeyError:
next_cursor = response['timeline']['instructions'][0]['entries'][-1]['content']['value']
return next_cursor
......@@ -77,46 +80,54 @@ def Json(response):
def parse_tweets(config, response):
    """Parse a raw timeline response into tweet dicts and the next cursor.

    Two response layouts are handled:

    * a payload with a top-level ``globalObjects`` key, where the timeline
      entries only carry tweet ids and the tweet/user bodies are looked up
      in ``globalObjects``;
    * a payload without ``globalObjects``, where the timeline is nested
      under ``data.user.result.timeline`` and every entry embeds its own
      tweet and user bodies.

    :param config: run configuration; ``TwitterSearch``, ``Profile`` and
        ``deleted`` are read/updated here.
    :param response: JSON text of the timeline response.
    :return: ``(feed, next_cursor)`` where ``feed`` is a list of tweet dicts,
        each with an added ``user_data`` key, and ``next_cursor`` is whatever
        ``_get_cursor`` extracts from the timeline.
    :raises NoMoreTweetsException: when a ``globalObjects`` payload contains
        no tweets.
    :raises ValueError: when a tweet entry carries no recognisable tweet id.
    """
    logme.debug(__name__ + ':parse_tweets')
    response = loads(response)
    feed = []
    if 'globalObjects' in response:
        if len(response['globalObjects']['tweets']) == 0:
            msg = 'No more data!'
            raise NoMoreTweetsException(msg)
        for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']:
            # this will handle the cases when the timeline entry is a tweet
            if (config.TwitterSearch or config.Profile) and \
                    timeline_entry['entryId'].startswith(('sq-I-t-', 'tweet-')):
                if 'tweet' in timeline_entry['content']['item']['content']:
                    _id = timeline_entry['content']['item']['content']['tweet']['id']
                    # skip the ads
                    if 'promotedMetadata' in timeline_entry['content']['item']['content']['tweet']:
                        continue
                elif 'tombstone' in timeline_entry['content']['item']['content'] and 'tweet' in \
                        timeline_entry['content']['item']['content']['tombstone']:
                    _id = timeline_entry['content']['item']['content']['tombstone']['tweet']['id']
                else:
                    _id = None
                if _id is None:
                    raise ValueError('Unable to find ID of tweet in timeline.')
                try:
                    temp_obj = response['globalObjects']['tweets'][_id]
                except KeyError:
                    # the entry references a tweet whose body is not in the payload
                    logme.info('encountered a deleted tweet with id {}'.format(_id))
                    config.deleted.append(_id)
                    continue
                temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
                if 'retweeted_status_id_str' in temp_obj:
                    rt_id = temp_obj['retweeted_status_id_str']
                    _dt = response['globalObjects']['tweets'][rt_id]['created_at']
                    _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
                    _dt = utc_to_local(_dt)
                    _dt = str(_dt.strftime(Tweet_formats['datetime']))
                    temp_obj['retweet_data'] = {
                        'user_rt_id': response['globalObjects']['tweets'][rt_id]['user_id_str'],
                        'user_rt': response['globalObjects']['tweets'][rt_id]['full_text'],
                        'retweet_id': rt_id,
                        'retweet_date': _dt,
                    }
                feed.append(temp_obj)
    else:
        # No 'globalObjects': descend to the nested timeline. `response` is
        # rebound on purpose so that _get_cursor below receives the same
        # object that holds 'timeline' -> 'instructions'.
        response = response['data']['user']['result']['timeline']
        for timeline_entry in response['timeline']['instructions'][0]['entries']:
            item_content = timeline_entry['content'].get('itemContent')
            if not item_content:
                # entries without 'itemContent' carry no tweet body
                continue
            try:
                result = item_content['tweet_results']['result']
                temp_obj = result['legacy']
                temp_obj['user_data'] = result['core']['user_results']['result']['legacy']
            except KeyError:
                # Entry lacks the expected tweet/user payload; skip it rather
                # than abort the whole page.
                logme.debug(__name__ + ':parse_tweets:skipping entry without tweet payload')
                continue
            feed.append(temp_obj)
    next_cursor = _get_cursor(response)
    return feed, next_cursor
import datetime
import json
from sys import platform
import logging as logme
from urllib.parse import urlencode
......@@ -168,37 +169,27 @@ async def Search(config, init):
def SearchProfile(config, init=None):
    """Build the request for a user's tweets-and-replies timeline.

    The query is sent to the ``UserTweetsAndReplies`` GraphQL endpoint; all
    options travel as one compact JSON document in the ``variables`` query
    parameter.

    :param config: run configuration; only ``User_id`` is read here.
    :param init: pagination cursor. It is added to the variables only when it
        is a string; any other value (including ``None``) requests the first
        page.
    :return: ``(url, params, serial_query)``. The query string is already
        part of the returned URL (the result of ``_sanitizeQuery``), so
        ``params`` is an empty list and the first and third items are the
        same value.
    """
    logme.debug(__name__ + ':SearchProfile')
    _url = 'https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies'
    tweet_count = 100
    variables = {
        "userId": config.User_id,
        "count": tweet_count,
        "includePromotedContent": True,
        "withCommunity": True,
        "withSuperFollowsUserFields": True,
        "withBirdwatchPivots": False,
        "withDownvotePerspective": False,
        "withReactionsMetadata": False,
        "withReactionsPerspective": False,
        "withSuperFollowsTweetFields": True,
        "withVoice": True,
        "withV2Timeline": False,
        "__fs_interactive_text": False,
        "__fs_dont_mention_me_view_api_enabled": False,
    }
    if isinstance(init, str):
        variables['cursor'] = init
    # compact separators keep the JSON free of whitespace inside the URL
    params = [('variables', json.dumps(variables, separators=(',', ':')))]
    _serialQuery = _sanitizeQuery(_url, params)
    return _serialQuery, [], _serialQuery
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment