Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
T
Twint
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nanahira
Twint
Commits
2d638de0
Commit
2d638de0
authored
Oct 09, 2020
by
Himanshu Dabas
Committed by
GitHub
Oct 09, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix for deprecation of v1.1 endpoints (#944)
parent
421a155a
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
712 additions
and
409 deletions
+712
-409
twint/cli.py
twint/cli.py
+3
-0
twint/config.py
twint/config.py
+2
-0
twint/feed.py
twint/feed.py
+59
-7
twint/format.py
twint/format.py
+3
-2
twint/get.py
twint/get.py
+110
-62
twint/output.py
twint/output.py
+80
-68
twint/run.py
twint/run.py
+93
-52
twint/token.py
twint/token.py
+63
-0
twint/tweet.py
twint/tweet.py
+171
-63
twint/url.py
twint/url.py
+91
-19
twint/user.py
twint/user.py
+37
-136
No files found.
twint/cli.py
View file @
2d638de0
...
@@ -309,3 +309,6 @@ def run_as_command():
...
@@ -309,3 +309,6 @@ def run_as_command():
sys
.
exit
(
0
)
sys
.
exit
(
0
)
main
()
main
()
if
__name__
==
'__main__'
:
main
()
twint/config.py
View file @
2d638de0
...
@@ -81,3 +81,5 @@ class Config:
...
@@ -81,3 +81,5 @@ class Config:
TranslateDest
:
str
=
"en"
TranslateDest
:
str
=
"en"
Backoff_exponent
:
float
=
3.0
Backoff_exponent
:
float
=
3.0
Min_wait_time
:
int
=
0
Min_wait_time
:
int
=
0
Bearer_token
:
str
=
None
Guest_token
:
str
=
None
twint/feed.py
View file @
2d638de0
...
@@ -4,32 +4,39 @@ from json import loads
...
@@ -4,32 +4,39 @@ from json import loads
import
logging
as
logme
import
logging
as
logme
class
NoMoreTweetsException
(
Exception
):
def
__init__
(
self
,
msg
):
super
()
.
__init__
(
msg
)
def
Follow
(
response
):
def
Follow
(
response
):
logme
.
debug
(
__name__
+
':Follow'
)
logme
.
debug
(
__name__
+
':Follow'
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
follow
=
soup
.
find_all
(
"td"
,
"info fifty screenname"
)
follow
=
soup
.
find_all
(
"td"
,
"info fifty screenname"
)
cursor
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
cursor
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
try
:
try
:
cursor
=
findall
(
r'cursor=(.*?)">'
,
str
(
cursor
))[
0
]
cursor
=
findall
(
r'cursor=(.*?)">'
,
str
(
cursor
))[
0
]
except
IndexError
:
except
IndexError
:
logme
.
critical
(
__name__
+
':Follow:IndexError'
)
logme
.
critical
(
__name__
+
':Follow:IndexError'
)
return
follow
,
cursor
return
follow
,
cursor
def
Mobile
(
response
):
def
Mobile
(
response
):
logme
.
debug
(
__name__
+
':Mobile'
)
logme
.
debug
(
__name__
+
':Mobile'
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
tweets
=
soup
.
find_all
(
"span"
,
"metadata"
)
tweets
=
soup
.
find_all
(
"span"
,
"metadata"
)
max_id
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
max_id
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
try
:
try
:
max_id
=
findall
(
r'max_id=(.*?)">'
,
str
(
max_id
))[
0
]
max_id
=
findall
(
r'max_id=(.*?)">'
,
str
(
max_id
))[
0
]
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
critical
(
__name__
+
':Mobile:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':Mobile:'
+
str
(
e
))
return
tweets
,
max_id
return
tweets
,
max_id
def
MobileFav
(
response
):
def
MobileFav
(
response
):
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
tweets
=
soup
.
find_all
(
"table"
,
"tweet"
)
tweets
=
soup
.
find_all
(
"table"
,
"tweet"
)
max_id
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
max_id
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
...
@@ -40,8 +47,9 @@ def MobileFav(response):
...
@@ -40,8 +47,9 @@ def MobileFav(response):
return
tweets
,
max_id
return
tweets
,
max_id
def
profile
(
response
):
def
profile
(
response
):
logme
.
debug
(
__name__
+
':profile'
)
logme
.
debug
(
__name__
+
':profile'
)
json_response
=
loads
(
response
)
json_response
=
loads
(
response
)
html
=
json_response
[
"items_html"
]
html
=
json_response
[
"items_html"
]
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
...
@@ -49,10 +57,54 @@ def profile(response):
...
@@ -49,10 +57,54 @@ def profile(response):
return
feed
,
feed
[
-
1
][
"data-item-id"
]
return
feed
,
feed
[
-
1
][
"data-item-id"
]
def
Json
(
response
):
def
Json
(
response
):
logme
.
debug
(
__name__
+
':Json'
)
logme
.
debug
(
__name__
+
':Json'
)
json_response
=
loads
(
response
)
json_response
=
loads
(
response
)
html
=
json_response
[
"items_html"
]
html
=
json_response
[
"items_html"
]
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
feed
=
soup
.
find_all
(
"div"
,
"tweet"
)
feed
=
soup
.
find_all
(
"div"
,
"tweet"
)
return
feed
,
json_response
[
"min_position"
]
return
feed
,
json_response
[
"min_position"
]
def
search_v2
(
response
):
# TODO need to implement this
response
=
loads
(
response
)
if
len
(
response
[
'globalObjects'
][
'tweets'
])
==
0
:
msg
=
'No more data. finished scraping!!'
raise
NoMoreTweetsException
(
msg
)
# need to modify things at the function call end
# timeline = response['timeline']['instructions'][0]['addEntries']['entries']
feed
=
[]
feed_set
=
set
()
# here we need to remove the quoted and `to-reply` tweets from the list as they may or may not contain the
# for _id in response['globalObjects']['tweets']:
# if 'quoted_status_id_str' in response['globalObjects']['tweets'][_id] or \
# response['globalObjects']['tweets'][_id]['in_reply_to_status_id_str']:
# try:
# feed_set.add(response['globalObjects']['tweets'][_id]['quoted_status_id_str'])
# except KeyError:
# feed_set.add(response['globalObjects']['tweets'][_id]['in_reply_to_status_id_str'])
# i = 1
# for _id in response['globalObjects']['tweets']:
# if _id not in feed_set:
# temp_obj = response['globalObjects']['tweets'][_id]
# temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
# feed.append(temp_obj)
for
timeline_entry
in
response
[
'timeline'
][
'instructions'
][
0
][
'addEntries'
][
'entries'
]:
# this will handle the cases when the timeline entry is a tweet
if
timeline_entry
[
'entryId'
]
.
find
(
'sq-I-t-'
)
==
0
:
_id
=
timeline_entry
[
'content'
][
'item'
][
'content'
][
'tweet'
][
'id'
]
temp_obj
=
response
[
'globalObjects'
][
'tweets'
][
_id
]
temp_obj
[
'user_data'
]
=
response
[
'globalObjects'
][
'users'
][
temp_obj
[
'user_id_str'
]]
feed
.
append
(
temp_obj
)
try
:
next_cursor
=
response
[
'timeline'
][
'instructions'
][
0
][
'addEntries'
][
'entries'
][
-
1
][
'content'
][
'operation'
][
'cursor'
][
'value'
]
except
KeyError
:
# this is needed because after the first request location of cursor is changed
next_cursor
=
response
[
'timeline'
][
'instructions'
][
-
1
][
'replaceEntry'
][
'entry'
][
'content'
][
'operation'
][
'cursor'
][
'value'
]
return
feed
,
next_cursor
twint/format.py
View file @
2d638de0
...
@@ -37,8 +37,9 @@ def Tweet(config, t):
...
@@ -37,8 +37,9 @@ def Tweet(config, t):
logme
.
debug
(
__name__
+
':Tweet:notFormat'
)
logme
.
debug
(
__name__
+
':Tweet:notFormat'
)
output
=
f
"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "
output
=
f
"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "
if
t
.
retweet
:
# TODO: someone who is familiar with this code, needs to take a look at what this is <also see tweet.py>
output
+=
"RT "
# if t.retweet:
# output += "RT "
output
+=
f
"<{t.username}> {t.tweet}"
output
+=
f
"<{t.username}> {t.tweet}"
...
...
twint/get.py
View file @
2d638de0
...
@@ -8,28 +8,40 @@ from fake_useragent import UserAgent
...
@@ -8,28 +8,40 @@ from fake_useragent import UserAgent
import
asyncio
import
asyncio
import
concurrent.futures
import
concurrent.futures
import
random
import
random
from
json
import
loads
from
json
import
loads
,
dumps
from
aiohttp_socks
import
ProxyConnector
,
ProxyType
from
aiohttp_socks
import
ProxyConnector
,
ProxyType
from
urllib.parse
import
quote
from
.
import
url
from
.
import
url
from
.output
import
Tweets
,
Users
from
.output
import
Tweets
,
Users
from
.
user
import
inf
from
.
token
import
TokenExpiryException
import
logging
as
logme
import
logging
as
logme
httpproxy
=
None
httpproxy
=
None
user_agent_list
=
[
user_agent_list
=
[
#'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
# ' Chrome/60.0.3112.113 Safari/537.36',
#'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
# ' Chrome/60.0.3112.90 Safari/537.36',
#'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
# ' Chrome/60.0.3112.90 Safari/537.36',
#'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
# ' Chrome/60.0.3112.90 Safari/537.36',
#'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
# 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)'
#'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
# ' Chrome/44.0.2403.157 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/60.0.3112.113 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/57.0.2987.133 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/57.0.2987.133 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/55.0.2883.87 Safari/537.36',
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
# ' Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)'
,
'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
,
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
,
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)'
,
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)'
,
...
@@ -42,11 +54,19 @@ user_agent_list = [
...
@@ -42,11 +54,19 @@ user_agent_list = [
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko'
,
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko'
,
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
,
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
,
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
,
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
,
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)'
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET '
'CLR 3.5.30729)'
,
]
]
# function to convert python `dict` to json and then encode it to be passed in the url as a parameter
# some urls require this format
def
dict_to_url
(
dct
):
return
quote
(
dumps
(
dct
))
def
get_connector
(
config
):
def
get_connector
(
config
):
logme
.
debug
(
__name__
+
':get_connector'
)
logme
.
debug
(
__name__
+
':get_connector'
)
_connector
=
None
_connector
=
None
if
config
.
Proxy_host
:
if
config
.
Proxy_host
:
if
config
.
Proxy_host
.
lower
()
==
"tor"
:
if
config
.
Proxy_host
.
lower
()
==
"tor"
:
...
@@ -73,82 +93,92 @@ def get_connector(config):
...
@@ -73,82 +93,92 @@ def get_connector(config):
port
=
config
.
Proxy_port
,
port
=
config
.
Proxy_port
,
rdns
=
True
)
rdns
=
True
)
else
:
else
:
logme
.
critical
(
__name__
+
':get_connector:proxy-port-type-error'
)
logme
.
critical
(
__name__
+
':get_connector:proxy-port-type-error'
)
print
(
"Error: Please specify --proxy-host, --proxy-port, and --proxy-type"
)
print
(
"Error: Please specify --proxy-host, --proxy-port, and --proxy-type"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
else
:
else
:
if
config
.
Proxy_port
or
config
.
Proxy_type
:
if
config
.
Proxy_port
or
config
.
Proxy_type
:
logme
.
critical
(
__name__
+
':get_connector:proxy-host-arg-error'
)
logme
.
critical
(
__name__
+
':get_connector:proxy-host-arg-error'
)
print
(
"Error: Please specify --proxy-host, --proxy-port, and --proxy-type"
)
print
(
"Error: Please specify --proxy-host, --proxy-port, and --proxy-type"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
return
_connector
return
_connector
async
def
RequestUrl
(
config
,
init
,
headers
=
[]):
async
def
RequestUrl
(
config
,
init
,
headers
=
[]):
logme
.
debug
(
__name__
+
':RequestUrl'
)
logme
.
debug
(
__name__
+
':RequestUrl'
)
_connector
=
get_connector
(
config
)
_connector
=
get_connector
(
config
)
_serialQuery
=
""
_serialQuery
=
""
params
=
[]
params
=
[]
_url
=
""
_url
=
""
_headers
=
{}
# TODO : do this later
if
config
.
Profile
:
if
config
.
Profile
:
if
config
.
Profile_full
:
if
config
.
Profile_full
:
logme
.
debug
(
__name__
+
':RequestUrl:Profile_full'
)
logme
.
debug
(
__name__
+
':RequestUrl:Profile_full'
)
_url
=
await
url
.
MobileProfile
(
config
.
Username
,
init
)
_url
=
await
url
.
MobileProfile
(
config
.
Username
,
init
)
else
:
else
:
logme
.
debug
(
__name__
+
':RequestUrl:notProfile_full'
)
logme
.
debug
(
__name__
+
':RequestUrl:notProfile_full'
)
_url
=
await
url
.
Profile
(
config
.
Username
,
init
)
_url
=
await
url
.
Profile
(
config
.
Username
,
init
)
_serialQuery
=
_url
_serialQuery
=
_url
elif
config
.
TwitterSearch
:
elif
config
.
TwitterSearch
:
logme
.
debug
(
__name__
+
':RequestUrl:TwitterSearch'
)
logme
.
debug
(
__name__
+
':RequestUrl:TwitterSearch'
)
_url
,
params
,
_serialQuery
=
await
url
.
Search
(
config
,
init
)
_url
,
params
,
_serialQuery
=
await
url
.
Search
(
config
,
init
)
_headers
=
[(
"authorization"
,
config
.
Bearer_token
),
(
"x-guest-token"
,
config
.
Guest_token
)]
else
:
else
:
if
config
.
Following
:
if
config
.
Following
:
logme
.
debug
(
__name__
+
':RequestUrl:Following'
)
logme
.
debug
(
__name__
+
':RequestUrl:Following'
)
_url
=
await
url
.
Following
(
config
.
Username
,
init
)
_url
=
await
url
.
Following
(
config
.
Username
,
init
)
elif
config
.
Followers
:
elif
config
.
Followers
:
logme
.
debug
(
__name__
+
':RequestUrl:Followers'
)
logme
.
debug
(
__name__
+
':RequestUrl:Followers'
)
_url
=
await
url
.
Followers
(
config
.
Username
,
init
)
_url
=
await
url
.
Followers
(
config
.
Username
,
init
)
else
:
else
:
logme
.
debug
(
__name__
+
':RequestUrl:Favorites'
)
logme
.
debug
(
__name__
+
':RequestUrl:Favorites'
)
_url
=
await
url
.
Favorites
(
config
.
Username
,
init
)
_url
=
await
url
.
Favorites
(
config
.
Username
,
init
)
_serialQuery
=
_url
_serialQuery
=
_url
response
=
await
Request
(
_url
,
params
=
params
,
connector
=
_connector
,
headers
=
headers
)
response
=
await
Request
(
_url
,
params
=
params
,
connector
=
_connector
,
headers
=
_
headers
)
if
config
.
Debug
:
if
config
.
Debug
:
print
(
_serialQuery
,
file
=
open
(
"twint-request_urls.log"
,
"a"
,
encoding
=
"utf-8"
))
print
(
_serialQuery
,
file
=
open
(
"twint-request_urls.log"
,
"a"
,
encoding
=
"utf-8"
))
return
response
return
response
def
ForceNewTorIdentity
(
config
):
def
ForceNewTorIdentity
(
config
):
logme
.
debug
(
__name__
+
':ForceNewTorIdentity'
)
logme
.
debug
(
__name__
+
':ForceNewTorIdentity'
)
try
:
try
:
tor_c
=
socket
.
create_connection
((
'127.0.0.1'
,
config
.
Tor_control_port
))
tor_c
=
socket
.
create_connection
((
'127.0.0.1'
,
config
.
Tor_control_port
))
tor_c
.
send
(
'AUTHENTICATE "{}"
\r\n
SIGNAL NEWNYM
\r\n
'
.
format
(
config
.
Tor_control_password
)
.
encode
())
tor_c
.
send
(
'AUTHENTICATE "{}"
\r\n
SIGNAL NEWNYM
\r\n
'
.
format
(
config
.
Tor_control_password
)
.
encode
())
response
=
tor_c
.
recv
(
1024
)
response
=
tor_c
.
recv
(
1024
)
if
response
!=
b
'250 OK
\r\n
250 OK
\r\n
'
:
if
response
!=
b
'250 OK
\r\n
250 OK
\r\n
'
:
sys
.
stderr
.
write
(
'Unexpected response from Tor control port: {}
\n
'
.
format
(
response
))
sys
.
stderr
.
write
(
'Unexpected response from Tor control port: {}
\n
'
.
format
(
response
))
logme
.
critical
(
__name__
+
':ForceNewTorIdentity:unexpectedResponse'
)
logme
.
critical
(
__name__
+
':ForceNewTorIdentity:unexpectedResponse'
)
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
debug
(
__name__
+
':ForceNewTorIdentity:errorConnectingTor'
)
logme
.
debug
(
__name__
+
':ForceNewTorIdentity:errorConnectingTor'
)
sys
.
stderr
.
write
(
'Error connecting to Tor control port: {}
\n
'
.
format
(
repr
(
e
)))
sys
.
stderr
.
write
(
'Error connecting to Tor control port: {}
\n
'
.
format
(
repr
(
e
)))
sys
.
stderr
.
write
(
'If you want to rotate Tor ports automatically - enable Tor control port
\n
'
)
sys
.
stderr
.
write
(
'If you want to rotate Tor ports automatically - enable Tor control port
\n
'
)
async
def
Request
(
url
,
connector
=
None
,
params
=
[],
headers
=
[]):
logme
.
debug
(
__name__
+
':Request:Connector'
)
async
def
Request
(
_url
,
connector
=
None
,
params
=
None
,
headers
=
None
):
logme
.
debug
(
__name__
+
':Request:Connector'
)
async
with
aiohttp
.
ClientSession
(
connector
=
connector
,
headers
=
headers
)
as
session
:
async
with
aiohttp
.
ClientSession
(
connector
=
connector
,
headers
=
headers
)
as
session
:
return
await
Response
(
session
,
url
,
params
)
return
await
Response
(
session
,
_url
,
params
)
async
def
Response
(
session
,
url
,
params
=
[]
):
async
def
Response
(
session
,
_url
,
params
=
None
):
logme
.
debug
(
__name__
+
':Response'
)
logme
.
debug
(
__name__
+
':Response'
)
with
timeout
(
120
):
with
timeout
(
120
):
async
with
session
.
get
(
url
,
ssl
=
True
,
params
=
params
,
proxy
=
httpproxy
)
as
response
:
async
with
session
.
get
(
_url
,
ssl
=
True
,
params
=
params
,
proxy
=
httpproxy
)
as
response
:
return
await
response
.
text
()
resp
=
await
response
.
text
()
if
response
.
status
==
429
:
# 429 implies Too many requests i.e. Rate Limit Exceeded
raise
TokenExpiryException
(
loads
(
resp
)[
'errors'
][
0
][
'message'
])
return
resp
async
def
RandomUserAgent
(
wa
=
None
):
async
def
RandomUserAgent
(
wa
=
None
):
logme
.
debug
(
__name__
+
':RandomUserAgent'
)
logme
.
debug
(
__name__
+
':RandomUserAgent'
)
try
:
try
:
if
wa
:
if
wa
:
return
"Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
return
"Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
...
@@ -156,43 +186,61 @@ async def RandomUserAgent(wa=None):
...
@@ -156,43 +186,61 @@ async def RandomUserAgent(wa=None):
except
:
except
:
return
random
.
choice
(
user_agent_list
)
return
random
.
choice
(
user_agent_list
)
async
def
Username
(
_id
):
logme
.
debug
(
__name__
+
':Username'
)
url
=
f
"https://twitter.com/intent/user?user_id={_id}&lang=en"
r
=
await
Request
(
url
,
headers
=
{
"X-Requested-With"
:
"XMLHttpRequest"
})
soup
=
BeautifulSoup
(
r
,
"html.parser"
)
return
soup
.
find
(
"a"
,
"fn url alternate-context"
)[
"href"
]
.
replace
(
"/"
,
""
)
async
def
Username
(
_id
,
bearer_token
,
guest_token
):
logme
.
debug
(
__name__
+
':Username'
)
_dct
=
{
'userId'
:
_id
,
'withHighlightedLabel'
:
False
}
_url
=
"https://api.twitter.com/graphql/B9FuNQVmyx32rdbIPEZKag/UserByRestId?variables={}"
.
format
(
dict_to_url
(
_dct
))
_headers
=
{
'authorization'
:
bearer_token
,
'x-guest-token'
:
guest_token
,
}
r
=
await
Request
(
_url
,
headers
=
_headers
)
j_r
=
loads
(
r
)
username
=
j_r
[
'data'
][
'user'
][
'legacy'
][
'screen_name'
]
return
username
async
def
Tweet
(
url
,
config
,
conn
):
async
def
Tweet
(
url
,
config
,
conn
):
logme
.
debug
(
__name__
+
':Tweet'
)
logme
.
debug
(
__name__
+
':Tweet'
)
try
:
try
:
response
=
await
Request
(
url
)
response
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
tweets
=
soup
.
find_all
(
"div"
,
"tweet"
)
tweets
=
soup
.
find_all
(
"div"
,
"tweet"
)
await
Tweets
(
tweets
,
config
,
conn
,
url
)
await
Tweets
(
tweets
,
config
,
conn
,
url
)
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
critical
(
__name__
+
':Tweet:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':Tweet:'
+
str
(
e
))
async
def
User
(
url
,
config
,
conn
,
user_id
=
False
):
logme
.
debug
(
__name__
+
':User'
)
async
def
User
(
username
,
config
,
conn
,
bearer_token
,
guest_token
,
user_id
=
False
):
_connector
=
get_connector
(
config
)
logme
.
debug
(
__name__
+
':User'
)
_dct
=
{
'screen_name'
:
username
,
'withHighlightedLabel'
:
False
}
_url
=
'https://api.twitter.com/graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName?variables={}'
\
.
format
(
dict_to_url
(
_dct
))
_headers
=
{
'authorization'
:
bearer_token
,
'x-guest-token'
:
guest_token
,
}
try
:
try
:
response
=
await
Request
(
url
,
connector
=
_connector
,
headers
=
{
"X-Requested-With"
:
"XMLHttpRequest"
}
)
response
=
await
Request
(
_url
,
headers
=
_headers
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
j_r
=
loads
(
response
)
if
user_id
:
if
user_id
:
return
int
(
inf
(
soup
,
"id"
))
_id
=
j_r
[
'data'
][
'user'
][
'rest_id'
]
await
Users
(
soup
,
config
,
conn
)
return
_id
await
Users
(
j_r
,
config
,
conn
)
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
critical
(
__name__
+
':User:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':User:'
+
str
(
e
))
raise
def
Limit
(
Limit
,
count
):
def
Limit
(
Limit
,
count
):
logme
.
debug
(
__name__
+
':Limit'
)
logme
.
debug
(
__name__
+
':Limit'
)
if
Limit
is
not
None
and
count
>=
int
(
Limit
):
if
Limit
is
not
None
and
count
>=
int
(
Limit
):
return
True
return
True
async
def
Multi
(
feed
,
config
,
conn
):
async
def
Multi
(
feed
,
config
,
conn
):
logme
.
debug
(
__name__
+
':Multi'
)
logme
.
debug
(
__name__
+
':Multi'
)
count
=
0
count
=
0
try
:
try
:
with
concurrent
.
futures
.
ThreadPoolExecutor
(
max_workers
=
20
)
as
executor
:
with
concurrent
.
futures
.
ThreadPoolExecutor
(
max_workers
=
20
)
as
executor
:
...
@@ -201,27 +249,27 @@ async def Multi(feed, config, conn):
...
@@ -201,27 +249,27 @@ async def Multi(feed, config, conn):
for
tweet
in
feed
:
for
tweet
in
feed
:
count
+=
1
count
+=
1
if
config
.
Favorites
or
config
.
Profile_full
:
if
config
.
Favorites
or
config
.
Profile_full
:
logme
.
debug
(
__name__
+
':Multi:Favorites-profileFull'
)
logme
.
debug
(
__name__
+
':Multi:Favorites-profileFull'
)
link
=
tweet
.
find
(
"a"
)[
"href"
]
link
=
tweet
.
find
(
"a"
)[
"href"
]
url
=
f
"https://twitter.com{link}&lang=en"
url
=
f
"https://twitter.com{link}&lang=en"
elif
config
.
User_full
:
elif
config
.
User_full
:
logme
.
debug
(
__name__
+
':Multi:userFull'
)
logme
.
debug
(
__name__
+
':Multi:userFull'
)
username
=
tweet
.
find
(
"a"
)[
"name"
]
username
=
tweet
.
find
(
"a"
)[
"name"
]
url
=
f
"http://twitter.com/{username}?lang=en"
url
=
f
"http://twitter.com/{username}?lang=en"
else
:
else
:
logme
.
debug
(
__name__
+
':Multi:else-url'
)
logme
.
debug
(
__name__
+
':Multi:else-url'
)
link
=
tweet
.
find
(
"a"
,
"tweet-timestamp js-permalink js-nav js-tooltip"
)[
"href"
]
link
=
tweet
.
find
(
"a"
,
"tweet-timestamp js-permalink js-nav js-tooltip"
)[
"href"
]
url
=
f
"https://twitter.com{link}?lang=en"
url
=
f
"https://twitter.com{link}?lang=en"
if
config
.
User_full
:
if
config
.
User_full
:
logme
.
debug
(
__name__
+
':Multi:user-full-Run'
)
logme
.
debug
(
__name__
+
':Multi:user-full-Run'
)
futures
.
append
(
loop
.
run_in_executor
(
executor
,
await
User
(
url
,
futures
.
append
(
loop
.
run_in_executor
(
executor
,
await
User
(
url
,
config
,
conn
)))
config
,
conn
)))
else
:
else
:
logme
.
debug
(
__name__
+
':Multi:notUser-full-Run'
)
logme
.
debug
(
__name__
+
':Multi:notUser-full-Run'
)
futures
.
append
(
loop
.
run_in_executor
(
executor
,
await
Tweet
(
url
,
futures
.
append
(
loop
.
run_in_executor
(
executor
,
await
Tweet
(
url
,
config
,
conn
)))
config
,
conn
)))
logme
.
debug
(
__name__
+
':Multi:asyncioGather'
)
logme
.
debug
(
__name__
+
':Multi:asyncioGather'
)
await
asyncio
.
gather
(
*
futures
)
await
asyncio
.
gather
(
*
futures
)
except
Exception
as
e
:
except
Exception
as
e
:
# TODO: fix error not error
# TODO: fix error not error
...
...
twint/output.py
View file @
2d638de0
...
@@ -17,19 +17,22 @@ author_list.pop()
...
@@ -17,19 +17,22 @@ author_list.pop()
# used by Pandas
# used by Pandas
_follows_object
=
{}
_follows_object
=
{}
def
_formatDateTime
(
datetimestamp
):
def
_formatDateTime
(
datetimestamp
):
try
:
try
:
return
int
(
datetime
.
strptime
(
datetimestamp
,
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
.
timestamp
())
return
int
(
datetime
.
strptime
(
datetimestamp
,
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
.
timestamp
())
except
ValueError
:
except
ValueError
:
return
int
(
datetime
.
strptime
(
datetimestamp
,
"
%
Y-
%
m-
%
d"
)
.
timestamp
())
return
int
(
datetime
.
strptime
(
datetimestamp
,
"
%
Y-
%
m-
%
d"
)
.
timestamp
())
def
_clean_follow_list
():
def
_clean_follow_list
():
logme
.
debug
(
__name__
+
':clean_follow_list'
)
logme
.
debug
(
__name__
+
':clean_follow_list'
)
global
_follows_object
global
_follows_object
_follows_object
=
{}
_follows_object
=
{}
def
clean_lists
():
def
clean_lists
():
logme
.
debug
(
__name__
+
':clean_lists'
)
logme
.
debug
(
__name__
+
':clean_lists'
)
global
follows_list
global
follows_list
global
tweets_list
global
tweets_list
global
users_list
global
users_list
...
@@ -37,10 +40,11 @@ def clean_lists():
...
@@ -37,10 +40,11 @@ def clean_lists():
tweets_list
=
[]
tweets_list
=
[]
users_list
=
[]
users_list
=
[]
def
datecheck
(
datetimestamp
,
config
):
def
datecheck
(
datetimestamp
,
config
):
logme
.
debug
(
__name__
+
':datecheck'
)
logme
.
debug
(
__name__
+
':datecheck'
)
if
config
.
Since
:
if
config
.
Since
:
logme
.
debug
(
__name__
+
':datecheck:SinceTrue'
)
logme
.
debug
(
__name__
+
':datecheck:SinceTrue'
)
d
=
_formatDateTime
(
datetimestamp
)
d
=
_formatDateTime
(
datetimestamp
)
s
=
_formatDateTime
(
config
.
Since
)
s
=
_formatDateTime
(
config
.
Since
)
...
@@ -48,44 +52,49 @@ def datecheck(datetimestamp, config):
...
@@ -48,44 +52,49 @@ def datecheck(datetimestamp, config):
if
d
<
s
:
if
d
<
s
:
return
False
return
False
if
config
.
Until
:
if
config
.
Until
:
logme
.
debug
(
__name__
+
':datecheck:UntilTrue'
)
logme
.
debug
(
__name__
+
':datecheck:UntilTrue'
)
d
=
_formatDateTime
(
datetimestamp
)
d
=
_formatDateTime
(
datetimestamp
)
s
=
_formatDateTime
(
config
.
Until
)
s
=
_formatDateTime
(
config
.
Until
)
if
d
>
s
:
if
d
>
s
:
return
False
return
False
logme
.
debug
(
__name__
+
':datecheck:dateRangeFalse'
)
logme
.
debug
(
__name__
+
':datecheck:dateRangeFalse'
)
return
True
return
True
# TODO In this method we need to delete the quoted tweets, because twitter also sends the quoted tweets in the
# `tweets` list along with the other tweets
def
is_tweet
(
tw
):
def
is_tweet
(
tw
):
try
:
try
:
tw
[
"data-item-id"
]
tw
[
"data-item-id"
]
logme
.
debug
(
__name__
+
':is_tweet:True'
)
logme
.
debug
(
__name__
+
':is_tweet:True'
)
return
True
return
True
except
:
except
:
logme
.
critical
(
__name__
+
':is_tweet:False'
)
logme
.
critical
(
__name__
+
':is_tweet:False'
)
return
False
return
False
def
_output
(
obj
,
output
,
config
,
**
extra
):
def
_output
(
obj
,
output
,
config
,
**
extra
):
logme
.
debug
(
__name__
+
':_output'
)
logme
.
debug
(
__name__
+
':_output'
)
if
config
.
Lowercase
:
if
config
.
Lowercase
:
if
isinstance
(
obj
,
str
):
if
isinstance
(
obj
,
str
):
logme
.
debug
(
__name__
+
':_output:Lowercase:username'
)
logme
.
debug
(
__name__
+
':_output:Lowercase:username'
)
obj
=
obj
.
lower
()
obj
=
obj
.
lower
()
elif
obj
.
__class__
.
__name__
==
"user"
:
elif
obj
.
__class__
.
__name__
==
"user"
:
logme
.
debug
(
__name__
+
':_output:Lowercase:user'
)
logme
.
debug
(
__name__
+
':_output:Lowercase:user'
)
pass
pass
elif
obj
.
__class__
.
__name__
==
"tweet"
:
elif
obj
.
__class__
.
__name__
==
"tweet"
:
logme
.
debug
(
__name__
+
':_output:Lowercase:tweet'
)
logme
.
debug
(
__name__
+
':_output:Lowercase:tweet'
)
obj
.
username
=
obj
.
username
.
lower
()
obj
.
username
=
obj
.
username
.
lower
()
author_list
.
update
({
obj
.
username
})
author_list
.
update
({
obj
.
username
})
for
i
in
range
(
len
(
obj
.
mentions
)):
for
i
in
range
(
len
(
obj
.
mentions
)):
obj
.
mentions
[
i
]
=
obj
.
mentions
[
i
]
.
lower
()
obj
.
mentions
[
i
]
=
obj
.
mentions
[
i
]
.
lower
()
for
i
in
range
(
len
(
obj
.
hashtags
)):
for
i
in
range
(
len
(
obj
.
hashtags
)):
obj
.
hashtags
[
i
]
=
obj
.
hashtags
[
i
]
.
lower
()
obj
.
hashtags
[
i
]
=
obj
.
hashtags
[
i
]
.
lower
()
for
i
in
range
(
len
(
obj
.
cashtags
)):
# TODO : dont know what cashtags are, <also modify in tweet.py>
obj
.
cashtags
[
i
]
=
obj
.
cashtags
[
i
]
.
lower
()
# for i in range(len(obj.cashtags)):
# obj.cashtags[i] = obj.cashtags[i].lower()
else
:
else
:
logme
.
info
(
'_output:Lowercase:hiddenTweetFound'
)
logme
.
info
(
'_output:Lowercase:hiddenTweetFound'
)
print
(
"[x] Hidden tweet found, account suspended due to violation of TOS"
)
print
(
"[x] Hidden tweet found, account suspended due to violation of TOS"
)
...
@@ -94,36 +103,36 @@ def _output(obj, output, config, **extra):
...
@@ -94,36 +103,36 @@ def _output(obj, output, config, **extra):
if
config
.
Store_csv
:
if
config
.
Store_csv
:
try
:
try
:
write
.
Csv
(
obj
,
config
)
write
.
Csv
(
obj
,
config
)
logme
.
debug
(
__name__
+
':_output:CSV'
)
logme
.
debug
(
__name__
+
':_output:CSV'
)
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
critical
(
__name__
+
':_output:CSV:Error:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':_output:CSV:Error:'
+
str
(
e
))
print
(
str
(
e
)
+
" [x] output._output"
)
print
(
str
(
e
)
+
" [x] output._output"
)
elif
config
.
Store_json
:
elif
config
.
Store_json
:
write
.
Json
(
obj
,
config
)
write
.
Json
(
obj
,
config
)
logme
.
debug
(
__name__
+
':_output:JSON'
)
logme
.
debug
(
__name__
+
':_output:JSON'
)
else
:
else
:
write
.
Text
(
output
,
config
.
Output
)
write
.
Text
(
output
,
config
.
Output
)
logme
.
debug
(
__name__
+
':_output:Text'
)
logme
.
debug
(
__name__
+
':_output:Text'
)
if
config
.
Elasticsearch
:
if
config
.
Elasticsearch
:
logme
.
debug
(
__name__
+
':_output:Elasticsearch'
)
logme
.
debug
(
__name__
+
':_output:Elasticsearch'
)
print
(
""
,
end
=
"."
,
flush
=
True
)
print
(
""
,
end
=
"."
,
flush
=
True
)
else
:
else
:
if
not
config
.
Hide_output
:
if
not
config
.
Hide_output
:
try
:
try
:
print
(
output
.
replace
(
'
\n
'
,
' '
))
print
(
output
.
replace
(
'
\n
'
,
' '
))
except
UnicodeEncodeError
:
except
UnicodeEncodeError
:
logme
.
critical
(
__name__
+
':_output:UnicodeEncodeError'
)
logme
.
critical
(
__name__
+
':_output:UnicodeEncodeError'
)
print
(
"unicode error [x] output._output"
)
print
(
"unicode error [x] output._output"
)
async
def
checkData
(
tweet
,
config
,
conn
):
async
def
checkData
(
tweet
,
config
,
conn
):
logme
.
debug
(
__name__
+
':checkData'
)
logme
.
debug
(
__name__
+
':checkData'
)
copyright
=
tweet
.
find
(
"div"
,
"StreamItemContent--withheld"
)
if
copyright
is
None
and
is_tweet
(
tweet
):
tweet
=
Tweet
(
tweet
,
config
)
tweet
=
Tweet
(
tweet
,
config
)
if
not
tweet
.
datestamp
:
if
not
tweet
.
datestamp
:
logme
.
critical
(
__name__
+
':checkData:hiddenTweetFound'
)
logme
.
critical
(
__name__
+
':checkData:hiddenTweetFound'
)
print
(
"[x] Hidden tweet found, account suspended due to violation of TOS"
)
print
(
"[x] Hidden tweet found, account suspended due to violation of TOS"
)
return
return
...
@@ -131,56 +140,58 @@ async def checkData(tweet, config, conn):
...
@@ -131,56 +140,58 @@ async def checkData(tweet, config, conn):
output
=
format
.
Tweet
(
config
,
tweet
)
output
=
format
.
Tweet
(
config
,
tweet
)
if
config
.
Database
:
if
config
.
Database
:
logme
.
debug
(
__name__
+
':checkData:Database'
)
logme
.
debug
(
__name__
+
':checkData:Database'
)
db
.
tweets
(
conn
,
tweet
,
config
)
db
.
tweets
(
conn
,
tweet
,
config
)
if
config
.
Pandas
:
if
config
.
Pandas
:
logme
.
debug
(
__name__
+
':checkData:Pandas'
)
logme
.
debug
(
__name__
+
':checkData:Pandas'
)
panda
.
update
(
tweet
,
config
)
panda
.
update
(
tweet
,
config
)
if
config
.
Store_object
:
if
config
.
Store_object
:
logme
.
debug
(
__name__
+
':checkData:Store_object'
)
logme
.
debug
(
__name__
+
':checkData:Store_object'
)
if
hasattr
(
config
.
Store_object_tweets_list
,
'append'
):
if
hasattr
(
config
.
Store_object_tweets_list
,
'append'
):
config
.
Store_object_tweets_list
.
append
(
tweet
)
config
.
Store_object_tweets_list
.
append
(
tweet
)
else
:
else
:
tweets_list
.
append
(
tweet
)
tweets_list
.
append
(
tweet
)
if
config
.
Elasticsearch
:
if
config
.
Elasticsearch
:
logme
.
debug
(
__name__
+
':checkData:Elasticsearch'
)
logme
.
debug
(
__name__
+
':checkData:Elasticsearch'
)
elasticsearch
.
Tweet
(
tweet
,
config
)
elasticsearch
.
Tweet
(
tweet
,
config
)
_output
(
tweet
,
output
,
config
)
_output
(
tweet
,
output
,
config
)
else
:
# else:
logme
.
critical
(
__name__
+
':checkData:copyrightedTweet'
)
# logme.critical(__name__+':checkData:copyrightedTweet')
async def Tweets(tweets, config, conn, url=''):
    """Route scraped tweet data to checkData() according to the scrape mode.

    For favorites / full-profile / location scrapes, `tweets` is iterable and
    only the entry whose item id matches the id embedded in `url` is kept.
    For a Twitter search, `tweets` is a single item passed straight through.
    Otherwise the item is kept only for the profiled user (or any user when
    retweets are enabled).
    """
    logme.debug(__name__ + ':Tweets')

    if config.Favorites or config.Profile_full or config.Location:
        logme.debug(__name__ + ':Tweets:fav+full+loc')
        # The tweet id is the last path component of the URL (query stripped).
        wanted_id = url.split('?')[0].split('/')[-1]
        for item in tweets:
            if item['data-item-id'] == wanted_id:
                await checkData(item, config, conn)
    elif config.TwitterSearch:
        logme.debug(__name__ + ':Tweets:TwitterSearch')
        await checkData(tweets, config, conn)
    else:
        logme.debug(__name__ + ':Tweets:else')
        if int(tweets["data-user-id"]) == config.User_id or config.Retweets:
            await checkData(tweets, config, conn)
async
def
Users
(
u
,
config
,
conn
):
async
def
Users
(
u
,
config
,
conn
):
logme
.
debug
(
__name__
+
':User'
)
logme
.
debug
(
__name__
+
':User'
)
global
users_list
global
users_list
user
=
User
(
u
)
user
=
User
(
u
)
output
=
format
.
User
(
config
.
Format
,
user
)
output
=
format
.
User
(
config
.
Format
,
user
)
if
config
.
Database
:
if
config
.
Database
:
logme
.
debug
(
__name__
+
':User:Database'
)
logme
.
debug
(
__name__
+
':User:Database'
)
db
.
user
(
conn
,
config
,
user
)
db
.
user
(
conn
,
config
,
user
)
if
config
.
Elasticsearch
:
if
config
.
Elasticsearch
:
logme
.
debug
(
__name__
+
':User:Elasticsearch'
)
logme
.
debug
(
__name__
+
':User:Elasticsearch'
)
_save_date
=
user
.
join_date
_save_date
=
user
.
join_date
_save_time
=
user
.
join_time
_save_time
=
user
.
join_time
user
.
join_date
=
str
(
datetime
.
strptime
(
user
.
join_date
,
"
%
d
%
b
%
Y"
))
.
split
()[
0
]
user
.
join_date
=
str
(
datetime
.
strptime
(
user
.
join_date
,
"
%
d
%
b
%
Y"
))
.
split
()[
0
]
...
@@ -190,7 +201,7 @@ async def Users(u, config, conn):
...
@@ -190,7 +201,7 @@ async def Users(u, config, conn):
user
.
join_time
=
_save_time
user
.
join_time
=
_save_time
if
config
.
Store_object
:
if
config
.
Store_object
:
logme
.
debug
(
__name__
+
':User:Store_object'
)
logme
.
debug
(
__name__
+
':User:Store_object'
)
if
hasattr
(
config
.
Store_object_follow_list
,
'append'
):
if
hasattr
(
config
.
Store_object_follow_list
,
'append'
):
config
.
Store_object_follow_list
.
append
(
user
)
config
.
Store_object_follow_list
.
append
(
user
)
...
@@ -200,23 +211,24 @@ async def Users(u, config, conn):
...
@@ -200,23 +211,24 @@ async def Users(u, config, conn):
users_list
.
append
(
user
)
# twint.user.user
users_list
.
append
(
user
)
# twint.user.user
if
config
.
Pandas
:
if
config
.
Pandas
:
logme
.
debug
(
__name__
+
':User:Pandas+user'
)
logme
.
debug
(
__name__
+
':User:Pandas+user'
)
panda
.
update
(
user
,
config
)
panda
.
update
(
user
,
config
)
_output
(
user
,
output
,
config
)
_output
(
user
,
output
,
config
)
async
def
Username
(
username
,
config
,
conn
):
async
def
Username
(
username
,
config
,
conn
):
logme
.
debug
(
__name__
+
':Username'
)
logme
.
debug
(
__name__
+
':Username'
)
global
_follows_object
global
_follows_object
global
follows_list
global
follows_list
follow_var
=
config
.
Following
*
"following"
+
config
.
Followers
*
"followers"
follow_var
=
config
.
Following
*
"following"
+
config
.
Followers
*
"followers"
if
config
.
Database
:
if
config
.
Database
:
logme
.
debug
(
__name__
+
':Username:Database'
)
logme
.
debug
(
__name__
+
':Username:Database'
)
db
.
follow
(
conn
,
config
.
Username
,
config
.
Followers
,
username
)
db
.
follow
(
conn
,
config
.
Username
,
config
.
Followers
,
username
)
if
config
.
Elasticsearch
:
if
config
.
Elasticsearch
:
logme
.
debug
(
__name__
+
':Username:Elasticsearch'
)
logme
.
debug
(
__name__
+
':Username:Elasticsearch'
)
elasticsearch
.
Follow
(
username
,
config
)
elasticsearch
.
Follow
(
username
,
config
)
if
config
.
Store_object
:
if
config
.
Store_object
:
...
@@ -226,13 +238,13 @@ async def Username(username, config, conn):
...
@@ -226,13 +238,13 @@ async def Username(username, config, conn):
follows_list
.
append
(
username
)
# twint.user.user
follows_list
.
append
(
username
)
# twint.user.user
if
config
.
Pandas
:
if
config
.
Pandas
:
logme
.
debug
(
__name__
+
':Username:object+pandas'
)
logme
.
debug
(
__name__
+
':Username:object+pandas'
)
try
:
try
:
_
=
_follows_object
[
config
.
Username
][
follow_var
]
_
=
_follows_object
[
config
.
Username
][
follow_var
]
except
KeyError
:
except
KeyError
:
_follows_object
.
update
({
config
.
Username
:
{
follow_var
:
[]}})
_follows_object
.
update
({
config
.
Username
:
{
follow_var
:
[]}})
_follows_object
[
config
.
Username
][
follow_var
]
.
append
(
username
)
_follows_object
[
config
.
Username
][
follow_var
]
.
append
(
username
)
if
config
.
Pandas_au
:
if
config
.
Pandas_au
:
logme
.
debug
(
__name__
+
':Username:object+pandas+au'
)
logme
.
debug
(
__name__
+
':Username:object+pandas+au'
)
panda
.
update
(
_follows_object
[
config
.
Username
],
config
)
panda
.
update
(
_follows_object
[
config
.
Username
],
config
)
_output
(
username
,
username
,
config
)
_output
(
username
,
username
,
config
)
twint/run.py
View file @
2d638de0
import
sys
,
os
,
time
,
datetime
import
sys
,
os
,
datetime
from
asyncio
import
get_event_loop
,
TimeoutError
,
ensure_future
,
new_event_loop
,
set_event_loop
from
asyncio
import
get_event_loop
,
TimeoutError
,
ensure_future
,
new_event_loop
,
set_event_loop
from
.
import
datelock
,
feed
,
get
,
output
,
verbose
,
storage
from
.
import
datelock
,
feed
,
get
,
output
,
verbose
,
storage
from
.token
import
TokenExpiryException
from
.
import
token
from
.storage
import
db
from
.storage
import
db
from
.feed
import
NoMoreTweetsException
import
logging
as
logme
import
logging
as
logme
import
time
import
time
bearer
=
'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs'
\
'
%3
D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
class
Twint
:
class
Twint
:
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
logme
.
debug
(
__name__
+
':Twint:__init__'
)
logme
.
debug
(
__name__
+
':Twint:__init__'
)
if
config
.
Resume
is
not
None
and
(
config
.
TwitterSearch
or
config
.
Followers
or
config
.
Following
):
if
config
.
Resume
is
not
None
and
(
config
.
TwitterSearch
or
config
.
Followers
or
config
.
Following
):
logme
.
debug
(
__name__
+
':Twint:__init__:Resume'
)
logme
.
debug
(
__name__
+
':Twint:__init__:Resume'
)
self
.
init
=
self
.
get_resume
(
config
.
Resume
)
self
.
init
=
self
.
get_resume
(
config
.
Resume
)
else
:
else
:
self
.
init
=
'-1'
self
.
init
=
'-1'
...
@@ -21,16 +28,21 @@ class Twint:
...
@@ -21,16 +28,21 @@ class Twint:
self
.
count
=
0
self
.
count
=
0
self
.
user_agent
=
""
self
.
user_agent
=
""
self
.
config
=
config
self
.
config
=
config
self
.
config
.
Bearer_token
=
bearer
# TODO might have to make some adjustments for it to work with multi-treading
# USAGE : to get a new guest token simply do `self.token.refresh()`
self
.
token
=
token
.
Token
(
config
)
self
.
token
.
refresh
()
self
.
conn
=
db
.
Conn
(
config
.
Database
)
self
.
conn
=
db
.
Conn
(
config
.
Database
)
self
.
d
=
datelock
.
Set
(
self
.
config
.
Until
,
self
.
config
.
Since
)
self
.
d
=
datelock
.
Set
(
self
.
config
.
Until
,
self
.
config
.
Since
)
verbose
.
Elastic
(
config
.
Elasticsearch
)
verbose
.
Elastic
(
config
.
Elasticsearch
)
if
self
.
config
.
Store_object
:
if
self
.
config
.
Store_object
:
logme
.
debug
(
__name__
+
':Twint:__init__:clean_follow_list'
)
logme
.
debug
(
__name__
+
':Twint:__init__:clean_follow_list'
)
output
.
_clean_follow_list
()
output
.
_clean_follow_list
()
if
self
.
config
.
Pandas_clean
:
if
self
.
config
.
Pandas_clean
:
logme
.
debug
(
__name__
+
':Twint:__init__:pandas_clean'
)
logme
.
debug
(
__name__
+
':Twint:__init__:pandas_clean'
)
storage
.
panda
.
clean
()
storage
.
panda
.
clean
()
def
get_resume
(
self
,
resumeFile
):
def
get_resume
(
self
,
resumeFile
):
...
@@ -41,10 +53,17 @@ class Twint:
...
@@ -41,10 +53,17 @@ class Twint:
return
_init
return
_init
async
def
Feed
(
self
):
async
def
Feed
(
self
):
logme
.
debug
(
__name__
+
':Twint:Feed'
)
logme
.
debug
(
__name__
+
':Twint:Feed'
)
consecutive_errors_count
=
0
consecutive_errors_count
=
0
while
True
:
while
True
:
# this will receive a JSON string, parse it into a `dict` and do the required stuff
try
:
response
=
await
get
.
RequestUrl
(
self
.
config
,
self
.
init
,
headers
=
[(
"User-Agent"
,
self
.
user_agent
)])
except
TokenExpiryException
as
e
:
logme
.
debug
(
__name__
+
'Twint:Feed:'
+
str
(
e
))
self
.
token
.
refresh
()
response
=
await
get
.
RequestUrl
(
self
.
config
,
self
.
init
,
headers
=
[(
"User-Agent"
,
self
.
user_agent
)])
response
=
await
get
.
RequestUrl
(
self
.
config
,
self
.
init
,
headers
=
[(
"User-Agent"
,
self
.
user_agent
)])
if
self
.
config
.
Debug
:
if
self
.
config
.
Debug
:
print
(
response
,
file
=
open
(
"twint-last-request.log"
,
"w"
,
encoding
=
"utf-8"
))
print
(
response
,
file
=
open
(
"twint-last-request.log"
,
"w"
,
encoding
=
"utf-8"
))
...
@@ -75,29 +94,36 @@ class Twint:
...
@@ -75,29 +94,36 @@ class Twint:
else
:
else
:
self
.
feed
,
self
.
init
=
feed
.
profile
(
response
)
self
.
feed
,
self
.
init
=
feed
.
profile
(
response
)
elif
self
.
config
.
TwitterSearch
:
elif
self
.
config
.
TwitterSearch
:
self
.
feed
,
self
.
init
=
feed
.
Json
(
response
)
try
:
self
.
feed
,
self
.
init
=
feed
.
search_v2
(
response
)
except
NoMoreTweetsException
as
e
:
logme
.
debug
(
__name__
+
':Twint:Feed:'
+
str
(
e
))
print
(
e
,
'is it though? because sometimes twitter lie.'
)
break
break
except
TimeoutError
as
e
:
except
TimeoutError
as
e
:
if
self
.
config
.
Proxy_host
.
lower
()
==
"tor"
:
if
self
.
config
.
Proxy_host
.
lower
()
==
"tor"
:
print
(
"[?] Timed out, changing Tor identity..."
)
print
(
"[?] Timed out, changing Tor identity..."
)
if
self
.
config
.
Tor_control_password
is
None
:
if
self
.
config
.
Tor_control_password
is
None
:
logme
.
critical
(
__name__
+
':Twint:Feed:tor-password'
)
logme
.
critical
(
__name__
+
':Twint:Feed:tor-password'
)
sys
.
stderr
.
write
(
"Error: config.Tor_control_password must be set for proxy autorotation!
\r\n
"
)
sys
.
stderr
.
write
(
"Error: config.Tor_control_password must be set for proxy autorotation!
\r\n
"
)
sys
.
stderr
.
write
(
"Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors-controller-interface-directly
\r\n
"
)
sys
.
stderr
.
write
(
"Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors-controller-interface-directly
\r\n
"
)
break
break
else
:
else
:
get
.
ForceNewTorIdentity
(
self
.
config
)
get
.
ForceNewTorIdentity
(
self
.
config
)
continue
continue
else
:
else
:
logme
.
critical
(
__name__
+
':Twint:Feed:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':Twint:Feed:'
+
str
(
e
))
print
(
str
(
e
))
print
(
str
(
e
))
break
break
except
Exception
as
e
:
except
Exception
as
e
:
if
self
.
config
.
Profile
or
self
.
config
.
Favorites
:
if
self
.
config
.
Profile
or
self
.
config
.
Favorites
:
print
(
"[!] Twitter does not return more data, scrape stops here."
)
print
(
"[!] Twitter does not return more data, scrape stops here."
)
break
break
logme
.
critical
(
__name__
+
':Twint:Feed:noData'
+
str
(
e
))
logme
.
critical
(
__name__
+
':Twint:Feed:noData'
+
str
(
e
))
# Sometimes Twitter says there is no data. But it's a lie.
# Sometimes Twitter says there is no data. But it's a lie.
# raise
consecutive_errors_count
+=
1
consecutive_errors_count
+=
1
if
consecutive_errors_count
<
self
.
config
.
Retries_count
:
if
consecutive_errors_count
<
self
.
config
.
Retries_count
:
# skip to the next iteration if wait time does not satisfy limit constraints
# skip to the next iteration if wait time does not satisfy limit constraints
...
@@ -111,9 +137,10 @@ class Twint:
...
@@ -111,9 +137,10 @@ class Twint:
time
.
sleep
(
delay
)
time
.
sleep
(
delay
)
self
.
user_agent
=
await
get
.
RandomUserAgent
(
wa
=
True
)
self
.
user_agent
=
await
get
.
RandomUserAgent
(
wa
=
True
)
continue
continue
logme
.
critical
(
__name__
+
':Twint:Feed:Tweets_known_error:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':Twint:Feed:Tweets_known_error:'
+
str
(
e
))
sys
.
stderr
.
write
(
str
(
e
)
+
" [x] run.Feed"
)
sys
.
stderr
.
write
(
str
(
e
)
+
" [x] run.Feed"
)
sys
.
stderr
.
write
(
"[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!"
)
sys
.
stderr
.
write
(
"[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!"
)
break
break
if
self
.
config
.
Resume
:
if
self
.
config
.
Resume
:
print
(
self
.
init
,
file
=
open
(
self
.
config
.
Resume
,
"a"
,
encoding
=
"utf-8"
))
print
(
self
.
init
,
file
=
open
(
self
.
config
.
Resume
,
"a"
,
encoding
=
"utf-8"
))
...
@@ -121,17 +148,17 @@ class Twint:
...
@@ -121,17 +148,17 @@ class Twint:
async def follow(self):
    """Fetch one follower/following page and emit every discovered username."""
    await self.Feed()
    if not self.config.User_full:
        logme.debug(__name__ + ':Twint:follow:notUserFull')
        for entry in self.feed:
            self.count += 1
            # The username sits on the anchor tag's "name" attribute.
            await output.Username(entry.find("a")["name"], self.config, self.conn)
    else:
        logme.debug(__name__ + ':Twint:follow:userFull')
        self.count += await get.Multi(self.feed, self.config, self.conn)
async
def
favorite
(
self
):
async
def
favorite
(
self
):
logme
.
debug
(
__name__
+
':Twint:favorite'
)
logme
.
debug
(
__name__
+
':Twint:favorite'
)
await
self
.
Feed
()
await
self
.
Feed
()
favorited_tweets_list
=
[]
favorited_tweets_list
=
[]
for
tweet
in
self
.
feed
:
for
tweet
in
self
.
feed
:
...
@@ -182,21 +209,22 @@ class Twint:
...
@@ -182,21 +209,22 @@ class Twint:
async def profile(self):
    """Fetch one profile page and emit each tweet found on it."""
    await self.Feed()
    if not self.config.Profile_full:
        logme.debug(__name__ + ':Twint:notProfileFull')
        for item in self.feed:
            self.count += 1
            await output.Tweets(item, self.config, self.conn)
    else:
        logme.debug(__name__ + ':Twint:profileFull')
        self.count += await get.Multi(self.feed, self.config, self.conn)
async def tweets(self):
    """Fetch one search/timeline page and emit each tweet found on it."""
    await self.Feed()
    # TODO : need to take care of this later
    if not self.config.Location:
        logme.debug(__name__ + ':Twint:tweets:notLocation')
        for item in self.feed:
            self.count += 1
            await output.Tweets(item, self.config, self.conn)
    else:
        logme.debug(__name__ + ':Twint:tweets:location')
        self.count += await get.Multi(self.feed, self.config, self.conn)
...
@@ -217,75 +245,82 @@ class Twint:
...
@@ -217,75 +245,82 @@ class Twint:
self
.
user_agent
=
await
get
.
RandomUserAgent
()
self
.
user_agent
=
await
get
.
RandomUserAgent
()
if
self
.
config
.
User_id
is
not
None
and
self
.
config
.
Username
is
None
:
if
self
.
config
.
User_id
is
not
None
and
self
.
config
.
Username
is
None
:
logme
.
debug
(
__name__
+
':Twint:main:user_id'
)
logme
.
debug
(
__name__
+
':Twint:main:user_id'
)
self
.
config
.
Username
=
await
get
.
Username
(
self
.
config
.
User_id
)
self
.
config
.
Username
=
await
get
.
Username
(
self
.
config
.
User_id
,
self
.
config
.
Bearer_token
,
self
.
config
.
Guest_token
)
if
self
.
config
.
Username
is
not
None
and
self
.
config
.
User_id
is
None
:
if
self
.
config
.
Username
is
not
None
and
self
.
config
.
User_id
is
None
:
logme
.
debug
(
__name__
+
':Twint:main:username'
)
logme
.
debug
(
__name__
+
':Twint:main:username'
)
url
=
f
"https://twitter.com/{self.config.Username}?lang=en"
self
.
config
.
User_id
=
await
get
.
User
(
url
,
self
.
config
,
self
.
conn
,
True
)
self
.
config
.
User_id
=
await
get
.
User
(
self
.
config
.
Username
,
self
.
config
,
self
.
conn
,
self
.
config
.
Bearer_token
,
self
.
config
.
Guest_token
,
True
)
if
self
.
config
.
User_id
is
None
:
if
self
.
config
.
User_id
is
None
:
raise
ValueError
(
"Cannot find twitter account with name = "
+
self
.
config
.
Username
)
raise
ValueError
(
"Cannot find twitter account with name = "
+
self
.
config
.
Username
)
# TODO : will need to modify it to work with the new endpoints
if
self
.
config
.
TwitterSearch
and
self
.
config
.
Since
and
self
.
config
.
Until
:
if
self
.
config
.
TwitterSearch
and
self
.
config
.
Since
and
self
.
config
.
Until
:
logme
.
debug
(
__name__
+
':Twint:main:search+since+until'
)
logme
.
debug
(
__name__
+
':Twint:main:search+since+until'
)
while
self
.
d
.
_since
<
self
.
d
.
_until
:
while
self
.
d
.
_since
<
self
.
d
.
_until
:
self
.
config
.
Since
=
str
(
self
.
d
.
_since
)
self
.
config
.
Since
=
str
(
self
.
d
.
_since
)
self
.
config
.
Until
=
str
(
self
.
d
.
_until
)
self
.
config
.
Until
=
str
(
self
.
d
.
_until
)
if
len
(
self
.
feed
)
>
0
:
if
len
(
self
.
feed
)
>
0
:
await
self
.
tweets
()
await
self
.
tweets
()
else
:
else
:
logme
.
debug
(
__name__
+
':Twint:main:gettingNewTweets'
)
logme
.
debug
(
__name__
+
':Twint:main:gettingNewTweets'
)
break
break
if
get
.
Limit
(
self
.
config
.
Limit
,
self
.
count
):
if
get
.
Limit
(
self
.
config
.
Limit
,
self
.
count
):
break
break
else
:
else
:
logme
.
debug
(
__name__
+
':Twint:main:not-search+since+until'
)
logme
.
debug
(
__name__
+
':Twint:main:not-search+since+until'
)
while
True
:
while
True
:
if
len
(
self
.
feed
)
>
0
:
if
len
(
self
.
feed
)
>
0
:
if
self
.
config
.
Followers
or
self
.
config
.
Following
:
if
self
.
config
.
Followers
or
self
.
config
.
Following
:
logme
.
debug
(
__name__
+
':Twint:main:follow'
)
logme
.
debug
(
__name__
+
':Twint:main:follow'
)
await
self
.
follow
()
await
self
.
follow
()
elif
self
.
config
.
Favorites
:
elif
self
.
config
.
Favorites
:
logme
.
debug
(
__name__
+
':Twint:main:favorites'
)
logme
.
debug
(
__name__
+
':Twint:main:favorites'
)
await
self
.
favorite
()
await
self
.
favorite
()
elif
self
.
config
.
Profile
:
elif
self
.
config
.
Profile
:
logme
.
debug
(
__name__
+
':Twint:main:profile'
)
logme
.
debug
(
__name__
+
':Twint:main:profile'
)
await
self
.
profile
()
await
self
.
profile
()
elif
self
.
config
.
TwitterSearch
:
elif
self
.
config
.
TwitterSearch
:
logme
.
debug
(
__name__
+
':Twint:main:twitter-search'
)
logme
.
debug
(
__name__
+
':Twint:main:twitter-search'
)
await
self
.
tweets
()
await
self
.
tweets
()
else
:
else
:
logme
.
debug
(
__name__
+
':Twint:main:no-more-tweets'
)
logme
.
debug
(
__name__
+
':Twint:main:no-more-tweets'
)
break
break
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
#
logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
if
get
.
Limit
(
self
.
config
.
Limit
,
self
.
count
):
if
get
.
Limit
(
self
.
config
.
Limit
,
self
.
count
):
logme
.
debug
(
__name__
+
':Twint:main:reachedLimit'
)
logme
.
debug
(
__name__
+
':Twint:main:reachedLimit'
)
break
break
if
self
.
config
.
Count
:
if
self
.
config
.
Count
:
verbose
.
Count
(
self
.
count
,
self
.
config
)
verbose
.
Count
(
self
.
count
,
self
.
config
)
def run(config, callback=None):
    """Ensure an asyncio event loop exists, then drive Twint.main() on it."""
    logme.debug(__name__ + ':run')
    try:
        get_event_loop()
    except RuntimeError as e:
        # Only a missing loop is expected here; anything else is re-raised.
        if "no current event loop" not in str(e):
            logme.exception(__name__ + ':run:Unexpected exception while handling an expected RuntimeError.')
            raise
        set_event_loop(new_event_loop())
    except Exception:
        logme.exception(__name__ + ':run:Unexpected exception occurred while attempting to get or create a new event loop.')
        raise
    get_event_loop().run_until_complete(Twint(config).main(callback))
def
Favorites
(
config
):
def
Favorites
(
config
):
logme
.
debug
(
__name__
+
':Favorites'
)
logme
.
debug
(
__name__
+
':Favorites'
)
config
.
Favorites
=
True
config
.
Favorites
=
True
config
.
Following
=
False
config
.
Following
=
False
config
.
Followers
=
False
config
.
Followers
=
False
...
@@ -296,8 +331,9 @@ def Favorites(config):
...
@@ -296,8 +331,9 @@ def Favorites(config):
if
config
.
Pandas_au
:
if
config
.
Pandas_au
:
storage
.
panda
.
_autoget
(
"tweet"
)
storage
.
panda
.
_autoget
(
"tweet"
)
def
Followers
(
config
):
def
Followers
(
config
):
logme
.
debug
(
__name__
+
':Followers'
)
logme
.
debug
(
__name__
+
':Followers'
)
config
.
Followers
=
True
config
.
Followers
=
True
config
.
Following
=
False
config
.
Following
=
False
config
.
Profile
=
False
config
.
Profile
=
False
...
@@ -310,11 +346,12 @@ def Followers(config):
...
@@ -310,11 +346,12 @@ def Followers(config):
if
config
.
User_full
:
if
config
.
User_full
:
storage
.
panda
.
_autoget
(
"user"
)
storage
.
panda
.
_autoget
(
"user"
)
if
config
.
Pandas_clean
and
not
config
.
Store_object
:
if
config
.
Pandas_clean
and
not
config
.
Store_object
:
#storage.panda.clean()
#
storage.panda.clean()
output
.
_clean_follow_list
()
output
.
_clean_follow_list
()
def
Following
(
config
):
def
Following
(
config
):
logme
.
debug
(
__name__
+
':Following'
)
logme
.
debug
(
__name__
+
':Following'
)
config
.
Following
=
True
config
.
Following
=
True
config
.
Followers
=
False
config
.
Followers
=
False
config
.
Profile
=
False
config
.
Profile
=
False
...
@@ -327,11 +364,12 @@ def Following(config):
...
@@ -327,11 +364,12 @@ def Following(config):
if
config
.
User_full
:
if
config
.
User_full
:
storage
.
panda
.
_autoget
(
"user"
)
storage
.
panda
.
_autoget
(
"user"
)
if
config
.
Pandas_clean
and
not
config
.
Store_object
:
if
config
.
Pandas_clean
and
not
config
.
Store_object
:
#storage.panda.clean()
#
storage.panda.clean()
output
.
_clean_follow_list
()
output
.
_clean_follow_list
()
def
Lookup
(
config
):
def
Lookup
(
config
):
logme
.
debug
(
__name__
+
':Lookup'
)
logme
.
debug
(
__name__
+
':Lookup'
)
try
:
try
:
get_event_loop
()
get_event_loop
()
...
@@ -339,15 +377,16 @@ def Lookup(config):
...
@@ -339,15 +377,16 @@ def Lookup(config):
if
"no current event loop"
in
str
(
e
):
if
"no current event loop"
in
str
(
e
):
set_event_loop
(
new_event_loop
())
set_event_loop
(
new_event_loop
())
else
:
else
:
logme
.
exception
(
__name__
+
':Lookup:Unexpected exception while handling an expected RuntimeError.'
)
logme
.
exception
(
__name__
+
':Lookup:Unexpected exception while handling an expected RuntimeError.'
)
raise
raise
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
exception
(
__name__
+
':Lookup:Unexpected exception occured while attempting to get or create a new event loop.'
)
logme
.
exception
(
__name__
+
':Lookup:Unexpected exception occured while attempting to get or create a new event loop.'
)
raise
raise
try
:
try
:
if
config
.
User_id
is
not
None
:
if
config
.
User_id
is
not
None
:
logme
.
debug
(
__name__
+
':Twint:Lookup:user_id'
)
logme
.
debug
(
__name__
+
':Twint:Lookup:user_id'
)
config
.
Username
=
get_event_loop
()
.
run_until_complete
(
get
.
Username
(
config
.
User_id
))
config
.
Username
=
get_event_loop
()
.
run_until_complete
(
get
.
Username
(
config
.
User_id
))
url
=
f
"https://mobile.twitter.com/{config.Username}?prefetchTimestamp="
+
str
(
int
(
time
.
time
()
*
1000
))
url
=
f
"https://mobile.twitter.com/{config.Username}?prefetchTimestamp="
+
str
(
int
(
time
.
time
()
*
1000
))
...
@@ -357,15 +396,16 @@ def Lookup(config):
...
@@ -357,15 +396,16 @@ def Lookup(config):
storage
.
panda
.
_autoget
(
"user"
)
storage
.
panda
.
_autoget
(
"user"
)
except
RuntimeError
as
e
:
except
RuntimeError
as
e
:
if
"no current event loop"
in
str
(
e
):
if
"no current event loop"
in
str
(
e
):
logme
.
exception
(
__name__
+
':Lookup:Previous attempt to to create an event loop failed.'
)
logme
.
exception
(
__name__
+
':Lookup:Previous attempt to to create an event loop failed.'
)
raise
raise
except
Exception
as
e
:
except
Exception
as
e
:
logme
.
exception
(
__name__
+
':Lookup:Unexpected exception occured.'
)
logme
.
exception
(
__name__
+
':Lookup:Unexpected exception occured.'
)
raise
raise
def
Profile
(
config
):
def
Profile
(
config
):
logme
.
debug
(
__name__
+
':Profile'
)
logme
.
debug
(
__name__
+
':Profile'
)
config
.
Profile
=
True
config
.
Profile
=
True
config
.
Favorites
=
False
config
.
Favorites
=
False
config
.
Following
=
False
config
.
Following
=
False
...
@@ -375,8 +415,9 @@ def Profile(config):
...
@@ -375,8 +415,9 @@ def Profile(config):
if
config
.
Pandas_au
:
if
config
.
Pandas_au
:
storage
.
panda
.
_autoget
(
"tweet"
)
storage
.
panda
.
_autoget
(
"tweet"
)
def
Search
(
config
,
callback
=
None
):
def
Search
(
config
,
callback
=
None
):
logme
.
debug
(
__name__
+
':Search'
)
logme
.
debug
(
__name__
+
':Search'
)
config
.
TwitterSearch
=
True
config
.
TwitterSearch
=
True
config
.
Favorites
=
False
config
.
Favorites
=
False
config
.
Following
=
False
config
.
Following
=
False
...
...
twint/token.py
0 → 100644
View file @
2d638de0
import
re
import
time
import
requests
import
logging
as
logme
class TokenExpiryException(Exception):
    """Raised when Twitter rejects a request because the guest token expired."""

    def __init__(self, msg):
        super().__init__(msg)
class Token:
    """Fetch and hold the guest token Twitter requires for API requests.

    Usage: ``Token(config).refresh()`` — the scraped token is stored on
    ``config.Guest_token`` (reset to ``None`` on failure).
    """

    def __init__(self, config):
        self._session = requests.Session()
        self.config = config
        self._retries = 5   # extra attempts after the first request
        self._timeout = 10  # per-request timeout, seconds
        self.url = 'https://twitter.com'

    def _request(self):
        """GET ``self.url`` with exponential back-off retries.

        Returns the successful ``requests.Response``. After all attempts fail,
        resets ``config.Guest_token`` and raises ``TokenExpiryException``.
        """
        for attempt in range(self._retries + 1):
            # The request is newly prepared on each retry because of potential cookie updates.
            req = self._session.prepare_request(requests.Request('GET', self.url))
            logme.debug(f'Retrieving {req.url}')
            try:
                r = self._session.send(req, allow_redirects=True, timeout=self._timeout)
            except requests.exceptions.RequestException as exc:
                if attempt < self._retries:
                    retrying = ', retrying'
                    level = logme.WARNING
                else:
                    retrying = ''
                    level = logme.ERROR
                logme.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
            else:
                logme.debug(f'{req.url} retrieved successfully')
                return r
            if attempt < self._retries:
                # TODO : might wanna tweak this back-off timer
                sleep_time = 2.0 * 2 ** attempt
                logme.info(f'Waiting {sleep_time:.0f} seconds')
                time.sleep(sleep_time)
        msg = f'{self._retries + 1} requests to {self.url} failed, giving up.'
        logme.fatal(msg)
        self.config.Guest_token = None
        # BUGFIX: the original raised RefreshTokenException, which is not
        # defined anywhere in this module and would surface as a NameError.
        raise TokenExpiryException(msg)

    def refresh(self):
        """Scrape a fresh guest token out of the twitter.com HTML."""
        logme.debug('Retrieving guest token')
        res = self._request()
        match = re.search(r'\("gt=(\d+);', res.text)
        if match:
            logme.debug('Found guest token in HTML')
            self.config.Guest_token = str(match.group(1))
        else:
            self.config.Guest_token = None
            # BUGFIX: same undefined-name issue as in _request() above.
            raise TokenExpiryException('Could not find the Guest token in HTML')
twint/tweet.py
View file @
2d638de0
from
time
import
strftime
,
localtime
from
time
import
strftime
,
localtime
from
datetime
import
datetime
from
datetime
import
datetime
,
timezone
import
json
import
json
import
logging
as
logme
import
logging
as
logme
...
@@ -9,6 +9,7 @@ from googletransx import Translator
...
@@ -9,6 +9,7 @@ from googletransx import Translator
# - https://github.com/x0rzkov/py-googletrans#basic-usage
# - https://github.com/x0rzkov/py-googletrans#basic-usage
translator
=
Translator
()
translator
=
Translator
()
class
tweet
:
class
tweet
:
"""Define Tweet class
"""Define Tweet class
"""
"""
...
@@ -17,52 +18,63 @@ class tweet:
...
@@ -17,52 +18,63 @@ class tweet:
def
__init__
(
self
):
def
__init__
(
self
):
pass
pass
def utc_to_local(utc_dt):
    """Interpret a naive datetime as UTC and convert it to local time."""
    aware = utc_dt.replace(tzinfo=timezone.utc)
    return aware.astimezone(tz=None)
def getMentions(tw):
    """Extract mentioned screen names from a tweet's entities payload.

    Returns an empty list when the payload has no mention data.
    """
    logme.debug(__name__ + ':getMentions')
    try:
        return [m['screen_name'] for m in tw['entities']['user_mentions']]
    except KeyError:
        return []
def getQuoteURL(tw):
    """Extract the URL of the quoted tweet, or '' when there is none.

    Args:
        tw: BeautifulSoup node of a rendered tweet.

    Returns:
        Absolute https://twitter.com URL of the quoted status, or '' when
        the tweet quotes nothing or the markup is missing the container.
    """
    logme.debug(__name__ + ':getQuoteURL')
    base_twitter = "https://twitter.com"
    quote_url = ""
    try:
        quote = tw.find("div", "QuoteTweet-innerContainer")
        # quote is None when no quote container exists; .get("href") may
        # also return None, making the concatenation fail.
        quote_url = base_twitter + quote.get("href")
    except (AttributeError, TypeError):
        # BUGFIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; only the two expected failure
        # modes are caught now.
        quote_url = ""
    return quote_url
def
getText
(
tw
):
"""Replace some text
"""
logme
.
debug
(
__name__
+
':getText'
)
text
=
tw
.
find
(
"p"
,
"tweet-text"
)
.
text
text
=
text
.
replace
(
"http"
,
" http"
)
text
=
text
.
replace
(
"pic.twitter"
,
" pic.twitter"
)
return
text
# def getText(tw):
# """Replace some text
# """
# logme.debug(__name__ + ':getText')
# text = tw.find("p", "tweet-text").text
# text = text.replace("http", " http")
# text = text.replace("pic.twitter", " pic.twitter")
#
# return text
def getStat(tw, _type):
    """Read one engagement counter (reply / retweet / favorite) from tweet markup."""
    logme.debug(__name__ + ':getStat')
    selector = f"ProfileTweet-action--{_type} u-hiddenVisually"
    container = tw.find("span", selector)
    return container.find("span")["data-tweet-stat-count"]
def
getRetweet
(
tw
,
_config
):
def
getRetweet
(
tw
,
_config
):
"""Get Retweet
"""Get Retweet
"""
"""
logme
.
debug
(
__name__
+
':getRetweet'
)
logme
.
debug
(
__name__
+
':getRetweet'
)
if
_config
.
Profile
:
if
_config
.
Profile
:
if
int
(
tw
[
"data-user-id"
])
!=
_config
.
User_id
:
if
int
(
tw
[
"data-user-id"
])
!=
_config
.
User_id
:
return
_config
.
User_id
,
_config
.
Username
return
_config
.
User_id
,
_config
.
Username
...
@@ -74,60 +86,156 @@ def getRetweet(tw, _config):
...
@@ -74,60 +86,156 @@ def getRetweet(tw, _config):
return
_rt_id
,
_rt_username
return
_rt_id
,
_rt_username
return
''
,
''
return
''
,
''
def
getThumbnail
(
tw
):
"""Get Thumbnail
# def getThumbnail(tw):
"""
# """Get Thumbnail
divs
=
tw
.
find_all
(
"div"
,
"PlayableMedia-player"
)
# """
thumb
=
""
# divs = tw.find_all("div", "PlayableMedia-player")
for
div
in
divs
:
# thumb = ""
thumb
=
div
.
attrs
[
"style"
]
.
split
(
"url('"
)[
-
1
]
# for div in divs:
thumb
=
thumb
.
replace
(
"')"
,
""
)
# thumb = div.attrs["style"].split("url('")[-1]
return
thumb
# thumb = thumb.replace("')", "")
# return thumb
# def Tweet(tw, config):
# """Create Tweet object
# """
# logme.debug(__name__+':Tweet')
# t = tweet()
# t.id = int(tw["data-item-id"])
# t.id_str = tw["data-item-id"]
# t.conversation_id = tw["data-conversation-id"]
# t.datetime = int(tw.find("span", "_timestamp")["data-time-ms"])
# t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime/1000.0))
# t.timestamp = strftime("%H:%M:%S", localtime(t.datetime/1000.0))
# t.user_id = int(tw["data-user-id"])
# t.user_id_str = tw["data-user-id"]
# t.username = tw["data-screen-name"]
# t.name = tw["data-name"]
# t.place = tw.find("a","js-geo-pivot-link").text.strip() if tw.find("a","js-geo-pivot-link") else ""
# t.timezone = strftime("%z", localtime())
# for img in tw.findAll("img", "Emoji Emoji--forText"):
# img.replaceWith(img["alt"])
# t.mentions = getMentions(tw)
# t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
# t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
# t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
# t.thumbnail = getThumbnail(tw)
# t.tweet = getText(tw)
# t.lang = tw.find('p', 'tweet-text')['lang']
# t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
# t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")]
# t.replies_count = getStat(tw, "reply")
# t.retweets_count = getStat(tw, "retweet")
# t.likes_count = getStat(tw, "favorite")
# t.link = f"https://twitter.com/{t.username}/status/{t.id}"
# t.user_rt_id, t.user_rt = getRetweet(tw, config)
# t.retweet = True if t.user_rt else False
# t.retweet_id = ''
# t.retweet_date = ''
# if not config.Profile:
# t.retweet_id = tw['data-retweet-id'] if t.user_rt else ''
# t.retweet_date = datetime.fromtimestamp(((int(t.retweet_id) >> 22) + 1288834974657)/1000.0).strftime("%Y-%m-%d %H:%M:%S") if t.user_rt else ''
# t.quote_url = getQuoteURL(tw)
# t.near = config.Near if config.Near else ""
# t.geo = config.Geo if config.Geo else ""
# t.source = config.Source if config.Source else ""
# t.reply_to = [{'user_id': t['id_str'], 'username': t['screen_name']} for t in json.loads(tw["data-reply-to-users-json"])]
# t.translate = ''
# t.trans_src = ''
# t.trans_dest = ''
# if config.Translate == True:
# try:
# ts = translator.translate(text=t.tweet, dest=config.TranslateDest)
# t.translate = ts.text
# t.trans_src = ts.src
# t.trans_dest = ts.dest
# # ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
# except ValueError as e:
# raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet))
# logme.debug(__name__+':Tweet:translator.translate:'+str(e))
# return t
def
Tweet
(
tw
,
config
):
def
Tweet
(
tw
,
config
):
"""Create Tweet object
"""Create Tweet object
"""
"""
logme
.
debug
(
__name__
+
':Tweet'
)
logme
.
debug
(
__name__
+
':Tweet'
)
t
=
tweet
()
t
=
tweet
()
t
.
id
=
int
(
tw
[
"data-item-id"
])
t
.
id
=
int
(
tw
[
'id_str'
])
t
.
id_str
=
tw
[
"data-item-id"
]
t
.
id_str
=
tw
[
"id_str"
]
t
.
conversation_id
=
tw
[
"data-conversation-id"
]
t
.
conversation_id
=
tw
[
"conversation_id_str"
]
t
.
datetime
=
int
(
tw
.
find
(
"span"
,
"_timestamp"
)[
"data-time-ms"
])
t
.
datestamp
=
strftime
(
"
%
Y-
%
m-
%
d"
,
localtime
(
t
.
datetime
/
1000.0
))
# parsing date to user-friendly format
t
.
timestamp
=
strftime
(
"
%
H:
%
M:
%
S"
,
localtime
(
t
.
datetime
/
1000.0
))
_dt
=
tw
[
'created_at'
]
t
.
user_id
=
int
(
tw
[
"data-user-id"
])
_dt
=
datetime
.
strptime
(
_dt
,
'
%
a
%
b
%
d
%
H:
%
M:
%
S
%
z
%
Y'
)
t
.
user_id_str
=
tw
[
"data-user-id"
]
_dt
=
utc_to_local
(
_dt
)
t
.
username
=
tw
[
"data-screen-name"
]
t
.
datetime
=
str
(
_dt
.
strftime
(
'
%
d-
%
m-
%
Y
%
H:
%
M:
%
S
%
Z'
))
t
.
name
=
tw
[
"data-name"
]
# date is of the format year,
t
.
place
=
tw
.
find
(
"a"
,
"js-geo-pivot-link"
)
.
text
.
strip
()
if
tw
.
find
(
"a"
,
"js-geo-pivot-link"
)
else
""
t
.
datestamp
=
_dt
.
strftime
(
'
%
d-
%
m-
%
Y'
)
t
.
timestamp
=
_dt
.
strftime
(
'
%
H:
%
M:
%
S'
)
t
.
user_id
=
int
(
tw
[
"user_id_str"
])
t
.
user_id_str
=
tw
[
"user_id_str"
]
t
.
username
=
tw
[
"user_data"
][
'screen_name'
]
t
.
name
=
tw
[
"user_data"
][
'name'
]
t
.
place
=
tw
[
'geo'
]
if
tw
[
'geo'
]
else
""
t
.
timezone
=
strftime
(
"
%
z"
,
localtime
())
t
.
timezone
=
strftime
(
"
%
z"
,
localtime
())
for
img
in
tw
.
findAll
(
"img"
,
"Emoji Emoji--forText"
):
# for img in tw.findAll("img", "Emoji Emoji--forText"):
img
.
replaceWith
(
img
[
"alt"
])
# img.replaceWith(img["alt"])
t
.
mentions
=
getMentions
(
tw
)
try
:
t
.
urls
=
[
link
.
attrs
[
"data-expanded-url"
]
for
link
in
tw
.
find_all
(
'a'
,{
'class'
:
'twitter-timeline-link'
})
if
link
.
has_attr
(
"data-expanded-url"
)]
t
.
mentions
=
[
_mention
[
'screen_name'
]
for
_mention
in
tw
[
'entities'
][
'user_mentions'
]]
t
.
photos
=
[
photo_node
.
attrs
[
'data-image-url'
]
for
photo_node
in
tw
.
find_all
(
"div"
,
"AdaptiveMedia-photoContainer"
)]
except
KeyError
:
t
.
video
=
1
if
tw
.
find_all
(
"div"
,
"AdaptiveMedia-video"
)
!=
[]
else
0
t
.
mentions
=
[]
t
.
thumbnail
=
getThumbnail
(
tw
)
try
:
t
.
tweet
=
getText
(
tw
)
t
.
urls
=
[
_url
[
'expanded_url'
]
for
_url
in
tw
[
'entities'
][
'urls'
]]
t
.
lang
=
tw
.
find
(
'p'
,
'tweet-text'
)[
'lang'
]
except
KeyError
:
t
.
hashtags
=
[
hashtag
.
text
for
hashtag
in
tw
.
find_all
(
"a"
,
"twitter-hashtag"
)]
t
.
urls
=
[]
t
.
cashtags
=
[
cashtag
.
text
for
cashtag
in
tw
.
find_all
(
"a"
,
"twitter-cashtag"
)]
try
:
t
.
replies_count
=
getStat
(
tw
,
"reply"
)
t
.
photos
=
[
_img
[
'media_url_https'
]
for
_img
in
tw
[
'entities'
][
'media'
]
if
_img
[
'type'
]
==
'photo'
and
t
.
retweets_count
=
getStat
(
tw
,
"retweet"
)
_img
[
'expanded_url'
]
.
find
(
'/photo/'
)
!=
-
1
]
t
.
likes_count
=
getStat
(
tw
,
"favorite"
)
except
KeyError
:
t
.
photos
=
[]
try
:
t
.
video
=
1
if
len
(
tw
[
'extended_entities'
][
'media'
])
else
0
except
KeyError
:
t
.
video
=
0
try
:
t
.
thumbnail
=
tw
[
'extended_entities'
][
'media'
][
0
][
'media_url_https'
]
except
KeyError
:
t
.
thumbnail
=
''
t
.
tweet
=
tw
[
'full_text'
]
t
.
lang
=
tw
[
'lang'
]
try
:
t
.
hashtags
=
[
hashtag
[
'text'
]
for
hashtag
in
tw
[
'entities'
][
'hashtags'
]]
except
KeyError
:
t
.
hashtags
=
[]
# don't know what this is
# t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")]
t
.
replies_count
=
tw
[
'reply_count'
]
t
.
retweets_count
=
tw
[
'retweet_count'
]
t
.
likes_count
=
tw
[
'favorite_count'
]
t
.
link
=
f
"https://twitter.com/{t.username}/status/{t.id}"
t
.
link
=
f
"https://twitter.com/{t.username}/status/{t.id}"
t
.
user_rt_id
,
t
.
user_rt
=
getRetweet
(
tw
,
config
)
# TODO: someone who is familiar with this code, needs to take a look at what this is
t
.
retweet
=
True
if
t
.
user_rt
else
False
# t.user_rt_id, t.user_rt = getRetweet(tw, config)
t
.
retweet_id
=
''
# t.retweet = True if t.user_rt else False
t
.
retweet_date
=
''
# t.retweet_id = ''
if
not
config
.
Profile
:
# t.retweet_date = ''
t
.
retweet_id
=
tw
[
'data-retweet-id'
]
if
t
.
user_rt
else
''
# if not config.Profile:
t
.
retweet_date
=
datetime
.
fromtimestamp
(((
int
(
t
.
retweet_id
)
>>
22
)
+
1288834974657
)
/
1000.0
)
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
if
t
.
user_rt
else
''
# t.retweet_id = tw['data-retweet-id'] if t.user_rt else ''
t
.
quote_url
=
getQuoteURL
(
tw
)
# t.retweet_date = datetime.fromtimestamp(((int(t.retweet_id) >> 22) + 1288834974657) / 1000.0).strftime(
# "%Y-%m-%d %H:%M:%S") if t.user_rt else ''
try
:
t
.
quote_url
=
tw
[
'quoted_status_permalink'
][
'expanded'
]
if
tw
[
'is_quote_status'
]
else
''
except
KeyError
:
# means that the quoted tweet have been deleted
t
.
quote_url
=
0
t
.
near
=
config
.
Near
if
config
.
Near
else
""
t
.
near
=
config
.
Near
if
config
.
Near
else
""
t
.
geo
=
config
.
Geo
if
config
.
Geo
else
""
t
.
geo
=
config
.
Geo
if
config
.
Geo
else
""
t
.
source
=
config
.
Source
if
config
.
Source
else
""
t
.
source
=
config
.
Source
if
config
.
Source
else
""
t
.
reply_to
=
[{
'user_id'
:
t
[
'id_str'
],
'username'
:
t
[
'screen_name'
]}
for
t
in
json
.
loads
(
tw
[
"data-reply-to-users-json"
])]
# TODO: check this whether we need the list of all the users to whom this tweet is a reply or we only need
# the immediately above user id
t
.
reply_to
=
{
'user_id'
:
tw
[
'in_reply_to_user_id_str'
],
'username'
:
tw
[
'in_reply_to_screen_name'
]}
t
.
translate
=
''
t
.
translate
=
''
t
.
trans_src
=
''
t
.
trans_src
=
''
t
.
trans_dest
=
''
t
.
trans_dest
=
''
...
@@ -140,5 +248,5 @@ def Tweet(tw, config):
...
@@ -140,5 +248,5 @@ def Tweet(tw, config):
# ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
# ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
except
ValueError
as
e
:
except
ValueError
as
e
:
raise
Exception
(
"Invalid destination language: {} / Tweet: {}"
.
format
(
config
.
TranslateDest
,
t
.
tweet
))
raise
Exception
(
"Invalid destination language: {} / Tweet: {}"
.
format
(
config
.
TranslateDest
,
t
.
tweet
))
logme
.
debug
(
__name__
+
':Tweet:translator.translate:'
+
str
(
e
))
logme
.
debug
(
__name__
+
':Tweet:translator.translate:'
+
str
(
e
))
return
t
return
t
twint/url.py
View file @
2d638de0
import
datetime
import
datetime
from
sys
import
platform
from
sys
import
platform
import
logging
as
logme
import
logging
as
logme
from
urllib.parse
import
urlencode
from
urllib.parse
import
quote
mobile
=
"https://mobile.twitter.com"
mobile
=
"https://mobile.twitter.com"
base
=
"https://twitter.com/i"
# base = "https://twitter.com/i"
base
=
"https://api.twitter.com/2/search/adaptive.json"
def
_sanitizeQuery
(
base
,
params
):
def
_sanitizeQuery
(
_url
,
params
):
_serialQuery
=
""
_serialQuery
=
""
for
p
in
params
:
_serialQuery
=
urlencode
(
params
,
quote_via
=
quote
)
_serialQuery
+=
p
[
0
]
+
"="
+
p
[
1
]
+
"&"
_serialQuery
=
_url
+
"?"
+
_serialQuery
_serialQuery
=
base
+
"?"
+
_serialQuery
[:
-
1
]
.
replace
(
":"
,
"
%3
A"
)
.
replace
(
" "
,
"
%20
"
)
return
_serialQuery
return
_serialQuery
def
_formatDate
(
date
):
def
_formatDate
(
date
):
if
"win"
in
platform
:
if
"win"
in
platform
:
return
f
'
\"
{date.split()[0]}
\"
'
return
f
'
\"
{date.split()[0]}
\"
'
...
@@ -20,8 +24,9 @@ def _formatDate(date):
...
@@ -20,8 +24,9 @@ def _formatDate(date):
except
ValueError
:
except
ValueError
:
return
int
(
datetime
.
datetime
.
strptime
(
date
,
"
%
Y-
%
m-
%
d"
)
.
timestamp
())
return
int
(
datetime
.
datetime
.
strptime
(
date
,
"
%
Y-
%
m-
%
d"
)
.
timestamp
())
async
def
Favorites
(
username
,
init
):
async
def
Favorites
(
username
,
init
):
logme
.
debug
(
__name__
+
':Favorites'
)
logme
.
debug
(
__name__
+
':Favorites'
)
url
=
f
"{mobile}/{username}/favorites?lang=en"
url
=
f
"{mobile}/{username}/favorites?lang=en"
if
init
!=
'-1'
:
if
init
!=
'-1'
:
...
@@ -29,8 +34,9 @@ async def Favorites(username, init):
...
@@ -29,8 +34,9 @@ async def Favorites(username, init):
return
url
return
url
async
def
Followers
(
username
,
init
):
async
def
Followers
(
username
,
init
):
logme
.
debug
(
__name__
+
':Followers'
)
logme
.
debug
(
__name__
+
':Followers'
)
url
=
f
"{mobile}/{username}/followers?lang=en"
url
=
f
"{mobile}/{username}/followers?lang=en"
if
init
!=
'-1'
:
if
init
!=
'-1'
:
...
@@ -38,8 +44,9 @@ async def Followers(username, init):
...
@@ -38,8 +44,9 @@ async def Followers(username, init):
return
url
return
url
async
def
Following
(
username
,
init
):
async
def
Following
(
username
,
init
):
logme
.
debug
(
__name__
+
':Following'
)
logme
.
debug
(
__name__
+
':Following'
)
url
=
f
"{mobile}/{username}/following?lang=en"
url
=
f
"{mobile}/{username}/following?lang=en"
if
init
!=
'-1'
:
if
init
!=
'-1'
:
...
@@ -47,8 +54,9 @@ async def Following(username, init):
...
@@ -47,8 +54,9 @@ async def Following(username, init):
return
url
return
url
async
def
MobileProfile
(
username
,
init
):
async
def
MobileProfile
(
username
,
init
):
logme
.
debug
(
__name__
+
':MobileProfile'
)
logme
.
debug
(
__name__
+
':MobileProfile'
)
url
=
f
"{mobile}/{username}?lang=en"
url
=
f
"{mobile}/{username}?lang=en"
if
init
!=
'-1'
:
if
init
!=
'-1'
:
...
@@ -56,8 +64,9 @@ async def MobileProfile(username, init):
...
@@ -56,8 +64,9 @@ async def MobileProfile(username, init):
return
url
return
url
async
def
Profile
(
username
,
init
):
async
def
Profile
(
username
,
init
):
logme
.
debug
(
__name__
+
':Profile'
)
logme
.
debug
(
__name__
+
':Profile'
)
url
=
f
"{base}/profiles/show/{username}/timeline/tweets?include_"
url
=
f
"{base}/profiles/show/{username}/timeline/tweets?include_"
url
+=
"available_features=1&lang=en&include_entities=1"
url
+=
"available_features=1&lang=en&include_entities=1"
url
+=
"&include_new_items_bar=true"
url
+=
"&include_new_items_bar=true"
...
@@ -67,17 +76,38 @@ async def Profile(username, init):
...
@@ -67,17 +76,38 @@ async def Profile(username, init):
return
url
return
url
async
def
Search
(
config
,
init
):
async
def
Search
(
config
,
init
):
logme
.
debug
(
__name__
+
':Search'
)
logme
.
debug
(
__name__
+
':Search'
)
url
=
f
"{base}/search/timeline"
url
=
base
tweet_count
=
100
q
=
""
q
=
""
params
=
[
params
=
[
(
'vertical'
,
'default'
),
# ('include_blocking', '1'),
(
'src'
,
'unkn'
),
# ('include_blocked_by', '1'),
(
'include_available_features'
,
'1'
),
# ('include_followed_by', '1'),
(
'include_entities'
,
'1'
),
# ('include_want_retweets', '1'),
(
'max_position'
,
str
(
init
)),
# ('include_mute_edge', '1'),
(
'reset_error_state'
,
'false'
),
# ('include_can_dm', '1'),
(
'include_can_media_tag'
,
'1'
),
# ('skip_status', '1'),
# ('include_cards', '1'),
(
'include_ext_alt_text'
,
'true'
),
(
'include_quote_count'
,
'true'
),
(
'include_reply_count'
,
'1'
),
(
'tweet_mode'
,
'extended'
),
(
'include_entities'
,
'true'
),
(
'include_user_entities'
,
'true'
),
(
'include_ext_media_availability'
,
'true'
),
(
'send_error_codes'
,
'true'
),
(
'simple_quoted_tweet'
,
'true'
),
(
'count'
,
tweet_count
),
# ('query_source', 'typed_query'),
# ('pc', '1'),
(
'cursor'
,
str
(
init
)),
(
'spelling_corrections'
,
'1'
),
(
'ext'
,
'mediaStats
%2
ChighlightedLabel'
),
(
'tweet_search_mode'
,
'live'
),
# this can be handled better, maybe take an argument and set it then
]
]
if
not
config
.
Popular_tweets
:
if
not
config
.
Popular_tweets
:
params
.
append
((
'f'
,
'tweets'
))
params
.
append
((
'f'
,
'tweets'
))
...
@@ -92,7 +122,8 @@ async def Search(config, init):
...
@@ -92,7 +122,8 @@ async def Search(config, init):
config
.
Geo
=
config
.
Geo
.
replace
(
" "
,
""
)
config
.
Geo
=
config
.
Geo
.
replace
(
" "
,
""
)
q
+=
f
" geocode:{config.Geo}"
q
+=
f
" geocode:{config.Geo}"
if
config
.
Search
:
if
config
.
Search
:
q
+=
f
" {config.Search}"
q
+=
f
"{config.Search}"
if
config
.
Year
:
if
config
.
Year
:
q
+=
f
" until:{config.Year}-1-1"
q
+=
f
" until:{config.Year}-1-1"
if
config
.
Since
:
if
config
.
Since
:
...
@@ -120,6 +151,7 @@ async def Search(config, init):
...
@@ -120,6 +151,7 @@ async def Search(config, init):
q
+=
" filter:media"
q
+=
" filter:media"
if
config
.
Replies
:
if
config
.
Replies
:
q
+=
" filter:replies"
q
+=
" filter:replies"
# although this filter can still be used, but I found it broken in my preliminary testing, needs more testing
if
config
.
Native_retweets
:
if
config
.
Native_retweets
:
q
+=
" filter:nativeretweets"
q
+=
" filter:nativeretweets"
if
config
.
Min_likes
:
if
config
.
Min_likes
:
...
@@ -144,3 +176,43 @@ async def Search(config, init):
...
@@ -144,3 +176,43 @@ async def Search(config, init):
params
.
append
((
"q"
,
q
))
params
.
append
((
"q"
,
q
))
_serialQuery
=
_sanitizeQuery
(
url
,
params
)
_serialQuery
=
_sanitizeQuery
(
url
,
params
)
return
url
,
params
,
_serialQuery
return
url
,
params
,
_serialQuery
# maybe dont need this
async def SearchProfile(config, init=None):
    """Build the URL, params and serialized query for a profile-timeline fetch.

    Args:
        config: twint Config object (currently unused inside this builder).
        init: pagination cursor from a previous response, or None for the
            first page.

    Returns:
        Tuple (_url, params, _serialQuery).  Note that `_url` still contains
        an unformatted '{}' placeholder for the numeric user id.
    """
    logme.debug(__name__ + ':SearchProfile')
    _url = 'https://api.twitter.com/2/timeline/profile/{}.json?'
    params = [
        ('include_profile_interstitial_type', '1'),
        ('include_blocking', '1'),
        ('include_blocked_by', '1'),
        ('include_followed_by', '1'),
        ('include_want_retweets', '1'),
        ('include_mute_edge', '1'),
        ('include_can_dm', '1'),
        ('include_can_media_tag', '1'),
        ('skip_status', '1'),
        ('cards_platform', 'Web - 12'),
        ('include_cards', '1'),
        ('include_ext_alt_text', 'true'),
        ('include_quote_count', 'true'),
        ('include_reply_count', '1'),
        ('tweet_mode', 'extended'),
        ('include_entities', 'true'),
        ('include_user_entities', 'true'),
        ('include_ext_media_color', 'true'),
        ('include_ext_media_availability', 'true'),
        ('send_error_codes', 'true'),
        ('simple_quoted_tweet', 'true'),
        ('include_tweet_replies', 'false'),
        ('count', '50'),
        # NOTE(review): hard-coded userId looks like leftover debugging —
        # confirm the caller overrides it before this URL is used.
        ('userId', '1934388686'),
        ('ext', 'mediaStats,ChighlightedLabel'),
    ]
    if init:
        params.append(('cursor', init))
    _serialQuery = _sanitizeQuery(_url, params)
    # BUGFIX(cleanup): removed the unused `q = ""` local and the unreachable
    # `pass` after the return.
    return _url, params, _serialQuery
twint/user.py
View file @
2d638de0
import
datetime
import
logging
as
logme
import
logging
as
logme
class User:
    """Plain container for scraped Twitter user attributes.

    Instances start empty; fields (id, name, username, bio, ...) are
    attached dynamically by the module-level ``User(ur)`` parser.
    """
    # Discriminator used by output/format code to tell users from tweets.
    type = "user"

    def __init__(self):
        pass
def inf(ur, _type):
    """Pull one profile field out of scraped profile HTML.

    Args:
        ur: BeautifulSoup document of a profile page.
        _type: one of "id", "name", "username", "private".

    Returns:
        The requested value; "" for an unresolvable id, 1/0 for "private".
    """
    logme.debug(__name__ + ':inf')
    try:
        # Profile container markup differs between page variants; fall back
        # through the known class names until one matches.
        group = ur.find("div", "profile")
        if group == None:
            group = ur.find("div", "user-actions btn-group not-following")
        if group == None:
            group = ur.find("div", "user-actions btn-group not-following protected")
    except Exception as e:
        # NOTE(review): error is only printed; `group` may stay unbound and
        # the lookups below would then raise NameError — confirm intent.
        print("Error: " + str(e))
    if _type == "id":
        # The numeric id is only available via a mention anchor that carries
        # data-mentioned-user-id for this screen name.
        screen_name = group.find("span", "screen-name").text
        ret = ur.find("a", {"data-screenname": screen_name})
        ret = ret.get('data-mentioned-user-id') if ret is not None else None
        ret = "" if ret is None else ret
    elif _type == "name":
        # Display name; drop anything after the first newline (badges etc.).
        ret = group.find("div", "fullname").text.split('\n')[0]
    elif _type == "username":
        ret = group.find("span", "screen-name").text
    elif _type == "private":
        # Presence of the "protected" div marks a private account.
        ret = group.find("div", "protected")
        if ret:
            ret = 1
        else:
            ret = 0
    return ret
def card(ur, _type):
    """Pull one profile-card field ("bio", "location" or "url") from markup.

    Args:
        ur: BeautifulSoup document of a profile page.
        _type: which field to extract.

    Returns:
        The field text, or '' when the markup is missing it.
    """
    logme.debug(__name__ + ':card')
    # BUGFIX: the three bare `except:` clauses (which also swallowed
    # KeyboardInterrupt/SystemExit) are narrowed to the failures a missing
    # node actually produces.
    if _type == "bio":
        try:
            ret = ur.find("div", "bio").text.replace("\n", " ").strip()
        except (AttributeError, TypeError):
            ret = ""
    elif _type == "location":
        try:
            ret = ur.find("div", "location").text
        except (AttributeError, TypeError):
            ret = ""
    elif _type == "url":
        try:
            ret = ur.find("link")["href"]
        except (AttributeError, TypeError, KeyError):
            ret = ""
    return ret
def join(ur):
    """Return [time, date] parsed from the profile join-date tooltip.

    The tooltip title looks like "4:13 PM - 1 Apr 2013"; splitting on
    " - " yields [clock_time, calendar_date].  Missing markup yields
    ["", ""].
    """
    # Moved out of the try block: logging cannot raise the lookup errors
    # this guard exists for.
    logme.debug(__name__ + ':join')
    try:
        jd = ur.find("span", "ProfileHeaderCard-joinDateText js-tooltip u-dir")["title"]
        return jd.split(" - ")
    except (TypeError, KeyError, AttributeError):
        # BUGFIX: was a bare `except:`; only a missing node / attribute is
        # expected here.
        return ["", ""]
def convertToInt(x):
    """Parse a human-readable count such as "1,234", "1.5k" or "2M" to an int.

    Supports comma thousands-separators and case-insensitive k/m/b suffixes.
    Returns 0 for anything unparseable.
    """
    # BUGFIX: log tag typo 'contertToInt' corrected.
    logme.debug(__name__ + ':convertToInt')
    multDict = {
        "k": 1000,
        "m": 1000000,
        "b": 1000000000,
    }
    # First attempt: a plain integer, possibly with thousands separators.
    try:
        if ',' in x:
            x = x.replace(',', '')
        return int(x)
    except (ValueError, TypeError):
        # BUGFIX: bare `except:` narrowed; non-numeric or non-string input
        # simply falls through to the suffixed form.
        pass
    # Second attempt: numeric prefix plus a single k/m/b multiplier suffix.
    try:
        y = float(str(x)[:-1])
        y = y * multDict[str(x)[-1:].lower()]
        return int(y)
    except (ValueError, KeyError, TypeError):
        pass
    return 0
def stat(ur, _type):
    """Return one profile counter (e.g. 'tweets', 'followers'), or 0 if absent.

    Parses the mobile profile-stats table, where each <td class="stat">
    holds "<number> <label>".
    """
    logme.debug(__name__ + ':stat')
    stats = ur.find('table', 'profile-stats')
    stat_dict = {}
    # Renamed the loop variable: the original `stat` shadowed this function.
    for cell in stats.find_all('td', 'stat'):
        # Commas are already stripped here, so the number parses directly.
        statnum, statlabel = cell.text.replace('\n', '').replace(',', '').split(' ')[:2]
        stat_dict[statlabel.lower()] = int(statnum)
    try:
        return stat_dict[_type]
    except KeyError:
        # BUGFIX: a missing label raises KeyError; the old code caught
        # AttributeError, which never fires on a dict lookup, so the
        # intended 0 fallback was unreachable.
        return 0
def media(ur):
    """Return the media count shown in the profile photo rail, or 0.

    The rail heading reads e.g. "1,234 Photos and videos"; the leading
    token is handed to convertToInt for suffix/comma handling.
    """
    logme.debug(__name__ + ':media')
    try:
        media_count = ur.find("a", "PhotoRail-headingWithCount js-nav").text.strip().split(" ")[0]
        return convertToInt(media_count)
    except (AttributeError, TypeError):
        # BUGFIX: bare `except:` narrowed — a missing photo rail (protected
        # or empty profiles) surfaces as AttributeError on `.text`.
        return 0
def verified(ur):
    """Return 1 if the profile shows the "Verified Account" badge, else 0."""
    logme.debug(__name__ + ':verified')
    try:
        alt_text = ur.find("img", {"alt": "Verified Account"})['alt']
        return 1 if "Verified Account" in alt_text else 0
    except (TypeError, KeyError):
        # BUGFIX: bare `except:` narrowed — find() returning None makes the
        # ['alt'] subscript raise TypeError.
        return 0
# ur object must be a json from the endpoint https://api.twitter.com/graphql
def User(ur):
    """Build a User object from a GraphQL user-lookup JSON response.

    Args:
        ur: decoded JSON dict with the shape {'data': {'user': {...}}}.

    Returns:
        A populated User instance.

    Raises:
        KeyError: when the payload lacks the expected 'data'/'user' keys.
    """
    logme.debug(__name__ + ':User')
    # NOTE(review): with `and`, a payload missing 'data' entirely makes
    # ur['data'] below raise an uncaught KeyError before this guard fires —
    # `or` looks intended; confirm.
    if 'data' not in ur and 'user' not in ur['data']:
        msg = 'malformed json! cannot be parsed to get user data'
        logme.fatal(msg)
        raise KeyError(msg)
    # NOTE(review): the function name shadows the User class defined above,
    # so this call resolves to the function itself at runtime — verify
    # against the original module that the class/function naming is correct.
    _usr = User()
    _usr.id = ur['data']['user']['rest_id']
    # NOTE(review): the ['rest_id']['legacy'] chains below index into
    # rest_id, which the line above treats as a scalar id — these likely
    # should be ur['data']['user']['legacy'][...]; confirm against the
    # GraphQL response schema before relying on this parser.
    _usr.name = ur['data']['user']['rest_id']['legacy']['name']
    _usr.username = ur['data']['user']['rest_id']['legacy']['screen_name']
    _usr.bio = ur['data']['user']['rest_id']['legacy']['description']
    _usr.location = ur['data']['user']['rest_id']['legacy']['location']
    # NOTE(review): ['screen_name']['url'] indexes a string — presumably
    # meant to be ['legacy']['url']; confirm.
    _usr.url = ur['data']['user']['rest_id']['legacy']['screen_name']['url']
    # parsing date to user-friendly format
    _dt = ur['data']['user']['rest_id']['legacy']['created_at']
    # created_at arrives as e.g. "Wed Oct 10 20:19:24 +0000 2018"
    _dt = datetime.datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
    # date is of the format year,
    _usr.join_date = _dt.strftime('%d-%m-%Y')
    _usr.join_time = _dt.strftime('%H:%M:%S %Z')
    # :type `int`
    _usr.tweets = int(ur['data']['user']['rest_id']['legacy']['statuses_count'])
    _usr.following = int(ur['data']['user']['rest_id']['legacy']['friends_count'])
    _usr.followers = int(ur['data']['user']['rest_id']['legacy']['followers_count'])
    _usr.likes = int(ur['data']['user']['rest_id']['legacy']['favourites_count'])
    _usr.media_count = int(ur['data']['user']['rest_id']['legacy']['media_count'])
    _usr.is_private = ur['data']['user']['rest_id']['legacy']['protected']
    _usr.is_verified = ur['data']['user']['rest_id']['legacy']['verified']
    _usr.avatar = ur['data']['user']['rest_id']['legacy']['profile_image_url_https']
    _usr.background_image = ur['data']['user']['rest_id']['legacy']['profile_banner_url']
    # TODO : future implementation
    # legacy_extended_profile is also available in some cases which can be used to get DOB of user
    return _usr
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment