Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
T
Twint
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nanahira
Twint
Commits
0709e143
Commit
0709e143
authored
Aug 12, 2019
by
Francesco Poldi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Updated storing structures
parent
894655fd
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
44 additions
and
11 deletions
+44
-11
twint/storage/db.py
twint/storage/db.py
+21
-2
twint/storage/elasticsearch.py
twint/storage/elasticsearch.py
+9
-4
twint/storage/panda.py
twint/storage/panda.py
+4
-1
twint/storage/write_meta.py
twint/storage/write_meta.py
+10
-4
No files found.
twint/storage/db.py
View file @
0709e143
...
...
@@ -88,13 +88,27 @@ def init(db):
CREATE TABLE IF NOT EXISTS
retweets(
user_id integer not null,
username text not null,
tweet_id integer not null,
retweet_id integer not null,
CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_retweets
)
table_reply_to
=
"""
CREATE TABLE IF NOT EXISTS
replies(
tweet_id integer not null,
user_id integer not null,
username text not null,
CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id),
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_reply_to
)
table_favorites
=
"""
CREATE TABLE IF NOT EXISTS
...
...
@@ -256,8 +270,13 @@ def tweets(conn, Tweet, config):
cursor
.
execute
(
query
,
(
config
.
User_id
,
Tweet
.
id
))
if
Tweet
.
retweet
:
query
=
'INSERT INTO retweets VALUES(?,?)'
cursor
.
execute
(
query
,
(
config
.
User_id
,
Tweet
.
id
))
query
=
'INSERT INTO retweets VALUES(?,?,?,?)'
cursor
.
execute
(
query
,
(
int
(
Tweet
.
user_rt_id
),
Tweet
.
user_rt
,
Tweet
.
id
,
int
(
Tweet
.
retweet_id
)))
if
Tweet
.
reply_to
:
for
reply
in
Tweet
.
reply_to
:
query
=
'INSERT INTO replies VALUES(?,?,?)'
cursor
.
execute
(
query
,
(
Tweet
.
id
,
int
(
reply
[
'user_id'
]),
reply
[
'username'
]))
conn
.
commit
()
except
sqlite3
.
IntegrityError
:
...
...
twint/storage/elasticsearch.py
View file @
0709e143
...
...
@@ -66,7 +66,6 @@ def createIndex(config, instance, **scope):
"tweet"
:
{
"type"
:
"text"
},
"hashtags"
:
{
"type"
:
"keyword"
},
"cashtags"
:
{
"type"
:
"keyword"
},
"user_id"
:
{
"type"
:
"long"
},
"user_id_str"
:
{
"type"
:
"keyword"
},
"username"
:
{
"type"
:
"keyword"
},
"name"
:
{
"type"
:
"text"
},
...
...
@@ -86,9 +85,12 @@ def createIndex(config, instance, **scope):
"geo_near"
:
{
"type"
:
"geo_point"
},
"geo_tweet"
:
{
"type"
:
"geo_point"
},
"photos"
:
{
"type"
:
"text"
},
"user_rt_id"
:
{
"type"
:
"
integer
"
},
"user_rt_id"
:
{
"type"
:
"
keyword
"
},
"mentions"
:
{
"type"
:
"keyword"
},
"source"
:
{
"type"
:
"keyword"
}
"source"
:
{
"type"
:
"keyword"
},
"user_rt"
:
{
"type"
:
"keyword"
},
"retweet_id"
:
{
"type"
:
"keyword"
},
"reply_to"
:
{
"type"
:
"nested"
}
}
},
"settings"
:
{
...
...
@@ -203,7 +205,6 @@ def Tweet(Tweet, config):
"tweet"
:
Tweet
.
tweet
,
"hashtags"
:
Tweet
.
hashtags
,
"cashtags"
:
Tweet
.
cashtags
,
"user_id"
:
Tweet
.
user_id
,
"user_id_str"
:
Tweet
.
user_id_str
,
"username"
:
Tweet
.
username
,
"name"
:
Tweet
.
name
,
...
...
@@ -223,6 +224,10 @@ def Tweet(Tweet, config):
}
if
Tweet
.
retweet
:
j_data
[
"_source"
]
.
update
({
"user_rt_id"
:
Tweet
.
user_rt_id
})
j_data
[
"_source"
]
.
update
({
"user_rt"
:
Tweet
.
user_rt
})
j_data
[
"_source"
]
.
update
({
"retweet_id"
:
Tweet
.
retweet_id
})
if
Tweet
.
reply_to
:
j_data
[
"_source"
]
.
update
({
"reply_to"
:
Tweet
.
reply_to
})
if
Tweet
.
photos
:
_photos
=
[]
for
photo
in
Tweet
.
photos
:
...
...
twint/storage/panda.py
View file @
0709e143
...
...
@@ -92,7 +92,10 @@ def update(object, config):
"near"
:
Tweet
.
near
,
"geo"
:
Tweet
.
geo
,
"source"
:
Tweet
.
source
,
"user_rt_id"
:
Tweet
.
user_rt_id
"user_rt_id"
:
Tweet
.
user_rt_id
,
"user_rt"
:
Tweet
.
user_rt
,
"retweet_id"
:
Tweet
.
retweet_id
,
"reply_to"
:
Tweet
.
reply_to
}
_object_blocks
[
_type
]
.
append
(
_data
)
elif
_type
==
"user"
:
...
...
twint/storage/write_meta.py
View file @
0709e143
...
...
@@ -23,10 +23,13 @@ def tweetData(t):
"retweet"
:
t
.
retweet
,
"quote_url"
:
t
.
quote_url
,
"video"
:
t
.
video
,
"user_rt_id"
:
t
.
user_rt_id
,
"near"
:
t
.
near
,
"geo"
:
t
.
geo
,
"source"
:
t
.
source
"source"
:
t
.
source
,
"user_rt_id"
:
t
.
user_rt_id
,
"user_rt"
:
t
.
user_rt
,
"retweet_id"
:
t
.
retweet_id
,
"reply_to"
:
t
.
reply_to
}
return
data
...
...
@@ -55,10 +58,13 @@ def tweetFieldnames():
"retweet"
,
"quote_url"
,
"video"
,
"user_rt_id"
,
"near"
,
"geo"
,
"source"
"source"
,
"user_rt_id"
,
"user_rt"
,
"retweet_id"
,
"reply_to"
]
return
fieldnames
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment