Commit dad308db authored by novelailab

Danbooru id as identity

parent 7b770017
......@@ -66,7 +66,8 @@ new_dataset.build()
copy_chunk_size = 4096
for e in tqdm(range(0, len(can_keep_list), copy_chunk_size)):
chunk = can_keep_list[e:e+copy_chunk_size]
new_dataset.operate(lambda id: old_dataset.read_from_id(id, decode=False), chunk, [ all_metadata[e] for e in chunk ], use_tqdm=True)
new_dataset.operate(lambda id: old_dataset.read_from_id(id, decode=False), chunk, chunk, use_tqdm=True)
new_dataset.flush()
new_dataset.flush_index()
new_dataset.flush_metadata()
......@@ -84,7 +85,7 @@ def download_danbooru(id):
save_every = 25
for e in tqdm(range(0, len(to_scrape), copy_chunk_size)):
chunk = to_scrape[e:e+copy_chunk_size]
new_dataset.operate(download_danbooru, chunk, [ all_metadata[e] for e in chunk ], use_tqdm=True)
new_dataset.operate(download_danbooru, chunk, chunk, use_tqdm=True)
if (e // copy_chunk_size) % save_every == 0:
new_dataset.flush()
new_dataset.flush_index()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment