Commit 7b770017 authored by novelailab

Revert open() changes, fix and switch to build()

parent 8a038aec
......@@ -199,8 +199,8 @@ class ImageDatasetBuilder():
self.folder_path.mkdir(parents=True, exist_ok=True)
if self.open_dataset:
dataset = open(self.dataset_path, mode="ab+")
dataset.flush()
self.dataset = open(self.dataset_path, mode="ab+")
self.dataset.flush()
if self.open_index:
self.index = []
......@@ -218,31 +218,22 @@ class ImageDatasetBuilder():
self.flush_metadata(silent=True)
print("Dataset closed and flushed.")
append_mode = False
if self.open_dataset and self.dataset_path.is_file():
self.dataset = open(self.dataset_path, mode="ab+")
append_mode = True
elif self.open_dataset:
self.dataset = open(self.dataset_path, mode="wb")
else:
raise Exception("Dataset file not found at {}".format(self.dataset_path))
if self.open_index and self.index_path.is_file():
with open(self.index_path, 'rb') as f:
self.index = pickle.load(f)
elif append_mode:
raise Exception("Index file not found at {}".format(self.index_path))
else:
self.index = []
raise Exception("Index file not found at {}".format(self.index_path))
if self.open_metadata and self.metadata_path.is_file():
with open(self.metadata_path, 'rb') as f:
self.metadata = pickle.load(f)
elif append_mode:
raise Exception("Metadata file not found at {}".format(self.metadata_path))
else:
self.metadata = {}
raise Exception("Metadata file not found at {}".format(self.metadata_path))
def operate(self, operation, batch, identities, metadata=None, executor=concurrent.futures.ThreadPoolExecutor, use_tqdm=False, **kwargs):
executor = executor(max_workers=self.threads)
......
......@@ -60,7 +60,7 @@ print("Copyng old db data...")
# detect block size of fs the archive is stored on
block_size = int(os.popen("getconf PAGE_SIZE").read().lstrip().rstrip()) #int(os.popen("stat -fc %s " + new_dataset_folder).read().lstrip().rstrip())
new_dataset = ImageDatasetBuilder(folder_path=new_dataset_folder, name="danbooru_updated", threads=32, block_size=block_size, align_fs_blocks=True)
new_dataset.open()
new_dataset.build()
# how many operations to run at once
copy_chunk_size = 4096
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment