Commit 690dd2cf, authored by Eren Doğan, committed by GitHub

more dataset stuff

parent fb134d28
......@@ -8,6 +8,8 @@ from simplejpeg import decode_jpeg
import simplejpeg
import pickle
from pathlib import Path
from PIL import Image
import requests
# Does this work with other block_sizes? doesn't seem to.
class FbDataset(data.Dataset):
......@@ -214,8 +216,9 @@ class ImageDatasetBuilder():
else:
raise Exception("Metadata file not found at {}".format(self.metadata_path))
def operate(self, operation, batch, identities, metadata=None):
executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.threads)
def operate(self, operation, batch, identities, metadata=None, executor=concurrent.futures.ThreadPoolExecutor):
executor = executor(max_workers=self.threads)
futures = executor.map(operation, batch)
futures = list(futures)
......@@ -224,16 +227,33 @@ class ImageDatasetBuilder():
def encode_op(self, data):
    """Normalize raw image bytes to JPEG bytes.

    Args:
        data: Encoded image bytes in any format Pillow can open, or an
            already-encoded JPEG.

    Returns:
        The original bytes when ``data`` is a JPEG that decodes cleanly,
        freshly encoded JPEG bytes (quality 91) for any other readable
        image, or ``None`` when the payload cannot be decoded.
    """
    # Local import: encode_jpeg needs a numpy array, and this block cannot
    # assume the module already imports numpy.
    import numpy as np

    if simplejpeg.is_jpeg(data):
        # is_jpeg only inspects the header; a full decode catches truncated
        # or corrupt files before they are stored.
        try:
            simplejpeg.decode_jpeg(data)
        except Exception:
            return None
        return data

    try:
        # encode_jpeg defaults to the RGB colorspace, so normalize palette,
        # grayscale and alpha images to 3-channel RGB first.
        image = Image.open(io.BytesIO(data)).convert("RGB")
        pixels = np.ascontiguousarray(np.asarray(image), dtype=np.uint8)
        return simplejpeg.encode_jpeg(pixels, quality=91)
    except Exception:
        # Unreadable payloads yield None, the same as a corrupt JPEG above.
        return None
def url_op(self, url):
    """Download an image from ``url`` and return it as JPEG bytes.

    The request is attempted up to five times; an attempt counts as failed
    when it raises a ``requests`` exception or returns a status other
    than 200.

    Args:
        url: Address of the image to fetch.

    Returns:
        Whatever ``self.encode_op`` returns for the downloaded payload, or
        ``None`` when no attempt produced a 200 response.
    """
    result = None
    for _ in range(5):
        try:
            # Re-issue the request on every attempt; checking one response
            # repeatedly can never turn a failure into a success.
            result = requests.get(url)
        except requests.RequestException:
            continue
        if result.status_code == 200:
            break
    else:
        # Every attempt failed: do not feed an error page to the encoder.
        return None

    return self.encode_op(result.content)
def write(self, data, identity, metadata=None, flush=False):
if self.is_close:
raise Exception("Dataset not built")
if data == None:
return
self.dataset.write(data)
self.index.append([self.dataset.tell(), len(data), identity])
if self.metadata and metadata:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment