train-cnn/Container/scripts/normalize.py (29 lines of code) (raw):

"""Resize and grayscale one bin of cat/dog training images for a batch task.

The script is driven entirely by environment variables:

* ``BATCH_TASK_INDEX`` -- zero-based index of this task; selects the bin.
* ``BIN_SIZE`` -- number of image indices handled by each task.
* ``DATA_ROOT`` -- directory that contains ``train.zip`` and receives the
  resulting ``bin<BATCH_TASK_INDEX>.zip``.

For every index in ``[BATCH_TASK_INDEX * BIN_SIZE, (BATCH_TASK_INDEX + 1) *
BIN_SIZE)`` the members ``train/cat.<idx>.jpg`` and ``train/dog.<idx>.jpg``
are extracted from ``train.zip``, transformed, and stored in the output
archive as ``train/<cat|dog>/<idx>.jpg``.

Raises ``KeyError`` if a required environment variable is unset or if an
expected member is missing from ``train.zip``.
"""

import os
import shutil
import zipfile

from PIL import Image
from torchvision import transforms

# Transform pipeline applied to every image. Note that the two random flips
# mean the output is not deterministic across runs.
# NOTE(review): random flips look like augmentation rather than
# normalization -- confirm this is intended at preprocessing time.
t = transforms.Compose([
    transforms.Resize(size=(128, 128)),
    transforms.Grayscale(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
])

batch_task_index = int(os.environ["BATCH_TASK_INDEX"])
data_root = os.environ["DATA_ROOT"]
extract_root = "/tmp"
normalized_data_root = f"/tmp/bin{batch_task_index}"

# os.makedirs instead of shelling out to `mkdir -p`: no shell parsing of the
# path, and a failure raises instead of being silently ignored.
os.makedirs(os.path.join(normalized_data_root, "train"), exist_ok=True)

training_data_archive = os.path.join(data_root, "train.zip")
bin_size = int(os.environ["BIN_SIZE"])
first = batch_task_index * bin_size

# The output archive is built in the current working directory and copied to
# DATA_ROOT once it is complete.
bin_archive = f"bin{batch_task_index}.zip"

with zipfile.ZipFile(training_data_archive, "r") as inzf, \
        zipfile.ZipFile(bin_archive, "w") as outzf:
    for idx in range(first, first + bin_size):
        for cat_or_dog in ["cat", "dog"]:
            filename = f"train/{cat_or_dog}.{idx}.jpg"
            inzf.extract(filename, path=extract_root)
            tmp_path = os.path.join(normalized_data_root, filename)

            # Close the source image promptly; otherwise one file handle is
            # leaked per image for the lifetime of the loop.
            with Image.open(os.path.join(extract_root, filename)) as in_image:
                out_image = t(in_image)
                out_image.save(tmp_path)

            # Re-home the image under a per-class directory inside the zip.
            outzf.write(tmp_path, arcname=f"train/{cat_or_dog}/{idx}.jpg")

# Copy only after the `with` block has closed (and thereby finalized) the
# archive. shutil.copy raises on failure, so a failed upload fails the task
# instead of passing silently as `os.system("cp ...")` did.
shutil.copy(bin_archive, data_root)