in mmf_cli/hm_convert.py [0:0]
def convert(self):
    """Lay out the hateful_memes zip archive under the data folder.

    Steps, in order:

    1. Resolve the data folder: ``self.args.mmf_data_folder`` when it is
       set, otherwise ``env.data_dir`` from the loaded configuration.
    2. Create ``<data>/datasets/hateful_memes/defaults/images``.
    3. Unless ``self.args.bypass_checksum`` is truthy, run
       ``self.checksum`` on the zip against ``self.POSSIBLE_CHECKSUMS``.
    4. Move (``self.args.move`` truthy) or copy the zip into the images
       folder and unzip it there using ``self.args.password``.
    5. Run ``self.assert_files`` on the images folder.
    6. Move every file named in ``self.JSONL_FILES`` from
       ``<images>/data`` into the sibling ``annotations`` folder.
    7. Move every file named in ``self.IMAGE_FILES`` that exists in
       ``<images>/data`` up into the images folder, unpacking the ones
       whose path ends with ``.tar.gz``.

    Progress is reported with ``print``. Nothing is returned.
    """
    config = self.configuration.get_config()
    configured_dir = config.env.data_dir
    # A folder given on the command line wins over the configured one.
    data_root = self.args.mmf_data_folder or configured_dir

    skip_checksum = bool(self.args.bypass_checksum)

    print(f"Data folder is {data_root}")
    print(f"Zip path is {self.args.zip_file}")

    dataset_root = os.path.join(
        data_root, "datasets", "hateful_memes", "defaults"
    )
    images_dir = os.path.join(dataset_root, "images")
    PathManager.mkdirs(images_dir)

    relocate = bool(self.args.move)

    if not skip_checksum:
        self.checksum(self.args.zip_file, self.POSSIBLE_CHECKSUMS)

    archive = self.args.zip_file
    if relocate:
        print(f"Moving {archive}")
        move(archive, images_dir)
    else:
        print(f"Copying {archive}")
        copy(archive, images_dir)

    print(f"Unzipping {archive}")
    # The archive now sits inside images_dir under its own base name.
    self.decompress_zip(
        images_dir,
        fname=os.path.basename(archive),
        password=self.args.password,
    )

    self.assert_files(images_dir)

    annotations_dir = os.path.join(dataset_root, "annotations")
    PathManager.mkdirs(annotations_dir)

    # Both loops below read from the "data" folder inside images_dir.
    staging_dir = os.path.join(images_dir, "data")

    for jsonl_name in self.JSONL_FILES:
        print(f"Moving {jsonl_name}")
        move(os.path.join(staging_dir, jsonl_name), annotations_dir)

    for image_name in self.IMAGE_FILES:
        staged = os.path.join(staging_dir, image_name)
        # Entries of IMAGE_FILES missing from the staging folder are skipped.
        if not PathManager.exists(staged):
            continue
        print(f"Moving {image_name}")
        move(staged, images_dir)
        if staged.endswith(".tar.gz"):
            # NOTE(review): presumably decompress() resolves `fname`
            # relative to the directory passed first -- confirm against
            # the helper's definition.
            decompress(images_dir, fname=image_name, delete_original=False)