in tseval/utils/resources_utils.py [0:0]
def extract(filepath, output_dir):
    """Extract an archive or compressed file into ``output_dir``.

    The extraction method is selected from the extension of the file name.
    The file is first unpacked into a temporary directory so that the
    top-level extracted items can be listed; each of them is then moved
    into ``output_dir`` with ``move_with_overwrite``. The temporary
    directory is removed before returning, including when extraction or
    moving fails.

    Args:
        filepath: Path of the file to extract. Its base name must end with
            one of the supported extensions: ``.tar.gz``, ``.tar.bz2``,
            ``.tgz``, ``.zip``, ``.gz`` or ``.bz2``.
        output_dir: Directory that receives the extracted items.

    Returns:
        A list with the destination path (inside ``output_dir``) of every
        top-level item that was extracted.

    Raises:
        ValueError: If the file name ends with none of the supported
            extensions.
    """
    # Infer the extract method based on the extension
    extensions_to_methods = {
        '.tar.gz': untar,
        '.tar.bz2': untar,
        '.tgz': untar,
        '.zip': unzip,
        '.gz': ungzip,
        '.bz2': unbz2,
    }

    filename = os.path.basename(filepath)
    matching_extensions = [
        ext for ext in extensions_to_methods if filename.endswith(ext)
    ]
    if not matching_extensions:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError(f'File {filename} has an unknown extension')
    # Take the longest match (.tar.gz should take precedence over .gz)
    extension = max(matching_extensions, key=len)
    extract_method = extensions_to_methods[extension]

    # Extract files in a temporary dir, then move the extracted items to the
    # output dir, in order to get the details of what was extracted. The
    # context manager guarantees the temporary dir does not leak.
    with tempfile.TemporaryDirectory() as tmp_extract_dir:
        extract_method(filepath, output_dir=tmp_extract_dir)
        output_paths = []
        for name in os.listdir(tmp_extract_dir):
            extracted_path = os.path.join(tmp_extract_dir, name)
            output_path = os.path.join(output_dir, name)
            move_with_overwrite(extracted_path, output_path)
            output_paths.append(output_path)
    return output_paths