def extract()

in tseval/utils/resources_utils.py [0:0]


def extract(filepath, output_dir):
    """Extract an archive or compressed file into ``output_dir``.

    The extraction method is chosen from the extension of the file name.
    The content is first extracted into a temporary directory so that the
    top-level extracted items can be listed; each of them is then moved into
    ``output_dir``. The temporary directory is removed before returning,
    whether extraction succeeded or raised.

    Args:
        filepath: Path of the file to extract. Its base name must end with
            one of: ``.tar.gz``, ``.tar.bz2``, ``.tgz``, ``.zip``, ``.gz``,
            ``.bz2``.
        output_dir: Directory that receives the extracted items.

    Returns:
        A list with one path per top-level extracted item, each formed by
        joining ``output_dir`` with the item's name.

    Raises:
        Exception: If the file name matches none of the known extensions.
    """
    # Infer extract method based on extension
    extensions_to_methods = {
        '.tar.gz': untar,
        '.tar.bz2': untar,
        '.tgz': untar,
        '.zip': unzip,
        '.gz': ungzip,
        '.bz2': unbz2,
    }

    filename = os.path.basename(filepath)
    possible_extensions = [
        ext for ext in extensions_to_methods if filename.endswith(ext)
    ]
    if not possible_extensions:
        raise Exception(f'File {filename} has an unknown extension')
    # Take the longest (.tar.gz should take precedence over .gz)
    extension = max(possible_extensions, key=len)
    extract_method = extensions_to_methods[extension]

    # Extract files in a temporary dir then move the extracted items to the
    # output dir in order to get the details of what was extracted.
    # The context manager deletes the temporary dir on exit, so nothing is
    # left behind even if the extraction or one of the moves fails.
    output_paths = []
    with tempfile.TemporaryDirectory() as tmp_extract_dir:
        extract_method(filepath, output_dir=tmp_extract_dir)
        for name in os.listdir(tmp_extract_dir):
            extracted_path = os.path.join(tmp_extract_dir, name)
            output_path = os.path.join(output_dir, name)
            move_with_overwrite(extracted_path, output_path)
            output_paths.append(output_path)
    return output_paths