def parse_workflow_zip_file()

in packages/cdk/lib/wes_adapter/amazon_genomics/wes/adapters/CromwellWESAdapter.py [0:0]


def parse_workflow_zip_file(file, workflow_type):
    """
    Processes a workflow zip bundle

    The archive is extracted into the directory that contains it, and the
    relevant extracted files are opened in binary mode. The returned file
    handles are left open; closing them is the caller's responsibility.

    :param file: String or Path-like path to a workflow.zip file
    :param workflow_type: String, type of workflow to expect (e.g. "wdl")

    :rtype: dict of `data` and `files`
    :raises RuntimeError: if the zip has no entries, or if a multi-file zip
        without a MANIFEST.json has no `main.<workflow_type>` file

    If the zip only contains a single file, that file is set as `workflowSource`

    If the zip contains multiple files with a MANIFEST.json file, the MANIFEST is used to determine
    appropriate `data` and `file` arguments. (See: parse_workflow_manifest_file())

    If the zip contains multiple files without a MANIFEST.json file:
      * a `main` workflow file with an extension matching the workflow_type (lower-cased) is expected and will be set as `workflowSource`
      * optionally, if `inputs*.json` files are found in the root level of the zip, they will be collected as a list under `workflowInputFiles` in the order they are found
      * optionally, if an `options.json` file is found in the root level of the zip, it will be set as `workflowOptions`

    If the zip contains multiple files, the original zip is set as `workflowDependencies`
    """
    data = dict()
    files = dict()

    # Handles opened by this function, so they can be released if parsing
    # fails part-way through. Handles supplied by the manifest parser are not
    # tracked here because this function did not open them.
    opened = []

    def open_binary(filename):
        handle = open(filename, "rb")
        opened.append(handle)
        return handle

    wd = path.dirname(file)
    try:
        with zipfile.ZipFile(file) as archive:
            archive.extractall(wd)

            contents = archive.namelist()
            if not contents:
                raise RuntimeError("empty workflow.zip")

            if len(contents) == 1:
                # single file workflow
                files["workflowSource"] = open_binary(path.join(wd, contents[0]))

            else:
                # multifile workflow
                if "MANIFEST.json" in contents:
                    props = parse_workflow_manifest_file(
                        path.join(wd, "MANIFEST.json")
                    )

                    if props.get("data"):
                        data.update(props.get("data"))

                    if props.get("files"):
                        files.update(props.get("files"))

                else:
                    main_file = f"main.{workflow_type.lower()}"
                    if main_file not in contents:
                        raise RuntimeError(f"'main.{workflow_type}' file not found")

                    files["workflowSource"] = open_binary(path.join(wd, main_file))

                    # Only root-level `inputs*.json` members qualify. A bare
                    # prefix test would also pick up directory entries such
                    # as "inputs/" (which cannot be opened as files) and
                    # anything nested beneath them.
                    input_files = [
                        name
                        for name in contents
                        if "/" not in name
                        and name.startswith("inputs")
                        and name.endswith(".json")
                    ]
                    if input_files:
                        files["workflowInputFiles"] = [
                            open_binary(path.join(wd, name)) for name in input_files
                        ]

                    if "options.json" in contents:
                        files["workflowOptions"] = open_binary(
                            path.join(wd, "options.json")
                        )

                # add the original zip bundle as a workflow dependencies file
                files["workflowDependencies"] = open_binary(file)
    except Exception:
        # Do not leak descriptors when the bundle turns out to be unusable.
        for handle in opened:
            handle.close()
        raise

    return {"data": data, "files": files}