def unzip_and_read_yaml()

in clouddq-migration/dataplex.py [0:0]


def unzip_and_read_yaml(bucket_name, zip_file_name) -> dict:
    '''
        Method to unzip and read the dq yaml spec files.

        Downloads the zip archive ``zip_file_name`` from the Cloud Storage
        bucket ``bucket_name`` into memory, parses every ``.yaml`` / ``.yml``
        member with ``yaml.safe_load`` and merges the parsed documents into a
        single dictionary keyed by their top-level keys.

        When the same top-level key occurs in several files, the sections are
        combined with ``dict.update``; if the same nested key occurs more
        than once, the entry from the file read first is kept.

        Args:
            bucket_name: Name of the bucket holding the archive.
            zip_file_name: Object name of the zip archive inside the bucket.

        Returns:
            A dict with the merged content of all YAML files in the archive
            (empty when the archive holds no non-empty YAML files).
    '''

    # Initialize a client for Google Cloud Storage
    storage_client = storage.Client()

    # Get the bucket and the blob (zip file)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(zip_file_name)

    # Download the zip file's contents into memory
    zip_data = BytesIO(blob.download_as_bytes())

    # Variable to store all YAML data
    yaml_data = {}

    # Open the zip file from memory
    with zipfile.ZipFile(zip_data, 'r') as zip_ref:
        # Iterate over each file in the zip archive
        for file_name in zip_ref.namelist():
            # Only process .yaml or .yml files
            if not file_name.endswith(('.yaml', '.yml')):
                continue

            # Read the content of the YAML file
            with zip_ref.open(file_name) as file:
                file_data = yaml.safe_load(file)

            # safe_load returns None for an empty document; nothing to merge
            if not file_data:
                continue

            for key, value in file_data.items():
                if key in yaml_data:
                    # Fold the already collected entries into the new
                    # section so that earlier files win on duplicates.
                    value.update(yaml_data[key])
                # Store only this key. Updating with the whole document here
                # would overwrite sections merged from earlier files.
                yaml_data[key] = value
    return yaml_data