in clouddq-migration/dataplex.py [0:0]
def unzip_and_read_yaml(bucket_name, zip_file_name) -> dict:
    '''
    Download a zip archive from Cloud Storage and merge its YAML files.

    Every archive member whose name ends in ``.yaml`` or ``.yml`` is parsed
    with ``yaml.safe_load`` and its top-level keys are folded into a single
    dictionary. When the same top-level key appears in more than one file,
    the nested mappings are merged; on a conflicting nested key the entry
    from the file read earlier (in archive order) is kept.

    Args:
        bucket_name: Name of the Cloud Storage bucket holding the archive.
        zip_file_name: Object name of the zip file inside the bucket.

    Returns:
        A dict mapping each top-level YAML key to its merged value. Empty
        when the archive contains no YAML files.

    Note:
        Merging a repeated top-level key calls ``update`` on its value, so
        such values are assumed to be mappings.
    '''
    # Initialize a client for Google Cloud Storage
    storage_client = storage.Client()

    # Get the bucket and the blob (zip file)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(zip_file_name)

    # Download the zip file's contents into memory
    zip_data = BytesIO(blob.download_as_bytes())

    # Accumulates the merged content of all YAML files
    yaml_data = {}

    # Open the zip file from memory
    with zipfile.ZipFile(zip_data, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            # Only process .yaml or .yml files
            if not file_name.endswith(('.yaml', '.yml')):
                continue

            # Parse while the member is open; the merge below does not need it
            with zip_ref.open(file_name) as file:
                file_data = yaml.safe_load(file)

            # An empty YAML file parses to None; there is nothing to merge
            if file_data is None:
                continue

            for key, value in file_data.items():
                if key in yaml_data:
                    # Entries already collected win over this file's entries
                    value.update(yaml_data[key])
                # Store only this key. Copying the whole file here would
                # overwrite other keys that were merged from earlier files.
                yaml_data[key] = value

    return yaml_data