in src/package/dataplexutils/metadata/wizard.py [0:0]
def _get_tables_from_uri(self, documentation_csv_uri):
    """Read a CSV file from Google Cloud Storage and return its table rows.

    The file is parsed with the standard ``csv`` module, so quoted fields
    (including fields that contain commas) are handled correctly. Blank
    lines are skipped. Only the first two columns of each row are used;
    any further columns are ignored.

    Args:
        documentation_csv_uri: The URI of the CSV file in Google Cloud
            Storage, in the form ``<scheme>://<bucket>/<object path>``
            (e.g. ``gs://my-bucket/docs/tables.csv``).

    Returns:
        A list of ``(table, doc)`` tuples, one per non-blank row, where
        ``table`` is the first column as written in the file and ``doc``
        is the second column with surrounding whitespace stripped.

    Raises:
        ValueError: If the URI lacks a bucket or object name, or if a
            non-blank row has fewer than two columns.
        Exception: Any error raised while talking to Cloud Storage is
            logged and re-raised unchanged.
    """
    # Imported locally so this method does not rely on the module-level
    # import list already containing these standard-library modules.
    import csv
    import io

    try:
        # "gs://bucket/path/to/file.csv".split("/", 3)
        #   -> ["gs:", "", "bucket", "path/to/file.csv"]
        uri_parts = documentation_csv_uri.split("/", 3)
        if len(uri_parts) < 4 or not uri_parts[2] or not uri_parts[3]:
            raise ValueError(
                "Invalid Cloud Storage URI (expected "
                f"gs://<bucket>/<object>): {documentation_csv_uri}"
            )
        bucket_name, blob_name = uri_parts[2:]

        # Download the whole CSV object as text.
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(blob_name)
        csv_data = blob.download_as_text()

        tables = []
        # newline="" lets the csv module do its own line-ending handling,
        # as recommended by the csv documentation.
        reader = csv.reader(io.StringIO(csv_data, newline=""))
        for row in reader:
            # Skip blank / whitespace-only lines.
            if not row or (len(row) == 1 and not row[0].strip()):
                continue
            if len(row) < 2:
                raise ValueError(
                    f"Line {reader.line_num} of {documentation_csv_uri} "
                    f"has fewer than two columns: {row!r}"
                )
            tables.append((row[0], row[1].strip()))

        for table_name, doc in tables:
            logger.info(f"Table: {table_name} doc: {doc}")
        return tables
    except Exception as e:
        logger.error(f"Exception: {e}.")
        # Bare raise re-raises the active exception with its original
        # traceback intact.
        raise