in utilities/Hive_metastore_migration/src/export_from_datacatalog.py [0:0]
def read_databases_from_catalog(sql_context, glue_context, datacatalog_name, database_arr, region):
    """Read several catalog databases and combine their entities.

    For every name in ``database_arr`` a dynamic frame is created through
    ``glue_context`` for the given catalog name and region, converted with
    ``transform_catalog_to_df``, split by its ``type`` column into database,
    table and partition rows, passed through ``change_schemas`` and
    ``transform_items_to_item``, and the three results are unioned with
    those of the previously processed databases.

    :param sql_context: forwarded unchanged to ``change_schemas``.
    :param glue_context: object exposing ``create_dynamic_frame.from_options``.
    :param datacatalog_name: value sent as the ``catalog.name`` option.
    :param database_arr: iterable of database names to read.
    :param region: value sent as the ``catalog.region`` option.
    :return: tuple ``(databases, tables, partitions)``; every element is
        ``None`` when ``database_arr`` is empty, otherwise the union of the
        per-database results (presumably Spark DataFrames - confirm against
        ``transform_items_to_item``).
    """
    databases = None
    tables = None
    partitions = None

    for database in database_arr:
        dyf = glue_context.create_dynamic_frame.from_options(
            connection_type=CONNECTION_TYPE_NAME,
            connection_options={
                'catalog.name': datacatalog_name,
                'catalog.database': database,
                'catalog.region': region,
            })
        df = transform_catalog_to_df(dyf)

        # filter into databases, tables, and partitions
        dc_databases_no_schema = df.where('type = "database"')
        dc_tables_no_schema = df.where('type = "table"')
        dc_partitions_no_schema = df.where('type = "partition"')

        # apply schema to dataframes
        (dc_databases, dc_tables, dc_partitions) = change_schemas(
            sql_context,
            dc_databases_no_schema,
            dc_tables_no_schema,
            dc_partitions_no_schema)
        (a_databases, a_tables, a_partitions) = transform_items_to_item(
            dc_databases=dc_databases,
            dc_tables=dc_tables,
            dc_partitions=dc_partitions)

        # Compare against None explicitly: the accumulators are frame
        # objects, and relying on their truthiness to mean "already set"
        # depends on how the frame type behaves in a boolean context.
        databases = a_databases if databases is None else databases.union(a_databases)
        tables = a_tables if tables is None else tables.union(a_tables)
        partitions = a_partitions if partitions is None else partitions.union(a_partitions)

    return (databases, tables, partitions)