in utilities/Crawler_undo_redo/src/scripts_utils.py [0:0]
def nest_data_frame(data_frame, database_name, entity_type):
    """Collapse the rows of ``data_frame`` into a nested ``items`` array column.

    Every row is wrapped in a struct of all its columns and the structs are
    gathered with ``collect_list``.  The grouping and the extra columns depend
    on the prefix of ``entity_type``:

    * ``"table..."``     -- one row for the whole frame, with columns
      ``items``, ``database`` and ``type``.
    * ``"partition..."`` -- one row per distinct ``tableName``, with columns
      ``table`` (renamed from ``tableName``), ``items``, ``database`` and
      ``type``.
    * ``"database..."``  -- one row for the whole frame, with columns
      ``items`` and ``type`` (no ``database`` column is added).

    Args:
        data_frame: Spark DataFrame holding the entities to nest.  For
            partitions it is grouped by its ``tableName`` column.
        database_name: Value stored in the literal ``database`` column
            (unused for database entities).
        entity_type: Entity kind; only its prefix is inspected, and the full
            string is stored in the literal ``type`` column.

    Returns:
        The aggregated DataFrame described above.

    Raises:
        ValueError: If ``entity_type`` does not start with ``"table"``,
            ``"partition"`` or ``"database"``.  ``ValueError`` is a subclass
            of ``Exception``, so existing ``except Exception`` handlers still
            catch it.
    """
    is_table = entity_type.startswith("table")
    is_partition = entity_type.startswith("partition")
    is_database = entity_type.startswith("database")
    if not (is_table or is_partition or is_database):
        raise ValueError(
            "entity_type %s is not recognized, your backup data may be corrupted..." % entity_type
        )

    # Alias the aggregate directly instead of renaming Spark's auto-generated
    # column name afterwards: withColumnRenamed is a no-op when the source
    # name does not match, which would silently leave the result without an
    # "items" column.
    nested_rows = collect_list(struct("*")).alias("items")

    if is_partition:
        # Partitions are nested per table.
        return (
            data_frame.groupBy("tableName")
            .agg(nested_rows)
            .withColumn("database", lit(database_name))
            .withColumn("type", lit(entity_type))
            .withColumnRenamed("tableName", "table")
        )

    # Tables and databases are nested into a single row; DataFrame.agg is
    # shorthand for groupBy().agg().
    nested = data_frame.agg(nested_rows)
    if is_table:
        nested = nested.withColumn("database", lit(database_name))
    return nested.withColumn("type", lit(entity_type))