def nest_data_frame()

in utilities/Crawler_undo_redo/src/scripts_utils.py [0:0]


def nest_data_frame(data_frame, database_name, entity_type):
    """Collapse the rows of *data_frame* into an ``items`` array column.

    Every row is packed into a struct (``struct("*")``) and the structs are
    gathered with ``collect_list`` into a column named ``items``. Columns
    describing the entity are then attached, depending on the prefix of
    *entity_type*:

    * ``"table..."``     -- one global aggregate; adds ``database`` and ``type``.
    * ``"partition..."`` -- one aggregate per ``tableName``; adds ``database``
      and ``type`` and renames ``tableName`` to ``table``.
    * ``"database..."``  -- one global aggregate; adds ``type`` only.

    Args:
        data_frame: Spark DataFrame holding the entities to nest. For
            partitions it must expose a ``tableName`` column.
        database_name: Literal value stored in the ``database`` column
            (not used for database entities).
        entity_type: Entity kind; only its prefix is inspected, and the full
            string is stored in the ``type`` column.

    Returns:
        The aggregated DataFrame described above.

    Raises:
        ValueError: If *entity_type* does not start with ``table``,
            ``partition`` or ``database``.
    """
    # Reject unknown kinds before building any Spark expression.
    if not entity_type.startswith(("table", "partition", "database")):
        raise ValueError(
            "entity_type %s is not recognized, your backup data may be corrupted..." % entity_type
        )

    # Alias the aggregate directly rather than renaming Spark's auto-generated
    # column afterwards: that generated name is derived from the expression and
    # withColumnRenamed silently does nothing when the name does not match,
    # which would leave the result without an "items" column.
    items = collect_list(struct("*")).alias("items")

    if entity_type.startswith("table"):
        # Entity is a table
        return (
            data_frame.agg(items)
            .withColumn("database", lit(database_name))
            .withColumn("type", lit(entity_type))
        )

    if entity_type.startswith("partition"):
        # Entity is a partition
        return (
            data_frame.groupBy("tableName")
            .agg(items)
            .withColumn("database", lit(database_name))
            .withColumn("type", lit(entity_type))
            .withColumnRenamed("tableName", "table")
        )

    # Entity is a database: there is no parent database column to attach.
    return data_frame.groupBy().agg(items).withColumn("type", lit(entity_type))