def deleteOldData()

in sdlf-utils/pipeline-examples/glue-jobs-deployer/pipeline_scripts/examplepipeline-glue-job.py [0:0]


def deleteOldData(targetTableBucketLocation):
    """Delete the S3 data that an "insert overwrite" into the target table would replace.

    Data is removed directly from S3 through ``deleteBucketPath`` instead of
    dropping or truncating the table. What gets deleted depends on the
    partition settings read from the module-level ``props``:

    * Kill-and-fill (no date partition, no partition values): the whole
      ``props.targetTablePathLocation`` is deleted.
    * Case 1 - only a date partition: every requested date partition (or,
      for CDC tables, every entry of ``props.cdcDatePartitionsToProcess``).
    * Case 2 - only partition values: the single ``name=value/...`` path
      built from ``props.partitionValues``. Assumption: only one value can
      be requested per partition level.
    * Case 3 - date partition plus partition values: one
      ``<date partition>/<name=value/...>`` path per requested date.
      Assumption: the date partition is the first partition level.

    In cases 1-3 a path is only deleted when the matching partition is
    listed by ``show partitions`` for ``props.targetTableFQDN``.

    Args:
        targetTableBucketLocation: Bucket location of the target table; it is
            logged and forwarded unchanged to ``deleteBucketPath``.
    """
    log.info('Table Bucket:::' + targetTableBucketLocation)

    hasValuePartitions = len(props.partitionValues) > 0

    # Kill-and-fill table: nothing to filter on, so the whole table path goes.
    if not props.hasDtPartition and not hasValuePartitions:
        deleteBucketPath(props.targetTablePathLocation, targetTableBucketLocation)
        return

    # Incremental table: look up which partitions currently exist.
    showPartitions = " show partitions " + props.targetTableFQDN
    existingPartitions = spark.sql(showPartitions).rdd.map(lambda row: row[0]).collect()

    if not hasValuePartitions:
        # Case 1 - only has the date partition.
        # NOTE(review): the date range is computed even for CDC tables and then
        # overridden; kept as-is in case getDateRangePartitions validates props - confirm.
        partitionsToDelete = getDateRangePartitions(props)
        if props.isCdcTable:
            partitionsToDelete = [props.targetPartitionDtField + "=" + datePartition
                                  for datePartition in props.cdcDatePartitionsToProcess]
        # The table may have more partition levels than the date one while only
        # the date level is requested: compare against the first level only.
        # NOTE(review): tablePartitionFields is not defined in this function; it
        # is expected to be a module-level global - confirm.
        if len(tablePartitionFields) > 1:
            existingPartitions = [partition.split("/")[0] for partition in existingPartitions]
    else:
        # Same "name=value/name=value" path for cases 2 and 3, built once.
        valuePartitionPath = "/".join(
            fieldName + "=" + props.partitionValues[fieldName]
            for fieldName in props.partitionValues)
        if props.hasDtPartition:
            # Case 3 - date partition first, then the other partition levels.
            partitionsToDelete = [dtPartition + "/" + valuePartitionPath
                                  for dtPartition in getDateRangePartitions(props)]
        else:
            # Case 2 - no date partition, only the other partition levels.
            partitionsToDelete = [valuePartitionPath]

    # Set membership keeps the existence check O(1) per requested partition.
    existingPartitions = set(existingPartitions)
    for partitionToDelete in partitionsToDelete:
        if partitionToDelete in existingPartitions:
            pathToDelete = props.targetTablePathLocation + "/" + partitionToDelete
            deleteBucketPath(pathToDelete, targetTableBucketLocation)