# sdlf-utils/pipeline-examples/glue-jobs-deployer/pipeline_scripts/examplepipeline-glue-job.py
def __init__(self, startDateStr='', endDateStr='', startYearStr='', endYearStr='', startMonthStr='', endMonthStr='',
startDayStr='', endDayStr='', targetTable='',
targetTableBucketLocation='', targetTablePathLocation='', targetTableFQDN='',
sourcePartitionYearField='',
sourcePartitionMonthField='', sourcePartitionDayField='',
maskedTargetDatabase='', maskedTargetTableBucketLocation='', columns2mask='',
maskedTargetTableFQDN='', targetDatabase='', redshiftOutputTable='', executeRedshiftExport=False,
outputToRedshift=False, redshiftCatalogConnection='', logger=None, jobName='', logPath='',
processStartTime='', useControlTable=False, sendCloudWatchLogs=False,
cloudwatchLogGroup='SparkTransform', dynamoOutputTable='',
hasDtPartition=False, partitionValues=None, dynamoOutputNumParallelTasks=None,
redshiftTempBucket='', targetPartitionDtField='', sourcePartitionDtField='',
redshiftDatabase='', createViews=True,
useAnalyticsSecLayer=True, redshiftColumnsToExport='', dropDuplicates=False,
sourcesDict=None, reprocessDates=None, processDatesCondition='', datePartitionsSeparated=False,
isCdcTable=False, cdcTableKey='', jobId=0, executeSparkTransform=True, writeAsTable=True,
cdcDatePartitionsToProcess=None, sparkPartitions=0, outputFormat='parquet',
redshiftTempFormat='CSV GZIP', outputToDynamo=False, executeDynamoExport=False, env='dev',
datasetName=''):
# FW PROPERTIES
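# Processing window boundaries, passed as strings (date, year, month, day)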
self.startDateStr = startDateStr
self.endDateStr = endDateStr
self.startYearStr = startYearStr
self.endYearStr = endYearStr
self.startMonthStr = startMonthStr
self.endMonthStr = endMonthStr
self.startDayStr = startDayStr
self.endDayStr = endDayStr
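# Target table: Glue database/table names, environment and storage locations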
self.targetDatabase = targetDatabase
self.targetTable = targetTable
self.targetTableFQDN = targetTableFQDN
self.env = env
self.targetTableBucketLocation = targetTableBucketLocation
self.targetTablePathLocation = targetTablePathLocation
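# Partition field names on the target and source tables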
self.targetPartitionDtField = targetPartitionDtField
self.sourcePartitionDtField = sourcePartitionDtField
self.sourcePartitionYearField = sourcePartitionYearField
self.sourcePartitionMonthField = sourcePartitionMonthField
self.sourcePartitionDayField = sourcePartitionDayField
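# Masked copy of the target table; columns2mask lists the columns to mask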
self.maskedTargetDatabase = maskedTargetDatabase
self.maskedTargetTableBucketLocation = maskedTargetTableBucketLocation
self.maskedTargetTableFQDN = maskedTargetTableFQDN
self.columns2mask = columns2mask
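# Redshift export settings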
self.redshiftOutputTable = redshiftOutputTable
self.executeRedshiftExport = executeRedshiftExport
self.outputToRedshift = outputToRedshift
self.redshiftTempBucket = redshiftTempBucket
self.redshiftDatabase = redshiftDatabase
self.redshiftColumnsToExport = redshiftColumnsToExport
self.redshiftTempFormat = redshiftTempFormat
self.redshiftCatalogConnection = redshiftCatalogConnection
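# Logging, job metadata and control-table settings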
self.logger = logger
self.jobName = jobName
self.datasetName = datasetName
self.logPath = logPath
self.processStartTime = processStartTime
self.useControlTable = useControlTable
self.sendCloudWatchLogs = sendCloudWatchLogs
self.cloudwatchLogGroup = cloudwatchLogGroup
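# Date-partition handling, view creation, source configuration and reprocessing options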
self.hasDtPartition = hasDtPartition
# None sentinel from the signature: fall back to a fresh container per instance
self.partitionValues = partitionValues if partitionValues is not None else {}
self.createViews = createViews
self.useAnalyticsSecLayer = useAnalyticsSecLayer
self.sourcesDict = sourcesDict if sourcesDict is not None else {}
self.reprocessDates = reprocessDates if reprocessDates is not None else []
self.processDatesCondition = processDatesCondition
self.datePartitionsSeparated = datePartitionsSeparated
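# CDC (change data capture) handling, duplicate dropping and Spark partitioning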
self.isCdcTable = isCdcTable
self.dropDuplicates = dropDuplicates
self.cdcTableKey = cdcTableKey
self.jobId = jobId
self.cdcDatePartitionsToProcess = cdcDatePartitionsToProcess if cdcDatePartitionsToProcess is not None else []
self.sparkPartitions = sparkPartitions
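# DynamoDB export settings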
self.dynamoOutputTable = dynamoOutputTable
self.dynamoOutputNumParallelTasks = dynamoOutputNumParallelTasks
self.outputToDynamo = outputToDynamo
self.executeDynamoExport = executeDynamoExport
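# Spark transform execution and output format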
self.executeSparkTransform = executeSparkTransform
self.writeAsTable = writeAsTable
self.outputFormat = outputFormat
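# Minimal usage sketch, assuming the constructor above belongs to a class named
# ExamplePipelineTransform; the class name and all argument values below are
# illustrative assumptions, since only the constructor appears in this excerpt.
transform = ExamplePipelineTransform(
    startDateStr='2021-01-01',
    endDateStr='2021-01-31',
    targetDatabase='example_analytics_db',
    targetTable='example_table',
    targetTableFQDN='example_analytics_db.example_table',
    targetTableBucketLocation='s3://example-analytics-bucket',
    targetTablePathLocation='example_table/',
    hasDtPartition=True,
    targetPartitionDtField='dt',
    outputFormat='parquet',
    env='dev',
    datasetName='example_dataset',
)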