sdlf-utils/pipeline-examples/glue-jobs-deployer/pipeline_scripts/examplepipeline-glue-job.py
def fillProperties(self, config):
    """Populate this property holder from the job's JSON config dictionary."""
# COMMON FRAMEWORK PROPERTIES
    # collect every source database entry in a single pass instead of building
    # two parallel lists and indexing into them
    for key, value in config.items():
        if key.startswith("source_database"):
            self.sourcesDict[key] = value
if config.get('target_database') is not None:
self.targetDatabase = str(config.get('target_database'))
self.targetTable = str(config.get('target_table'))
    else:
        self.targetDatabase = ''  # ensure the attribute exists before the FQDN is built below
        self.targetTable = ''
self.targetTableBucketLocation = str(config.get('target_table_bucket_location'))
self.targetTablePathLocation = str(config.get('target_table_path_location'))
self.targetTableFQDN = self.targetDatabase + "." + self.targetTable
if config.get('source_partition_year_field') is not None:
self.sourcePartitionYearField = str(config.get('source_partition_year_field'))
self.sourcePartitionMonthField = str(config.get('source_partition_month_field'))
self.sourcePartitionDayField = str(config.get('source_partition_day_field'))
self.datePartitionsSeparated = True
elif config.get('source_partition_dt_field') is not None:
self.sourcePartitionDtField = str(config.get('source_partition_dt_field'))
self.datePartitionsSeparated = False
if config.get('target_partition_dt_field') is not None:
self.hasDtPartition = True
self.targetPartitionDtField = str(config.get('target_partition_dt_field'))
else:
self.hasDtPartition = False
self.processControlTable = str(config.get('process_control_table_name'))
if config.get('execute_spark_transform') is not None:
self.executeSparkTransform = str(config.get('execute_spark_transform')) == 'True'
if config.get('write_as_table') is not None:
self.writeAsTable = str(config.get('write_as_table')) == 'True'
else:
self.writeAsTable = True
if config.get('output_format') is not None:
self.outputFormat = str(config.get('output_format'))
else:
self.outputFormat = 'parquet'
if config.get('spark_partitions_number') is not None:
self.sparkPartitions = int(config.get('spark_partitions_number'))
if config.get('use_analytics_sec_layer') is not None:
self.useAnalyticsSecLayer = str(config.get('use_analytics_sec_layer')) == 'True'
# DYNAMODB OUTPUT PROPERTIES
if config.get('execute_dynamo_export') is not None:
self.executeDynamoExport = str(config.get('execute_dynamo_export')) == 'True'
if self.executeDynamoExport:
self.dynamoOutputTable = str(config.get('dynamo_output_table'))
self.dynamoKey = str(config.get('dynamo_key'))
self.outputToDynamo = True
            # kept as a string: Glue connection_options values are passed as strings
            self.dynamoOutputNumParallelTasks = str(config.get('dynamo_output_num_parallel_tasks'))
# REDSHIFT OUTPUT PROPERTIES
if config.get('execute_redshift_export') is not None:
self.executeRedshiftExport = str(config.get('execute_redshift_export')) == 'True'
if self.executeRedshiftExport:
self.redshiftOutputTable = str(config.get('redshift_output_table'))
self.outputToRedshift = True
self.redshiftTempBucket = str(config.get('redshift_temp_bucket'))
self.redshiftDatabase = str(config.get('redshift_database'))
if config.get('redshift_columns_to_export') is not None:
self.redshiftColumnsToExport = str(config.get('redshift_columns_to_export'))
if config.get('redshift_temp_format') is not None:
self.redshiftTempFormat = str(config.get('redshift_temp_format'))
if config.get('redshift_catalog_connection') is not None:
self.redshiftCatalogConnection = str(config.get('redshift_catalog_connection'))
if config.get('is_cdc_table') is not None:
self.isCdcTable = str(config.get('is_cdc_table')) == 'True'
self.cdcTableKey = str(config.get('cdc_table_key'))
if config.get('drop_duplicates') is not None:
self.dropDuplicates = str(config.get('drop_duplicates')) == 'True'
    # MASKED OUTPUT PROPERTIES
    # guard like the other optional blocks so a missing masking config does not
    # store the literal string "None"
    if config.get('masked_target_database') is not None:
        self.maskedTargetDatabase = str(config.get('masked_target_database'))
        self.maskedTargetTableFQDN = self.maskedTargetDatabase + "." + self.targetTable
        self.maskedTargetTableBucketLocation = str(config.get('masked_target_table_bucket_location'))
        self.columns2mask = str(config.get('columns2mask'))
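
# A minimal usage sketch (illustration only, not part of the original job). It shows
# the shape of config that fillProperties expects. The JobProperties holder class and
# every key value below are hypothetical assumptions, not values from the framework.
#
#     config = {
#         "source_database_raw": "raw_db.sales",
#         "target_database": "stage_db",
#         "target_table": "sales",
#         "target_table_bucket_location": "s3://stage-bucket/",
#         "target_table_path_location": "sales/",
#         "source_partition_dt_field": "dt",
#         "target_partition_dt_field": "dt",
#         "process_control_table_name": "process_control",
#         "write_as_table": "True",        # booleans arrive as the string 'True'/'False'
#         "output_format": "parquet",
#         "spark_partitions_number": "10", # parsed with int()
#     }
#     properties = JobProperties()  # hypothetical holder exposing fillProperties
#     properties.fillProperties(config)
#     # then: properties.targetTableFQDN == "stage_db.sales",
#     # properties.writeAsTable is True, properties.sparkPartitions == 10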