in src/main/scala/deequ/deequ-suggestion-runner.scala [62:94]
/**
  * Entry point of the suggestion job.
  *
  * Resolves the job arguments, initialises the Glue job, reads the list of Glue
  * tables from the DynamoDB table named by `inputDynamoTable`, and for every
  * listed table loads it, computes suggestions, and writes the result to the
  * DynamoDB table named by `outputDynamoTable` and to S3 under
  * `targetBucketName` / `targetBucketPrefix`. The job is committed once all
  * tables have been processed.
  *
  * NOTE(review): each row of the table list is assumed to carry the Glue
  * database name in its first comma-separated field and the Glue table name in
  * its second; values that themselves contain a comma would be split
  * incorrectly — confirm against `readGlueTablesFromDynamoDB`.
  *
  * @param sysArgs raw job arguments; must resolve `JOB_NAME`, `inputDynamoTable`,
  *                `outputDynamoTable`, `targetBucketName` and `targetBucketPrefix`
  * @throws IllegalArgumentException if a row of the table list yields fewer
  *                                  than two comma-separated fields
  */
def main(sysArgs: Array[String]): Unit = {
  //***********************************************************************//
  // Step1: Resolve the job arguments and initialise the Glue job
  //***********************************************************************//
  val requiredArgs = Array(
    "JOB_NAME",
    "inputDynamoTable",
    "outputDynamoTable",
    "targetBucketName",
    "targetBucketPrefix")
  val args = GlueArgParser.getResolvedOptions(sysArgs, requiredArgs)
  Job.init(args("JOB_NAME"), glueContext, args.asJava)

  // Loop-invariant argument lookups, resolved once up front.
  val outputDynamoTable = args("outputDynamoTable")
  val targetBucketName = args("targetBucketName")
  val targetBucketPrefix = args("targetBucketPrefix")

  //***********************************************************************//
  // Step2: Get the list of Glue Tables from DynamoDB table
  //***********************************************************************//
  val glueTableListDF = readGlueTablesFromDynamoDB(args("inputDynamoTable"))

  //***********************************************************************//
  // Step3: Read each Glue table listed in DynamoDB, run suggestions and write to Dynamo and S3
  //***********************************************************************//
  glueTableListDF.collect().foreach { row =>
    // Join and split the row only once; the first two fields are the
    // database and table names, any further fields are ignored.
    val (glueDB, glueTable) = row.mkString(",").split(",") match {
      case Array(db, table, _*) => (db, table)
      case _ =>
        throw new IllegalArgumentException(
          s"Expected a Glue database and table name in row, but got: $row")
    }

    val glueTableDF = loadGlueTable(glueDB, glueTable)
    val finalSuggestionsDF = processSuggestions(glueTableDF, glueDB, glueTable)
    writeSuggestionsToDynamo(finalSuggestionsDF, outputDynamoTable)
    writeDStoS3(finalSuggestionsDF, glueDB, glueTable, targetBucketName, targetBucketPrefix)
  }

  Job.commit()
}