def main()

in src/main/scala/deequ/deequ-suggestion-runner.scala [62:94]


  /** Job entry point: runs suggestions for every Glue table listed in the input DynamoDB table.
    *
    * Resolves the job options, initialises the Glue `Job`, reads the list of
    * Glue tables via `readGlueTablesFromDynamoDB`, and for each listed table
    * loads it, runs `processSuggestions`, and writes the result to the output
    * DynamoDB table and to S3. The job is committed once all rows are processed.
    *
    * Required options: `JOB_NAME`, `inputDynamoTable`, `outputDynamoTable`,
    * `targetBucketName`, `targetBucketPrefix`.
    *
    * @param sysArgs raw command-line arguments handed to the job
    */
  def main(sysArgs: Array[String]): Unit = {

    //***********************************************************************//
    // Step1: Create Glue Context and extract Args
    //***********************************************************************//
    val args = GlueArgParser.getResolvedOptions(sysArgs, Seq("JOB_NAME",
      "inputDynamoTable",
      "outputDynamoTable",
      "targetBucketName",
      "targetBucketPrefix").toArray)
    Job.init(args("JOB_NAME"), glueContext, args.asJava)

    //***********************************************************************//
    // Step2: Get the list of Glue Tables from DynamoDB table
    //***********************************************************************//
    val glueTableListDF = readGlueTablesFromDynamoDB(args("inputDynamoTable"))

    //***********************************************************************//
    // Step3: For each Glue table listed in DynamoDB, load it, run suggestions
    //        and write the results to DynamoDB and S3
    //***********************************************************************//
    glueTableListDF.collect().foreach { row =>
      // Read the columns positionally instead of joining the row with "," and
      // splitting it again: the round trip picks the wrong field as soon as a
      // value contains a comma, and drops trailing empty fields.
      // NOTE(review): assumes column 0 is the database and column 1 the table,
      // exactly as the original index-based lookup did — confirm against the
      // schema produced by readGlueTablesFromDynamoDB.
      val glueDB = String.valueOf(row.get(0))
      val glueTable = String.valueOf(row.get(1))

      val glueTableDF = loadGlueTable(glueDB, glueTable)
      val finalSuggestionsDF = processSuggestions(glueTableDF, glueDB, glueTable)
      writeSuggestionsToDynamo(finalSuggestionsDF, args("outputDynamoTable"))
      writeDStoS3(finalSuggestionsDF, glueDB, glueTable, args("targetBucketName"), args("targetBucketPrefix"))
    }

    Job.commit()

  }