def readGlueTablesFromDynamoDB()

in src/main/scala/deequ/deequ-suggestion-runner.scala [101:123]


  def readGlueTablesFromDynamoDB(dynoTable: String): DataFrame = {

    // Point the EMR DynamoDB connector at the control table listing the Glue tables to profile.
    val jobConf_add = new JobConf(spark.sparkContext.hadoopConfiguration)
    jobConf_add.set("dynamodb.input.tableName", dynoTable)
    jobConf_add.set("mapred.output.format.class", "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat")
    jobConf_add.set("mapred.input.format.class", "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat")

    // Read the DynamoDB table as a Hadoop RDD of (key, item) pairs.
    val hadoopRDD = spark.sparkContext.hadoopRDD(jobConf_add, classOf[DynamoDBInputFormat], classOf[Text], classOf[DynamoDBItemWritable])

    // Pull the three attributes of interest out of each DynamoDB item.
    val glueRDD: RDD[(String, String, String)] = hadoopRDD.map {
      case (_, dbwritable) => (dbwritable.getItem().get("database").toString(),
        dbwritable.getItem().get("tablename").toString(),
        dbwritable.getItem().get("enable").toString())
    }

    // col_extractValue (a UDF defined elsewhere in this file) strips the DynamoDB type wrapper
    // from the raw attribute strings; only tables flagged enable = "Y" are returned.
    glueRDD.toDF
      .withColumn("database", col_extractValue($"_1"))
      .withColumn("tablename", col_extractValue($"_2"))
      .withColumn("enable", col_extractValue($"_3"))
      .select("database", "tablename", "enable")
      .where($"enable" === "Y")

  }
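
For context, a minimal sketch of how this excerpt could be wired up. It assumes the imports below (emr-dynamodb-hadoop connector plus Spark), an active SparkSession named spark with its implicits in scope, the col_extractValue UDF defined elsewhere in this file, and a hypothetical DynamoDB control table called "glue-tables-config"; none of these names are confirmed by the excerpt itself.

  // Imports the excerpt relies on (assumed; emr-dynamodb-hadoop + Spark).
  import org.apache.hadoop.dynamodb.DynamoDBItemWritable
  import org.apache.hadoop.dynamodb.read.DynamoDBInputFormat
  import org.apache.hadoop.io.Text
  import org.apache.hadoop.mapred.JobConf
  import org.apache.spark.rdd.RDD
  import org.apache.spark.sql.DataFrame
  import spark.implicits._  // needed for toDF and the $"col" syntax

  // "glue-tables-config" is a hypothetical table name used only for illustration.
  val enabledTables: DataFrame = readGlueTablesFromDynamoDB("glue-tables-config")

  // Each returned row describes one Glue table flagged enable = "Y".
  enabledTables.collect().foreach { row =>
    val database  = row.getString(0)
    val tablename = row.getString(1)
    println(s"Will run constraint suggestions on $database.$tablename")
  }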