in src/main/scala/com/amazon/deequ/schema/RowLevelSchemaValidator.scala [225:281]
/**
 * Translates a schema into a single boolean column expression in conjunctive form:
 * the conjunction of all constraints of all column definitions.
 *
 * Every type-specific constraint is guarded with "column is null OR check", so that
 * null values only violate the schema through the explicit not-null constraint that is
 * added for non-nullable columns.
 *
 * @param schema the schema whose column definitions are turned into constraints
 * @return a boolean column expression which combines all constraints via `and`,
 *         starting from the literal `true`
 */
private[this] def toCNF(schema: RowLevelSchema): Column = {
  schema.columnDefinitions.foldLeft(expr(true.toString)) { case (cnf, columnDefinition) =>

    val colIsNull = col(columnDefinition.name).isNull

    /* Helper to build a clause of the form "null or check holds" */
    def nullOr(check: Column): Column = colIsNull.or(check)

    /* Only non-nullable columns get an explicit not-null constraint */
    val nullabilityConstraints: Seq[Column] =
      if (columnDefinition.isNullable) {
        Seq.empty
      } else {
        Seq(col(columnDefinition.name).isNotNull)
      }

    val typeConstraints: Seq[Column] = columnDefinition match {

      case intDef: IntColumnDefinition =>
        val colAsInt = col(intDef.name).cast(IntegerType)
        /* null or successfully casted */
        val castable = nullOr(colAsInt.isNotNull)
        /* The bounds must be guarded by the null check itself (not by `isNull` of the
           null check, which is never true); otherwise a null value in a nullable
           column would fail the lower bound */
        val lowerBound = intDef.minValue.map { value => nullOr(colAsInt.geq(value)) }
        val upperBound = intDef.maxValue.map { value => nullOr(colAsInt.leq(value)) }
        Seq(castable) ++ lowerBound.toSeq ++ upperBound.toSeq

      case decDef: DecimalColumnDefinition =>
        val decType = DataTypes.createDecimalType(decDef.precision, decDef.scale)
        /* null or successfully casted */
        Seq(nullOr(col(decDef.name).cast(decType).isNotNull))

      case strDef: StringColumnDefinition =>
        val lowerBound = strDef.minLength.map { value =>
          nullOr(length(col(strDef.name)).geq(value))
        }
        val upperBound = strDef.maxLength.map { value =>
          nullOr(length(col(strDef.name)).leq(value))
        }
        /* The regex must extract a non-empty match (group 0) from the value */
        val pattern = strDef.matches.map { regex =>
          nullOr(regexp_extract(col(strDef.name), regex, 0).notEqual(""))
        }
        lowerBound.toSeq ++ upperBound.toSeq ++ pattern.toSeq

      case tsDef: TimestampColumnDefinition =>
        /* null or successfully casted */
        Seq(nullOr(unix_timestamp(col(tsDef.name), tsDef.mask)
          .cast(TimestampType).isNotNull))
    }

    /* Append the constraints of this column to the conjunction built so far */
    (nullabilityConstraints ++ typeConstraints).foldLeft(cnf) { case (conjunction, constraint) =>
      conjunction.and(constraint)
    }
  }
}