private def getProfilingExprs()

in measure/src/main/scala/org/apache/griffin/measure/execution/impl/ProfilingMeasure.scala [236:276]


  /**
   * Builds the profiling expressions for a single field.
   *
   * The result is a flat sequence of alternating columns: a literal holding the metric
   * name, immediately followed by the aggregate expression for that metric (aliased with
   * the same name).
   *
   * @param field               field to profile; its name and data type are used
   * @param roundScale          scale passed to `bround` for average, standard deviation,
   *                            variance and kurtosis
   * @param approxDistinctCount when true, `approx_count_distinct` is used (and the metric
   *                            name is prefixed with `ApproxPrefix`); otherwise
   *                            `countDistinct` is used
   * @param dataSetSample       the distinct-count metric is emitted only when this equals
   *                            1 (presumably meaning "no sampling" -- confirm with caller)
   * @return flattened (name literal, aggregate expression) pairs for every metric
   */
  private def getProfilingExprs(
      field: StructField,
      roundScale: Int,
      approxDistinctCount: Boolean,
      dataSetSample: Double): Seq[Column] = {
    val colName = field.name
    val colType = field.dataType

    val column = col(colName)
    // Helper columns referenced by name; presumably added to the dataset before
    // aggregation -- verify against the caller.
    val lengthColExpr = col(lengthColFn(colName))
    val nullColExpr = col(nullsInColFn(colName))

    // Distinct count is only reported when dataSetSample is exactly 1; in every other
    // case nothing is emitted, so the expressions are not even constructed.
    val distinctExpr: Seq[Column] =
      if (dataSetSample != 1.0) {
        Nil
      } else if (approxDistinctCount) {
        val approxName = s"$ApproxPrefix$DistinctCount"
        Seq(lit(approxName), approx_count_distinct(column).as(approxName))
      } else {
        Seq(lit(DistinctCount), countDistinct(column).as(DistinctCount))
      }

    Seq(
      Seq(lit(DataTypeStr), lit(colType.catalogString).as(DataTypeStr)),
      Seq(lit(Total), sum(lit(1)).as(Total)),
      Seq(lit(MinColLength), min(lengthColExpr).as(MinColLength)),
      Seq(lit(MaxColLength), max(lengthColExpr).as(MaxColLength)),
      Seq(lit(AvgColLength), avg(lengthColExpr).as(AvgColLength)),
      // The statistics below are routed through forNumericFn together with the column
      // type; NOTE(review): presumably it yields a value only for numeric types -- confirm.
      Seq(lit(Min), forNumericFn(colType, min(column), Min)),
      Seq(lit(Max), forNumericFn(colType, max(column), Max)),
      Seq(lit(Avg), forNumericFn(colType, bround(avg(column), roundScale), Avg)),
      Seq(
        lit(StdDeviation),
        forNumericFn(colType, bround(stddev(column), roundScale), StdDeviation)),
      Seq(lit(Variance), forNumericFn(colType, bround(variance(column), roundScale), Variance)),
      Seq(lit(Kurtosis), forNumericFn(colType, bround(kurtosis(column), roundScale), Kurtosis)),
      distinctExpr,
      Seq(lit(NullCount), sum(nullColExpr).as(NullCount))).flatten
  }