def generateColumn()

in spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala [122:235]


  /**
   * Generates `numRows` random values for a column of the given data type.
   *
   * Nested types are built recursively from columns generated for their element, field, key
   * and value types. For the types that support it, a fraction of the values are nulls (only
   * when `options.allowNull` is set) or boundary values such as min/max, zero and infinities.
   * The decimal, date and timestamp branches never produce nulls.
   *
   * @param r
   *   source of randomness; all values are drawn from it, in row order
   * @param dataType
   *   type of the column to generate
   * @param numRows
   *   number of values to produce
   * @param options
   *   generation options (`allowNull`, `generateNegativeZero` and `baseDate` are read here)
   * @return
   *   a sequence of `numRows` values, possibly containing nulls
   * @throws IllegalStateException
   *   if `dataType` is not one of the supported types
   */
  def generateColumn(
      r: Random,
      dataType: DataType,
      numRows: Int,
      options: DataGenOptions): Seq[Any] = {

    // Derives a column from a generated LongType column while keeping nulls intact.
    // A plain `asInstanceOf[Long]` unboxes null to 0L, which would silently replace every
    // generated null with zero in the boolean/byte/short/int columns.
    def fromLongColumn(convert: Long => Any): Seq[Any] =
      generateColumn(r, DataTypes.LongType, numRows, options).map {
        case l: Long => convert(l)
        case _ => null
      }

    dataType match {
      case ArrayType(elementType, _) =>
        // Generate a pool of element values, then build each row's array from a window of
        // the pool starting at the row index (wrapping around at the end).
        val values = generateColumn(r, elementType, numRows, options)
        val list = ListBuffer[Any]()
        for (i <- 0 until numRows) {
          if (i % 10 == 0 && options.allowNull) {
            // every tenth row is a null array
            list += null
          } else {
            // arrays have between 0 and 4 elements
            list += Range(0, r.nextInt(5)).map(j => values((i + j) % values.length)).toArray
          }
        }
        list.toSeq
      case StructType(fields) =>
        // One column per field, then row i of the struct takes element i from each column.
        val values = fields.map(f => generateColumn(r, f.dataType, numRows, options))
        Range(0, numRows).map(i => Row(values.indices.map(j => values(j)(i)): _*))
      case MapType(keyType, valueType, _) =>
        // Keys and values are generated with nulls disabled; each row is a single-entry map.
        val mapOptions = options.copy(allowNull = false)
        val k = generateColumn(r, keyType, numRows, mapOptions)
        val v = generateColumn(r, valueType, numRows, mapOptions)
        k.zip(v).map(x => Map(x._1 -> x._2))
      case DataTypes.BooleanType =>
        fromLongColumn(l => l.toShort % 2 == 0)
      case DataTypes.ByteType =>
        fromLongColumn(_.toByte)
      case DataTypes.ShortType =>
        fromLongColumn(_.toShort)
      case DataTypes.IntegerType =>
        fromLongColumn(_.toInt)
      case DataTypes.LongType =>
        Range(0, numRows).map(_ => {
          // Roughly one in fifty values each for null and the listed boundary values. When
          // nulls are disabled, case 0 falls through to a random value.
          r.nextInt(50) match {
            case 0 if options.allowNull => null
            case 1 => 0L
            case 2 => Byte.MinValue.toLong
            case 3 => Byte.MaxValue.toLong
            case 4 => Short.MinValue.toLong
            case 5 => Short.MaxValue.toLong
            case 6 => Int.MinValue.toLong
            case 7 => Int.MaxValue.toLong
            case 8 => Long.MinValue
            case 9 => Long.MaxValue
            case _ => r.nextLong()
          }
        })
      case DataTypes.FloatType =>
        Range(0, numRows).map(_ => {
          // Guarded cases (null, negative zero) fall through to a random value when the
          // corresponding option is disabled.
          r.nextInt(20) match {
            case 0 if options.allowNull => null
            case 1 => Float.NegativeInfinity
            case 2 => Float.PositiveInfinity
            case 3 => Float.MinValue
            case 4 => Float.MaxValue
            case 5 => 0.0f
            case 6 if options.generateNegativeZero => -0.0f
            case _ => r.nextFloat()
          }
        })
      case DataTypes.DoubleType =>
        Range(0, numRows).map(_ => {
          r.nextInt(20) match {
            case 0 if options.allowNull => null
            case 1 => Double.NegativeInfinity
            case 2 => Double.PositiveInfinity
            case 3 => Double.MinValue
            case 4 => Double.MaxValue
            case 5 => 0.0
            case 6 if options.generateNegativeZero => -0.0
            case _ => r.nextDouble()
          }
        })
      case dt: DecimalType =>
        // A random double in [0, 1) rounded half-up to the column's scale.
        Range(0, numRows).map(_ =>
          new BigDecimal(r.nextDouble()).setScale(dt.scale, RoundingMode.HALF_UP))
      case DataTypes.StringType =>
        Range(0, numRows).map(_ => {
          // Mix of numeric-looking strings, alphabetic strings and arbitrary characters.
          r.nextInt(10) match {
            case 0 if options.allowNull => null
            case 1 => r.nextInt().toByte.toString
            case 2 => r.nextLong().toString
            case 3 => r.nextDouble().toString
            case 4 => RandomStringUtils.randomAlphabetic(8)
            case _ => r.nextString(8)
          }
        })
      case DataTypes.BinaryType =>
        // Binary values are the bytes of generated strings; null strings stay null.
        // NOTE(review): the JVM default charset is platform-dependent, so the bytes produced
        // for a given seed may differ between environments.
        generateColumn(r, DataTypes.StringType, numRows, options)
          .map {
            case x: String =>
              x.getBytes(Charset.defaultCharset())
            case _ =>
              null
          }
      case DataTypes.DateType =>
        // Date/time values are `options.baseDate` plus a random Int offset, used as
        // epoch milliseconds.
        Range(0, numRows).map(_ => new java.sql.Date(options.baseDate + r.nextInt()))
      case DataTypes.TimestampType =>
        Range(0, numRows).map(_ => new Timestamp(options.baseDate + r.nextInt()))
      case DataTypes.TimestampNTZType =>
        // NOTE(review): the resulting local date-time depends on the system default zone.
        Range(0, numRows).map(_ =>
          LocalDateTime.ofInstant(
            Instant.ofEpochMilli(options.baseDate + r.nextInt()),
            ZoneId.systemDefault()))
      case _ => throw new IllegalStateException(s"Cannot generate data for $dataType yet")
    }
  }