in sql/api/src/main/scala/org/apache/spark/sql/functions.scala [7874:8615]
def schema_of_xml(xml: String): Column = schema_of_xml(lit(xml))
/**
* Parses an XML string and infers its schema in DDL format.
*
* @param xml
* a foldable string column containing an XML string.
* @group xml_funcs
* @since 4.0.0
*/
def schema_of_xml(xml: Column): Column = Column.fn("schema_of_xml", xml)
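// A minimal usage sketch (not part of the API surface; assumes a running
// SparkSession `spark` and `import org.apache.spark.sql.functions._`; the
// inferred DDL string is shown for illustration):
//
//   spark.range(1).select(schema_of_xml(lit("<p><a>1</a></p>")))
//   // single-row result holding the DDL string "STRUCT<a: BIGINT>"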
// scalastyle:off line.size.limit
/**
* Parses an XML string and infers its schema in DDL format using options.
*
* @param xml
* a foldable string column containing XML data.
* @param options
* options to control how the XML is parsed. Accepts the same options as the XML data source.
* See <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option"> Data
* Source Option</a> in the version you use.
* @return
* a column with string literal containing schema in DDL format.
* @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
def schema_of_xml(xml: Column, options: java.util.Map[String, String]): Column =
Column.fnWithOptions("schema_of_xml", options.asScala.iterator, xml)
// scalastyle:off line.size.limit
/**
* (Java-specific) Converts a column containing a `StructType` into an XML string with the
* specified schema. Throws an exception in the case of an unsupported type.
*
* @param e
* a column containing a struct.
* @param options
* options to control how the struct column is converted into a XML string. It accepts the
* same options as the XML data source. See <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option"> Data
* Source Option</a> in the version you use.
* @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
def to_xml(e: Column, options: java.util.Map[String, String]): Column =
Column.fnWithOptions("to_xml", options.asScala.iterator, e)
/**
* Converts a column containing a `StructType` into an XML string with the specified schema.
* Throws an exception in the case of an unsupported type.
*
* @param e
* a column containing a struct.
* @group xml_funcs
* @since 4.0.0
*/
def to_xml(e: Column): Column = to_xml(e, Map.empty[String, String].asJava)
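// A usage sketch (assumes `import spark.implicits._` for the `$` syntax; the
// <ROW> tag shown is the XML data source default row tag and can be changed
// via the `rowTag` option):
//
//   val df = spark.sql("SELECT named_struct('a', 1, 'b', 'x') AS s")
//   df.select(to_xml($"s"))   // e.g. <ROW><a>1</a><b>x</b></ROW>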
/**
* (Java-specific) A transform for timestamps and dates to partition data into years.
*
* @group partition_transforms
* @since 3.0.0
*/
def years(e: Column): Column = partitioning.years(e)
/**
* (Java-specific) A transform for timestamps and dates to partition data into months.
*
* @group partition_transforms
* @since 3.0.0
*/
def months(e: Column): Column = partitioning.months(e)
/**
* (Java-specific) A transform for timestamps and dates to partition data into days.
*
* @group partition_transforms
* @since 3.0.0
*/
def days(e: Column): Column = partitioning.days(e)
/**
* Returns a string array of values within the nodes of xml that match the XPath expression.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath(xml: Column, path: Column): Column =
Column.fn("xpath", xml, path)
/**
* Returns true if the XPath expression evaluates to true, or if a matching node is found.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_boolean(xml: Column, path: Column): Column =
Column.fn("xpath_boolean", xml, path)
/**
* Returns a double value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_double(xml: Column, path: Column): Column =
Column.fn("xpath_double", xml, path)
/**
* Returns a double value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_number(xml: Column, path: Column): Column =
Column.fn("xpath_number", xml, path)
/**
* Returns a float value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_float(xml: Column, path: Column): Column =
Column.fn("xpath_float", xml, path)
/**
* Returns an integer value, or the value zero if no match is found, or if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_int(xml: Column, path: Column): Column =
Column.fn("xpath_int", xml, path)
/**
* Returns a long integer value, or the value zero if no match is found, or if a match is found
* but the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_long(xml: Column, path: Column): Column =
Column.fn("xpath_long", xml, path)
/**
* Returns a short integer value, or the value zero if no match is found, or if a match is found
* but the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_short(xml: Column, path: Column): Column =
Column.fn("xpath_short", xml, path)
/**
* Returns the text contents of the first xml node that matches the XPath expression.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_string(xml: Column, path: Column): Column =
Column.fn("xpath_string", xml, path)
/**
* (Java-specific) A transform for timestamps to partition data into hours.
*
* @group partition_transforms
* @since 3.0.0
*/
def hours(e: Column): Column = partitioning.hours(e)
/**
* Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to
* `targetTz`.
*
* @param sourceTz
* the time zone for the input timestamp. If it is omitted, the current session time zone is
* used as the source time zone.
* @param targetTz
* the time zone to which the input timestamp should be converted.
* @param sourceTs
* a timestamp without time zone.
* @group datetime_funcs
* @since 3.5.0
*/
def convert_timezone(sourceTz: Column, targetTz: Column, sourceTs: Column): Column =
Column.fn("convert_timezone", sourceTz, targetTz, sourceTs)
/**
* Converts the timestamp without time zone `sourceTs` from the current time zone to `targetTz`.
*
* @param targetTz
* the time zone to which the input timestamp should be converted.
* @param sourceTs
* a timestamp without time zone.
* @group datetime_funcs
* @since 3.5.0
*/
def convert_timezone(targetTz: Column, sourceTs: Column): Column =
Column.fn("convert_timezone", targetTz, sourceTs)
/**
* Make DayTimeIntervalType duration from days, hours, mins and secs.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column, hours: Column, mins: Column, secs: Column): Column =
Column.fn("make_dt_interval", days, hours, mins, secs)
/**
* Make DayTimeIntervalType duration from days, hours and mins.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column, hours: Column, mins: Column): Column =
Column.fn("make_dt_interval", days, hours, mins)
/**
* Make DayTimeIntervalType duration from days and hours.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column, hours: Column): Column =
Column.fn("make_dt_interval", days, hours)
/**
* Make DayTimeIntervalType duration from days.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column): Column =
Column.fn("make_dt_interval", days)
/**
* Make DayTimeIntervalType duration.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(): Column =
Column.fn("make_dt_interval")
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days, hours, mins, secs)
/**
* Make interval from years, months, weeks, days, hours, mins and secs.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_interval", years, months, weeks, days, hours, mins, secs)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days, hours, mins)
/**
* Make interval from years, months, weeks, days, hours and mins.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column): Column =
Column.fn("make_interval", years, months, weeks, days, hours, mins)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days, hours)
/**
* Make interval from years, months, weeks, days and hours.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column): Column =
Column.fn("make_interval", years, months, weeks, days, hours)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column, months: Column, weeks: Column, days: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days)
/**
* Make interval from years, months, weeks and days.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column, months: Column, weeks: Column, days: Column): Column =
Column.fn("make_interval", years, months, weeks, days)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column, months: Column, weeks: Column): Column =
Column.fn("try_make_interval", years, months, weeks)
/**
* Make interval from years, months and weeks.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column, months: Column, weeks: Column): Column =
Column.fn("make_interval", years, months, weeks)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column, months: Column): Column =
Column.fn("try_make_interval", years, months)
/**
* Make interval from years and months.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column, months: Column): Column =
Column.fn("make_interval", years, months)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column): Column =
Column.fn("try_make_interval", years)
/**
* Make interval from years.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column): Column =
Column.fn("make_interval", years)
/**
* Make interval.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(): Column =
Column.fn("make_interval")
/**
* Create timestamp from years, months, days, hours, mins, secs and timezone fields. The result
* data type is consistent with the value of configuration `spark.sql.timestampType`. If the
* configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs.
* Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("make_timestamp", years, months, days, hours, mins, secs, timezone)
/**
* Create timestamp from years, months, days, hours, mins and secs fields. The result data type
* is consistent with the value of configuration `spark.sql.timestampType`. If the configuration
* `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it
* will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_timestamp", years, months, days, hours, mins, secs)
/**
* Try to create a timestamp from years, months, days, hours, mins, secs and timezone fields.
* The result data type is consistent with the value of configuration `spark.sql.timestampType`.
* The function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("try_make_timestamp", years, months, days, hours, mins, secs, timezone)
/**
* Try to create a timestamp from years, months, days, hours, mins, and secs fields. The result
* data type is consistent with the value of configuration `spark.sql.timestampType`. The
* function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_timestamp", years, months, days, hours, mins, secs)
/**
* Create a timestamp with local time zone from years, months, days, hours, mins, secs and
* and timezone fields. If the configuration `spark.sql.ansi.enabled` is false, the function
* returns NULL on invalid inputs. Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("make_timestamp_ltz", years, months, days, hours, mins, secs, timezone)
/**
* Create a timestamp with local time zone from years, months, days, hours, mins and
* secs fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns
* NULL on invalid inputs. Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_timestamp_ltz", years, months, days, hours, mins, secs)
/**
* Try to create a timestamp with local time zone from years, months, days, hours,
* mins, secs and timezone fields. The function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("try_make_timestamp_ltz", years, months, days, hours, mins, secs, timezone)
/**
* Try to create a timestamp with local time zone from years, months, days, hours,
* mins and secs fields. The function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_timestamp_ltz", years, months, days, hours, mins, secs)
/**
* Create a local date-time from years, months, days, hours, mins and secs fields. If the
* configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs.
* Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp_ntz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_timestamp_ntz", years, months, days, hours, mins, secs)
/**
* Try to create a local date-time from years, months, days, hours, mins and secs fields. The
* function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp_ntz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_timestamp_ntz", years, months, days, hours, mins, secs)
/**
* Make year-month interval from years and months.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_ym_interval(years: Column, months: Column): Column =
Column.fn("make_ym_interval", years, months)
/**
* Make year-month interval from years.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_ym_interval(years: Column): Column = Column.fn("make_ym_interval", years)
/**
* Make year-month interval.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_ym_interval(): Column = Column.fn("make_ym_interval")
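// A sketch across the arities (omitted fields default to zero):
//
//   spark.range(1).select(
//     make_ym_interval(lit(1), lit(2)),  // => INTERVAL '1-2' YEAR TO MONTH
//     make_ym_interval(lit(2)),          // => INTERVAL '2-0' YEAR TO MONTH
//     make_ym_interval())                // => INTERVAL '0-0' YEAR TO MONTH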
/**
* (Java-specific) A transform for any type that partitions by a hash of the input column.
*
* @group partition_transforms
* @since 3.0.0
*/
def bucket(numBuckets: Column, e: Column): Column = partitioning.bucket(numBuckets, e)
/**
* (Java-specific) A transform for any type that partitions by a hash of the input column.
*
* @group partition_transforms
* @since 3.0.0
*/
def bucket(numBuckets: Int, e: Column): Column = partitioning.bucket(numBuckets, e)
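// The partition transforms (years, months, days, hours, bucket) cannot be
// evaluated as ordinary expressions; they are only valid as partitioning
// expressions for V2 tables. A sketch with the DataFrameWriterV2 API
// (`catalog.db.events` is a hypothetical table identifier):
//
//   df.writeTo("catalog.db.events")
//     .partitionedBy(years($"ts"), bucket(16, $"id"))
//     .create()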
//////////////////////////////////////////////////////////////////////////////////////////////
// Predicates functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
* Returns `col2` if `col1` is null, or `col1` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def ifnull(col1: Column, col2: Column): Column = Column.fn("ifnull", col1, col2)
/**
* Returns true if `col` is not null, or false otherwise.
*
* @group predicate_funcs
* @since 3.5.0
*/
def isnotnull(col: Column): Column = Column.fn("isnotnull", col)
/**
* Returns the same result as the EQUAL(=) operator for non-null operands, but returns true if
* both are null, and false if one of them is null.
*
* @group predicate_funcs
* @since 3.5.0
*/
def equal_null(col1: Column, col2: Column): Column = Column.fn("equal_null", col1, col2)
/**
* Returns null if `col1` is equal to `col2`, or `col1` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def nullif(col1: Column, col2: Column): Column = Column.fn("nullif", col1, col2)
/**
* Returns null if `col` is equal to zero, or `col` otherwise.
*
* @group conditional_funcs
* @since 4.0.0
*/
def nullifzero(col: Column): Column = Column.fn("nullifzero", col)
/**
* Returns `col2` if `col1` is null, or `col1` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def nvl(col1: Column, col2: Column): Column = Column.fn("nvl", col1, col2)
/**
* Returns `col2` if `col1` is not null, or `col3` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def nvl2(col1: Column, col2: Column, col3: Column): Column = Column.fn("nvl2", col1, col2, col3)
/**
* Returns zero if `col` is null, or `col` otherwise.
*
* @group conditional_funcs
* @since 4.0.0
*/
def zeroifnull(col: Column): Column = Column.fn("zeroifnull", col)
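// A sketch of the null-handling family (assumes `import spark.implicits._`
// for toDF; per-row results in the comments):
//
//   val df = Seq((Some(1), Option.empty[Int])).toDF("a", "b")
//   df.select(
//     nvl($"b", $"a"),                 // 1     (b is null, fall back to a)
//     nvl2($"a", lit("y"), lit("n")),  // "y"   (a is not null)
//     nullif(lit(1), lit(1)),          // null  (operands are equal)
//     nullifzero(lit(0)),              // null
//     zeroifnull($"b"),                // 0
//     equal_null($"a", $"b"))          // false (exactly one side is null)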
// scalastyle:off line.size.limit
// scalastyle:off parameter.number
/* Use the following code to generate:
(0 to 10).foreach { x =>
val types = (1 to x).foldRight("RT")((i, s) => s"A$i, $s")
val typeSeq = "RT" +: (1 to x).map(i => s"A$i")
val typeTags = typeSeq.map(t => s"$t: TypeTag").mkString(", ")
val implicitTypeTags = typeSeq.map(t => s"implicitly[TypeTag[$t]]").mkString(", ")
println(s"""
|/**
| * Defines a Scala closure of $x arguments as user-defined function (UDF).
| * The data types are automatically inferred based on the Scala closure's
| * signature. By default the returned UDF is deterministic. To change it to
| * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`.
| *
| * @group udf_funcs
| * @since 1.3.0
| */
|def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = {
| SparkUserDefinedFunction(f, $implicitTypeTags)
|}""".stripMargin)