in sql/api/src/main/scala/org/apache/spark/sql/functions.scala [7874:8615]
def schema_of_xml(xml: String): Column = schema_of_xml(lit(xml))
/**
* Parses an XML string and infers its schema in DDL format.
*
* @param xml
* a foldable string column containing an XML string.
* @group xml_funcs
* @since 4.0.0
*/
def schema_of_xml(xml: Column): Column = Column.fn("schema_of_xml", xml)
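// A minimal usage sketch (not part of the API surface; assumes a running
// SparkSession `spark` and `import org.apache.spark.sql.functions._`; the
// inferred DDL string is shown for illustration):
//
//   spark.range(1).select(schema_of_xml(lit("<p><a>1</a></p>")))
//   // single-row result holding the DDL string "STRUCT<a: BIGINT>"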
// scalastyle:off line.size.limit
/**
* Parses an XML string and infers its schema in DDL format using options.
*
* @param xml
* a foldable string column containing XML data.
* @param options
* options to control how the XML is parsed. Accepts the same options as the XML data source.
* See <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option"> Data
* Source Option</a> in the version you use.
* @return
* a column with string literal containing schema in DDL format.
* @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
def schema_of_xml(xml: Column, options: java.util.Map[String, String]): Column =
Column.fnWithOptions("schema_of_xml", options.asScala.iterator, xml)
// scalastyle:off line.size.limit
/**
* (Java-specific) Converts a column containing a `StructType` into an XML string with the
* specified schema. Throws an exception in the case of an unsupported type.
*
* @param e
* a column containing a struct.
* @param options
* options to control how the struct column is converted into a XML string. It accepts the
* same options as the XML data source. See <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option"> Data
* Source Option</a> in the version you use.
* @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
def to_xml(e: Column, options: java.util.Map[String, String]): Column =
Column.fnWithOptions("to_xml", options.asScala.iterator, e)
/**
* Converts a column containing a `StructType` into an XML string with the specified schema.
* Throws an exception in the case of an unsupported type.
*
* @param e
* a column containing a struct.
* @group xml_funcs
* @since 4.0.0
*/
def to_xml(e: Column): Column = to_xml(e, Map.empty[String, String].asJava)
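// A usage sketch (assumes `import spark.implicits._` for the `$` syntax; the
// <ROW> tag shown is the XML data source default row tag and can be changed
// via the `rowTag` option):
//
//   val df = spark.sql("SELECT named_struct('a', 1, 'b', 'x') AS s")
//   df.select(to_xml($"s"))   // e.g. <ROW><a>1</a><b>x</b></ROW>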
/**
* (Java-specific) A transform for timestamps and dates to partition data into years.
*
* @group partition_transforms
* @since 3.0.0
*/
def years(e: Column): Column = partitioning.years(e)
/**
* (Java-specific) A transform for timestamps and dates to partition data into months.
*
* @group partition_transforms
* @since 3.0.0
*/
def months(e: Column): Column = partitioning.months(e)
/**
* (Java-specific) A transform for timestamps and dates to partition data into days.
*
* @group partition_transforms
* @since 3.0.0
*/
def days(e: Column): Column = partitioning.days(e)
/**
* Returns a string array of values within the nodes of xml that match the XPath expression.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath(xml: Column, path: Column): Column =
Column.fn("xpath", xml, path)
/**
* Returns true if the XPath expression evaluates to true, or if a matching node is found.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_boolean(xml: Column, path: Column): Column =
Column.fn("xpath_boolean", xml, path)
/**
* Returns a double value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_double(xml: Column, path: Column): Column =
Column.fn("xpath_double", xml, path)
/**
* Returns a double value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_number(xml: Column, path: Column): Column =
Column.fn("xpath_number", xml, path)
/**
* Returns a float value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_float(xml: Column, path: Column): Column =
Column.fn("xpath_float", xml, path)
/**
* Returns an integer value, or the value zero if no match is found, or if a match is found but
* the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_int(xml: Column, path: Column): Column =
Column.fn("xpath_int", xml, path)
/**
* Returns a long integer value, or the value zero if no match is found, or if a match is found
* but the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_long(xml: Column, path: Column): Column =
Column.fn("xpath_long", xml, path)
/**
* Returns a short integer value, or the value zero if no match is found, or if a match is found
* but the value is non-numeric.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_short(xml: Column, path: Column): Column =
Column.fn("xpath_short", xml, path)
/**
* Returns the text contents of the first xml node that matches the XPath expression.
*
* @group xml_funcs
* @since 3.5.0
*/
def xpath_string(xml: Column, path: Column): Column =
Column.fn("xpath_string", xml, path)
/**
* (Java-specific) A transform for timestamps to partition data into hours.
*
* @group partition_transforms
* @since 3.0.0
*/
def hours(e: Column): Column = partitioning.hours(e)
/**
* Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to
* `targetTz`.
*
* @param sourceTz
* the time zone for the input timestamp. If it is omitted, the current session time zone is
* used as the source time zone.
* @param targetTz
* the time zone to which the input timestamp should be converted.
* @param sourceTs
* a timestamp without time zone.
* @group datetime_funcs
* @since 3.5.0
*/
def convert_timezone(sourceTz: Column, targetTz: Column, sourceTs: Column): Column =
Column.fn("convert_timezone", sourceTz, targetTz, sourceTs)
/**
* Converts the timestamp without time zone `sourceTs` from the current time zone to `targetTz`.
*
* @param targetTz
* the time zone to which the input timestamp should be converted.
* @param sourceTs
* a timestamp without time zone.
* @group datetime_funcs
* @since 3.5.0
*/
def convert_timezone(targetTz: Column, sourceTs: Column): Column =
Column.fn("convert_timezone", targetTz, sourceTs)
/**
* Make DayTimeIntervalType duration from days, hours, mins and secs.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column, hours: Column, mins: Column, secs: Column): Column =
Column.fn("make_dt_interval", days, hours, mins, secs)
/**
* Make DayTimeIntervalType duration from days, hours and mins.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column, hours: Column, mins: Column): Column =
Column.fn("make_dt_interval", days, hours, mins)
/**
* Make DayTimeIntervalType duration from days and hours.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column, hours: Column): Column =
Column.fn("make_dt_interval", days, hours)
/**
* Make DayTimeIntervalType duration from days.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(days: Column): Column =
Column.fn("make_dt_interval", days)
/**
* Make DayTimeIntervalType duration.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_dt_interval(): Column =
Column.fn("make_dt_interval")
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days, hours, mins, secs)
/**
* Make interval from years, months, weeks, days, hours, mins and secs.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_interval", years, months, weeks, days, hours, mins, secs)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days, hours, mins)
/**
* Make interval from years, months, weeks, days, hours and mins.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column,
mins: Column): Column =
Column.fn("make_interval", years, months, weeks, days, hours, mins)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days, hours)
/**
* Make interval from years, months, weeks, days and hours.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(
years: Column,
months: Column,
weeks: Column,
days: Column,
hours: Column): Column =
Column.fn("make_interval", years, months, weeks, days, hours)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column, months: Column, weeks: Column, days: Column): Column =
Column.fn("try_make_interval", years, months, weeks, days)
/**
* Make interval from years, months, weeks and days.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column, months: Column, weeks: Column, days: Column): Column =
Column.fn("make_interval", years, months, weeks, days)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column, months: Column, weeks: Column): Column =
Column.fn("try_make_interval", years, months, weeks)
/**
* Make interval from years, months and weeks.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column, months: Column, weeks: Column): Column =
Column.fn("make_interval", years, months, weeks)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column, months: Column): Column =
Column.fn("try_make_interval", years, months)
/**
* Make interval from years and months.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column, months: Column): Column =
Column.fn("make_interval", years, months)
/**
* This is a special version of `make_interval` that performs the same operation, but returns a
* NULL value instead of raising an error if the interval cannot be created.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_interval(years: Column): Column =
Column.fn("try_make_interval", years)
/**
* Make interval from years.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(years: Column): Column =
Column.fn("make_interval", years)
/**
* Make interval.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_interval(): Column =
Column.fn("make_interval")
/**
* Create timestamp from years, months, days, hours, mins, secs and timezone fields. The result
* data type is consistent with the value of configuration `spark.sql.timestampType`. If the
* configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs.
* Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("make_timestamp", years, months, days, hours, mins, secs, timezone)
/**
* Create timestamp from years, months, days, hours, mins and secs fields. The result data type
* is consistent with the value of configuration `spark.sql.timestampType`. If the configuration
* `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it
* will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_timestamp", years, months, days, hours, mins, secs)
/**
* Try to create a timestamp from years, months, days, hours, mins, secs and timezone fields.
* The result data type is consistent with the value of configuration `spark.sql.timestampType`.
* The function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("try_make_timestamp", years, months, days, hours, mins, secs, timezone)
/**
* Try to create a timestamp from years, months, days, hours, mins, and secs fields. The result
* data type is consistent with the value of configuration `spark.sql.timestampType`. The
* function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_timestamp", years, months, days, hours, mins, secs)
/**
* Create a timestamp with local time zone from years, months, days, hours, mins, secs and
* and timezone fields. If the configuration `spark.sql.ansi.enabled` is false, the function
* returns NULL on invalid inputs. Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("make_timestamp_ltz", years, months, days, hours, mins, secs, timezone)
/**
* Create a timestamp with local time zone from years, months, days, hours, mins and
* secs fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns
* NULL on invalid inputs. Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_timestamp_ltz", years, months, days, hours, mins, secs)
/**
* Try to create a timestamp with local time zone from years, months, days, hours,
* mins, secs and timezone fields. The function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column,
timezone: Column): Column =
Column.fn("try_make_timestamp_ltz", years, months, days, hours, mins, secs, timezone)
/**
* Try to create a timestamp with local time zone from years, months, days, hours,
* mins and secs fields. The function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp_ltz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_timestamp_ltz", years, months, days, hours, mins, secs)
/**
* Create a local date-time from years, months, days, hours, mins and secs fields. If the
* configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs.
* Otherwise, it will throw an error instead.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_timestamp_ntz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("make_timestamp_ntz", years, months, days, hours, mins, secs)
/**
* Try to create a local date-time from years, months, days, hours, mins and secs fields. The
* function returns NULL on invalid inputs.
*
* @group datetime_funcs
* @since 4.0.0
*/
def try_make_timestamp_ntz(
years: Column,
months: Column,
days: Column,
hours: Column,
mins: Column,
secs: Column): Column =
Column.fn("try_make_timestamp_ntz", years, months, days, hours, mins, secs)
/**
* Make year-month interval from years and months.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_ym_interval(years: Column, months: Column): Column =
Column.fn("make_ym_interval", years, months)
/**
* Make year-month interval from years.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_ym_interval(years: Column): Column = Column.fn("make_ym_interval", years)
/**
* Make year-month interval.
*
* @group datetime_funcs
* @since 3.5.0
*/
def make_ym_interval(): Column = Column.fn("make_ym_interval")
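// A sketch across the arities (omitted fields default to zero):
//
//   spark.range(1).select(
//     make_ym_interval(lit(1), lit(2)),  // => INTERVAL '1-2' YEAR TO MONTH
//     make_ym_interval(lit(2)),          // => INTERVAL '2-0' YEAR TO MONTH
//     make_ym_interval())                // => INTERVAL '0-0' YEAR TO MONTH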
/**
* (Java-specific) A transform for any type that partitions by a hash of the input column.
*
* @group partition_transforms
* @since 3.0.0
*/
def bucket(numBuckets: Column, e: Column): Column = partitioning.bucket(numBuckets, e)
/**
* (Java-specific) A transform for any type that partitions by a hash of the input column.
*
* @group partition_transforms
* @since 3.0.0
*/
def bucket(numBuckets: Int, e: Column): Column = partitioning.bucket(numBuckets, e)
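// The partition transforms (years, months, days, hours, bucket) cannot be
// evaluated as ordinary expressions; they are only valid as partitioning
// expressions for V2 tables. A sketch with the DataFrameWriterV2 API
// (`catalog.db.events` is a hypothetical table identifier):
//
//   df.writeTo("catalog.db.events")
//     .partitionedBy(years($"ts"), bucket(16, $"id"))
//     .create()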
//////////////////////////////////////////////////////////////////////////////////////////////
// Predicates functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
* Returns `col2` if `col1` is null, or `col1` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def ifnull(col1: Column, col2: Column): Column = Column.fn("ifnull", col1, col2)
/**
* Returns true if `col` is not null, or false otherwise.
*
* @group predicate_funcs
* @since 3.5.0
*/
def isnotnull(col: Column): Column = Column.fn("isnotnull", col)
/**
* Returns the same result as the EQUAL(=) operator for non-null operands, but returns true if
* both are null, and false if one of them is null.
*
* @group predicate_funcs
* @since 3.5.0
*/
def equal_null(col1: Column, col2: Column): Column = Column.fn("equal_null", col1, col2)
/**
* Returns null if `col1` is equal to `col2`, or `col1` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def nullif(col1: Column, col2: Column): Column = Column.fn("nullif", col1, col2)
/**
* Returns null if `col` is equal to zero, or `col` otherwise.
*
* @group conditional_funcs
* @since 4.0.0
*/
def nullifzero(col: Column): Column = Column.fn("nullifzero", col)
/**
* Returns `col2` if `col1` is null, or `col1` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def nvl(col1: Column, col2: Column): Column = Column.fn("nvl", col1, col2)
/**
* Returns `col2` if `col1` is not null, or `col3` otherwise.
*
* @group conditional_funcs
* @since 3.5.0
*/
def nvl2(col1: Column, col2: Column, col3: Column): Column = Column.fn("nvl2", col1, col2, col3)
/**
* Returns zero if `col` is null, or `col` otherwise.
*
* @group conditional_funcs
* @since 4.0.0
*/
def zeroifnull(col: Column): Column = Column.fn("zeroifnull", col)
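// A sketch of the null-handling family (assumes `import spark.implicits._`
// for toDF; per-row results in the comments):
//
//   val df = Seq((Some(1), Option.empty[Int])).toDF("a", "b")
//   df.select(
//     nvl($"b", $"a"),                 // 1     (b is null, fall back to a)
//     nvl2($"a", lit("y"), lit("n")),  // "y"   (a is not null)
//     nullif(lit(1), lit(1)),          // null  (operands are equal)
//     nullifzero(lit(0)),              // null
//     zeroifnull($"b"),                // 0
//     equal_null($"a", $"b"))          // false (exactly one side is null)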
// scalastyle:off line.size.limit
// scalastyle:off parameter.number
/* Use the following code to generate:
(0 to 10).foreach { x =>
val types = (1 to x).foldRight("RT")((i, s) => s"A$i, $s")
val typeSeq = "RT" +: (1 to x).map(i => s"A$i")
val typeTags = typeSeq.map(t => s"$t: TypeTag").mkString(", ")
val implicitTypeTags = typeSeq.map(t => s"implicitly[TypeTag[$t]]").mkString(", ")
println(s"""
|/**
| * Defines a Scala closure of $x arguments as user-defined function (UDF).
| * The data types are automatically inferred based on the Scala closure's
| * signature. By default the returned UDF is deterministic. To change it to
| * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`.
| *
| * @group udf_funcs
| * @since 1.3.0
| */
|def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = {
| SparkUserDefinedFunction(f, $implicitTypeTags)
|}""".stripMargin)