in sql/api/src/main/scala/org/apache/spark/sql/functions.scala [3515:5425]
def user(): Column = {
  // Built through `Column.fn` with the function name "user" and no arguments.
  Column.fn("user")
}
/**
 * Returns the name of the user of the current execution context.
 *
 * @group misc_funcs
 * @since 4.0.0
 */
def session_user(): Column = {
  Column.fn("session_user")
}
/**
 * Returns a universally unique identifier (UUID) as a string, in the canonical 36-character
 * UUID form.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def uuid(): Column = {
  // A new random long is drawn on every call and handed over as a literal argument.
  val randomArg = lit(SparkClassUtils.random.nextLong)
  Column.fn("uuid", randomArg)
}
/**
 * Returns an encrypted value of `input` using AES in given `mode` with the specified `padding`.
 * Key lengths of 16, 24 and 32 bytes are supported. Supported combinations of (`mode`,
 * `padding`) are ('ECB', 'PKCS'), ('GCM', 'NONE') and ('CBC', 'PKCS'). Optional initialization
 * vectors (IVs) are only supported for CBC and GCM modes. These must be 16 bytes for CBC and 12
 * bytes for GCM. If not provided, a random vector will be generated and prepended to the
 * output. Optional additional authenticated data (AAD) is only supported for GCM. If provided
 * for encryption, the identical AAD value must be provided for decryption. The default mode is
 * GCM.
 *
 * @param input
 *   The binary value to encrypt.
 * @param key
 *   The passphrase to use to encrypt the data.
 * @param mode
 *   Specifies which block cipher mode should be used to encrypt messages. Valid modes: ECB,
 *   GCM, CBC.
 * @param padding
 *   Specifies how to pad messages whose length is not a multiple of the block size. Valid
 *   values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS
 *   for CBC.
 * @param iv
 *   Optional initialization vector. Only supported for CBC and GCM modes. Valid values: None or
 *   "". 16-byte array for CBC mode. 12-byte array for GCM mode.
 * @param aad
 *   Optional additional authenticated data. Only supported for GCM mode. This can be any
 *   free-form input and must be provided for both encryption and decryption.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_encrypt(
    input: Column,
    key: Column,
    mode: Column,
    padding: Column,
    iv: Column,
    aad: Column): Column = Column.fn("aes_encrypt", input, key, mode, padding, iv, aad)
/**
 * Encrypts `input`. This overload takes `key`, `mode`, `padding` and `iv`, and leaves out the
 * additional authenticated data.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_encrypt(Column, Column, Column, Column, Column,
 *   Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_encrypt(
    input: Column,
    key: Column,
    mode: Column,
    padding: Column,
    iv: Column): Column = {
  Column.fn("aes_encrypt", input, key, mode, padding, iv)
}
/**
 * Encrypts `input`. This overload takes `key`, `mode` and `padding` only.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_encrypt(Column, Column, Column, Column, Column,
 *   Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_encrypt(input: Column, key: Column, mode: Column, padding: Column): Column = {
  Column.fn("aes_encrypt", input, key, mode, padding)
}
/**
 * Encrypts `input`. This overload takes `key` and `mode` only.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_encrypt(Column, Column, Column, Column, Column,
 *   Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_encrypt(input: Column, key: Column, mode: Column): Column = {
  Column.fn("aes_encrypt", input, key, mode)
}
/**
 * Encrypts `input`. This overload takes only the `key`.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_encrypt(Column, Column, Column, Column, Column,
 *   Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_encrypt(input: Column, key: Column): Column = {
  Column.fn("aes_encrypt", input, key)
}
/**
 * Returns a decrypted value of `input` using AES in `mode` with `padding`. Key lengths of 16,
 * 24 and 32 bytes are supported. Supported combinations of (`mode`, `padding`) are ('ECB',
 * 'PKCS'), ('GCM', 'NONE') and ('CBC', 'PKCS'). Optional additional authenticated data (AAD) is
 * only supported for GCM. If provided for encryption, the identical AAD value must be provided
 * for decryption. The default mode is GCM.
 *
 * @param input
 *   The binary value to decrypt.
 * @param key
 *   The passphrase to use to decrypt the data.
 * @param mode
 *   Specifies which block cipher mode should be used to decrypt messages. Valid modes: ECB,
 *   GCM, CBC.
 * @param padding
 *   Specifies how to pad messages whose length is not a multiple of the block size. Valid
 *   values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS
 *   for CBC.
 * @param aad
 *   Optional additional authenticated data. Only supported for GCM mode. This can be any
 *   free-form input and must be provided for both encryption and decryption.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_decrypt(
    input: Column,
    key: Column,
    mode: Column,
    padding: Column,
    aad: Column): Column =
  Column.fn("aes_decrypt", input, key, mode, padding, aad)
/**
 * Decrypts `input`. This overload takes `key`, `mode` and `padding`, and leaves out the
 * additional authenticated data.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_decrypt(Column, Column, Column, Column, Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_decrypt(input: Column, key: Column, mode: Column, padding: Column): Column = {
  Column.fn("aes_decrypt", input, key, mode, padding)
}
/**
 * Decrypts `input`. This overload takes `key` and `mode` only.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_decrypt(Column, Column, Column, Column, Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_decrypt(input: Column, key: Column, mode: Column): Column = {
  Column.fn("aes_decrypt", input, key, mode)
}
/**
 * Decrypts `input`. This overload takes only the `key`.
 *
 * @see
 *   `org.apache.spark.sql.functions.aes_decrypt(Column, Column, Column, Column, Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def aes_decrypt(input: Column, key: Column): Column = {
  Column.fn("aes_decrypt", input, key)
}
/**
 * A variant of `aes_decrypt` that carries out the same operation but yields NULL, rather than
 * raising an error, when the decryption cannot be performed.
 *
 * @param input
 *   The binary value to decrypt.
 * @param key
 *   The passphrase to use to decrypt the data.
 * @param mode
 *   The block cipher mode used to decrypt messages. Valid modes: ECB, GCM, CBC.
 * @param padding
 *   How messages whose length is not a multiple of the block size are padded. Valid values:
 *   PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for
 *   CBC.
 * @param aad
 *   Optional additional authenticated data. Only supported for GCM mode. This can be any
 *   free-form input and must be provided for both encryption and decryption.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def try_aes_decrypt(
    input: Column,
    key: Column,
    mode: Column,
    padding: Column,
    aad: Column): Column = {
  Column.fn("try_aes_decrypt", input, key, mode, padding, aad)
}
/**
 * Decrypts `input`. This overload takes `key`, `mode` and `padding`, and leaves out the
 * additional authenticated data.
 *
 * @see
 *   `org.apache.spark.sql.functions.try_aes_decrypt(Column, Column, Column, Column, Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def try_aes_decrypt(input: Column, key: Column, mode: Column, padding: Column): Column = {
  Column.fn("try_aes_decrypt", input, key, mode, padding)
}
/**
 * Decrypts `input`. This overload takes `key` and `mode` only.
 *
 * @see
 *   `org.apache.spark.sql.functions.try_aes_decrypt(Column, Column, Column, Column, Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def try_aes_decrypt(input: Column, key: Column, mode: Column): Column = {
  Column.fn("try_aes_decrypt", input, key, mode)
}
/**
 * Decrypts `input`. This overload takes only the `key`.
 *
 * @see
 *   `org.apache.spark.sql.functions.try_aes_decrypt(Column, Column, Column, Column, Column)`
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def try_aes_decrypt(input: Column, key: Column): Column = {
  Column.fn("try_aes_decrypt", input, key)
}
/**
 * Returns the sha1 hash of `col`, rendered as a hex string.
 *
 * @group hash_funcs
 * @since 3.5.0
 */
def sha(col: Column): Column = {
  Column.fn("sha", col)
}
/**
 * Returns the length of the block currently being read, or -1 when that is not available.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def input_file_block_length(): Column = {
  Column.fn("input_file_block_length")
}
/**
 * Returns the start offset of the block currently being read, or -1 when that is not
 * available.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def input_file_block_start(): Column = {
  Column.fn("input_file_block_start")
}
/**
 * Invokes a method by reflection.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
@scala.annotation.varargs
def reflect(cols: Column*): Column = {
  // All supplied columns are forwarded unchanged as the function's arguments.
  Column.fn("reflect", cols: _*)
}
/**
 * Invokes a method by reflection.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
@scala.annotation.varargs
def java_method(cols: Column*): Column = {
  // All supplied columns are forwarded unchanged as the function's arguments.
  Column.fn("java_method", cols: _*)
}
/**
 * A variant of `reflect` that carries out the same operation but yields NULL, rather than
 * raising an error, when the invoked method throws an exception.
 *
 * @group misc_funcs
 * @since 4.0.0
 */
@scala.annotation.varargs
def try_reflect(cols: Column*): Column = {
  // All supplied columns are forwarded unchanged as the function's arguments.
  Column.fn("try_reflect", cols: _*)
}
/**
 * Returns the Spark version as a string made of two fields: the release version followed by
 * the git revision.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def version(): Column = {
  Column.fn("version")
}
/**
 * Returns the data type of the input as a DDL-formatted type string.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def typeof(col: Column): Column = {
  Column.fn("typeof", col)
}
/**
 * Separates `col1`, ..., `colk` into `n` rows. Unless specified otherwise, the resulting
 * columns are named col0, col1, etc.
 *
 * @group generator_funcs
 * @since 3.5.0
 */
@scala.annotation.varargs
def stack(cols: Column*): Column = {
  // All supplied columns are forwarded unchanged as the function's arguments.
  Column.fn("stack", cols: _*)
}
/**
 * Returns a random value, with independent and identically distributed (i.i.d.) values, drawn
 * from the specified range of numbers. The numbers giving the minimum and maximum of the range
 * must be constant. When both are integers the result is an integer too; when one or both are
 * floating-point numbers the result is a floating-point number.
 *
 * @group math_funcs
 * @since 4.0.0
 */
def uniform(min: Column, max: Column): Column = {
  // A new random long is drawn on every call and used as the seed argument.
  val randomSeed = lit(SparkClassUtils.random.nextLong)
  uniform(min, max, randomSeed)
}
/**
 * Returns a random value, with independent and identically distributed (i.i.d.) values, drawn
 * from the specified range of numbers using the chosen random seed. The numbers giving the
 * minimum and maximum of the range must be constant. When both are integers the result is an
 * integer too; when one or both are floating-point numbers the result is a floating-point
 * number.
 *
 * @group math_funcs
 * @since 4.0.0
 */
def uniform(min: Column, max: Column, seed: Column): Column = {
  Column.fn("uniform", min, max, seed)
}
/**
 * Returns a random value with independent and identically distributed (i.i.d.) values,
 * uniformly distributed in [0, 1).
 *
 * @group math_funcs
 * @since 3.5.0
 */
def random(seed: Column): Column = {
  Column.fn("random", seed)
}
/**
 * Returns a random value with independent and identically distributed (i.i.d.) values,
 * uniformly distributed in [0, 1).
 *
 * @group math_funcs
 * @since 3.5.0
 */
def random(): Column = {
  // A new random long is drawn on every call and used as the seed argument.
  val randomSeed = lit(SparkClassUtils.random.nextLong)
  random(randomSeed)
}
/**
 * Returns the bit position for the given input column.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def bitmap_bit_position(col: Column): Column =
  Column.fn("bitmap_bit_position", col)
/**
 * Returns the bucket number for the given input column.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def bitmap_bucket_number(col: Column): Column =
  Column.fn("bitmap_bucket_number", col)
/**
 * Returns a bitmap in which the bit positions taken from all values of the input column are
 * set. The input column will most likely be bitmap_bit_position().
 *
 * @group agg_funcs
 * @since 3.5.0
 */
def bitmap_construct_agg(col: Column): Column = {
  Column.fn("bitmap_construct_agg", col)
}
/**
 * Returns how many bits are set in the input bitmap.
 *
 * @group misc_funcs
 * @since 3.5.0
 */
def bitmap_count(col: Column): Column = {
  Column.fn("bitmap_count", col)
}
/**
 * Returns the bitwise OR of all bitmaps in the input column, as a bitmap. The input column
 * should hold bitmaps created by bitmap_construct_agg().
 *
 * @group agg_funcs
 * @since 3.5.0
 */
def bitmap_or_agg(col: Column): Column = {
  Column.fn("bitmap_or_agg", col)
}
//////////////////////////////////////////////////////////////////////////////////////////////
// String functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Computes the numeric value of the first character of the string column and returns it as an
 * int column.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def ascii(e: Column): Column = {
  Column.fn("ascii", e)
}
/**
 * Computes the BASE64 encoding of a binary column and returns it as a string column. This is
 * the inverse of unbase64.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def base64(e: Column): Column = {
  Column.fn("base64", e)
}
/**
 * Calculates the bit length of the specified string column.
 *
 * @group string_funcs
 * @since 3.3.0
 */
def bit_length(e: Column): Column = {
  Column.fn("bit_length", e)
}
/**
 * Concatenates several input string columns into a single string column, placing the given
 * separator between them.
 *
 * @note
 *   Input strings which are null are skipped.
 *
 * @group string_funcs
 * @since 1.5.0
 */
@scala.annotation.varargs
def concat_ws(sep: String, exprs: Column*): Column = {
  // The separator goes first as a literal, followed by the columns to join.
  val arguments = lit(sep) +: exprs
  Column.fn("concat_ws", arguments: _*)
}
/**
 * Converts the first argument from a binary into a string using the provided character set
 * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'UTF-32'). If
 * either argument is null, the result will also be null.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def decode(value: Column, charset: String): Column = {
  val charsetLiteral = lit(charset)
  Column.fn("decode", value, charsetLiteral)
}
/**
 * Converts the first argument from a string into a binary using the provided character set
 * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'UTF-32'). If
 * either argument is null, the result will also be null.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def encode(value: Column, charset: String): Column = {
  val charsetLiteral = lit(charset)
  Column.fn("encode", value, charsetLiteral)
}
/**
 * Returns true when the input is a valid UTF-8 string, and false otherwise.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def is_valid_utf8(str: Column): Column = {
  Column.fn("is_valid_utf8", str)
}
/**
 * Returns a new string in which every invalid UTF-8 byte sequence, if there is any, has been
 * replaced by the Unicode replacement character (U+FFFD).
 *
 * @group string_funcs
 * @since 4.0.0
 */
def make_valid_utf8(str: Column): Column = {
  Column.fn("make_valid_utf8", str)
}
/**
 * Returns the input value when it is a valid UTF-8 string, and otherwise emits a
 * SparkIllegalArgumentException exception.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def validate_utf8(str: Column): Column = {
  Column.fn("validate_utf8", str)
}
/**
 * Returns the input value when it is a valid UTF-8 string, and NULL otherwise.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def try_validate_utf8(str: Column): Column = {
  Column.fn("try_validate_utf8", str)
}
/**
 * Formats the numeric column x like '#,###,###.##', rounded to d decimal places using the
 * HALF_EVEN round mode, and returns the result as a string column.
 *
 * When d is 0 the result has no decimal point or fractional part. When d is less than 0 the
 * result will be null.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def format_number(x: Column, d: Int): Column = {
  val decimalPlaces = lit(d)
  Column.fn("format_number", x, decimalPlaces)
}
/**
 * Formats the arguments printf-style and returns the result as a string column.
 *
 * @group string_funcs
 * @since 1.5.0
 */
@scala.annotation.varargs
def format_string(format: String, arguments: Column*): Column = {
  // The format string goes first as a literal, followed by the values to format.
  val allInputs = lit(format) +: arguments
  Column.fn("format_string", allInputs: _*)
}
/**
 * Returns a new string column in which the first letter of each word is converted to
 * uppercase. Words are delimited by whitespace.
 *
 * For example, "hello world" will become "Hello World".
 *
 * @group string_funcs
 * @since 1.5.0
 */
def initcap(e: Column): Column = {
  Column.fn("initcap", e)
}
/**
 * Locates the position of the first occurrence of `substring` in the given string column.
 * Returns null if either of the arguments is null.
 *
 * @note
 *   The position is not zero based, but 1 based index. Returns 0 if substr could not be found
 *   in str.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def instr(str: Column, substring: String): Column = {
  // Wrap the plain string as a literal column and defer to the Column-based overload.
  val needle = lit(substring)
  instr(str, needle)
}
/**
 * Locates the position of the first occurrence of the `substring` column in the given string
 * column. Returns null if either of the arguments is null.
 *
 * @note
 *   The position is not zero based, but 1 based index. Returns 0 if substr could not be found
 *   in str.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def instr(str: Column, substring: Column): Column = {
  Column.fn("instr", str, substring)
}
/**
 * Computes the character length of a given string, or the number of bytes of a binary string.
 * Trailing spaces count towards the length of character strings, and binary zeros count
 * towards the length of binary strings.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def length(e: Column): Column = {
  Column.fn("length", e)
}
/**
 * Computes the character length of a given string, or the number of bytes of a binary string.
 * Trailing spaces count towards the length of character strings, and binary zeros count
 * towards the length of binary strings.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def len(e: Column): Column = {
  Column.fn("len", e)
}
/**
 * Converts a string column to lower case.
 *
 * @group string_funcs
 * @since 1.3.0
 */
def lower(e: Column): Column = {
  Column.fn("lower", e)
}
/**
 * Computes the Levenshtein distance of the two given string columns if it's less than or equal
 * to a given threshold.
 *
 * @return
 *   result distance, or -1
 * @group string_funcs
 * @since 3.5.0
 */
def levenshtein(l: Column, r: Column, threshold: Int): Column = {
  val thresholdLiteral = lit(threshold)
  Column.fn("levenshtein", l, r, thresholdLiteral)
}
/**
 * Computes the Levenshtein distance between the two given string columns.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def levenshtein(l: Column, r: Column): Column = {
  Column.fn("levenshtein", l, r)
}
/**
 * Locates the position of the first occurrence of substr.
 *
 * @note
 *   The position is not zero based, but 1 based index. Returns 0 if substr could not be found
 *   in str.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def locate(substr: String, str: Column): Column = {
  val needle = lit(substr)
  Column.fn("locate", needle, str)
}
/**
 * Locates the position of the first occurrence of substr in a string column, after position
 * pos.
 *
 * @note
 *   The position is not zero based, but 1 based index. Returns 0 if substr could not be found
 *   in str.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def locate(substr: String, str: Column, pos: Int): Column = {
  val needle = lit(substr)
  val startPos = lit(pos)
  Column.fn("locate", needle, str, startPos)
}
/**
 * Left-pads the string column with pad up to a length of len. When the string column is longer
 * than len, the returned value is shortened to len characters.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def lpad(str: Column, len: Int, pad: String): Column = {
  // Wrap the plain values as literal columns and defer to the Column-based overload.
  val targetLength = lit(len)
  val padding = lit(pad)
  lpad(str, targetLength, padding)
}
/**
 * Left-pads the binary column with pad up to a byte length of len. When the binary column is
 * longer than len, the returned value is shortened to len bytes.
 *
 * @group string_funcs
 * @since 3.3.0
 */
def lpad(str: Column, len: Int, pad: Array[Byte]): Column = {
  // Wrap the plain values as literal columns and defer to the Column-based overload.
  val targetLength = lit(len)
  val padding = lit(pad)
  lpad(str, targetLength, padding)
}
/**
 * Left-pads the string column with pad up to a length of len. When the string column is longer
 * than len, the returned value is shortened to len characters.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def lpad(str: Column, len: Column, pad: Column): Column = {
  Column.fn("lpad", str, len, pad)
}
/**
 * Trims the spaces from the left end of the specified string value.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def ltrim(e: Column): Column = {
  Column.fn("ltrim", e)
}
/**
 * Trims the specified character string from the left end of the specified string column.
 *
 * @group string_funcs
 * @since 2.3.0
 */
def ltrim(e: Column, trimString: String): Column = {
  // Wrap the plain string as a literal column and defer to the Column-based overload.
  val trimLiteral = lit(trimString)
  ltrim(e, trimLiteral)
}
/**
 * Trims the specified character string from the left end of the specified string column.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def ltrim(e: Column, trim: Column): Column = {
  // Argument order handed to `Column.fn`: the trim string first, then the source column.
  Column.fn("ltrim", trim, e)
}
/**
 * Calculates the byte length of the specified string column.
 *
 * @group string_funcs
 * @since 3.3.0
 */
def octet_length(e: Column): Column = {
  Column.fn("octet_length", e)
}
/**
 * Marks the given column with the specified collation.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def collate(e: Column, collation: String): Column = {
  val collationLiteral = lit(collation)
  Column.fn("collate", e, collationLiteral)
}
/**
 * Returns the name of the collation of the given column.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def collation(e: Column): Column = {
  Column.fn("collation", e)
}
/**
 * Returns true when `str` matches `regexp`, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def rlike(str: Column, regexp: Column): Column = {
  Column.fn("rlike", str, regexp)
}
/**
 * Returns true when `str` matches `regexp`, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def regexp(str: Column, regexp: Column): Column = {
  Column.fn("regexp", str, regexp)
}
/**
 * Returns true when `str` matches `regexp`, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def regexp_like(str: Column, regexp: Column): Column = {
  Column.fn("regexp_like", str, regexp)
}
/**
 * Returns how many times the regular expression pattern `regexp` is matched in the string
 * `str`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def regexp_count(str: Column, regexp: Column): Column = {
  Column.fn("regexp_count", str, regexp)
}
/**
 * Extracts a specific group matched by a Java regex from the specified string column. When the
 * regex did not match, or the specified group did not match, an empty string is returned. When
 * the specified group index exceeds the group count of the regex, an IllegalArgumentException
 * will be thrown.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def regexp_extract(e: Column, exp: String, groupIdx: Int): Column = {
  val pattern = lit(exp)
  val groupIndex = lit(groupIdx)
  Column.fn("regexp_extract", e, pattern, groupIndex)
}
/**
 * Extracts all strings in `str` that match the `regexp` expression and correspond to the first
 * regex group index.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def regexp_extract_all(str: Column, regexp: Column): Column = {
  Column.fn("regexp_extract_all", str, regexp)
}
/**
 * Extracts all strings in `str` that match the `regexp` expression and correspond to the regex
 * group index `idx`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def regexp_extract_all(str: Column, regexp: Column, idx: Column): Column = {
  Column.fn("regexp_extract_all", str, regexp, idx)
}
/**
 * Replaces every substring of the specified string value that matches regexp with rep.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def regexp_replace(e: Column, pattern: String, replacement: String): Column = {
  // Wrap the plain strings as literal columns and defer to the Column-based overload.
  val patternLiteral = lit(pattern)
  val replacementLiteral = lit(replacement)
  regexp_replace(e, patternLiteral, replacementLiteral)
}
/**
 * Replaces every substring of the specified string value that matches regexp with rep.
 *
 * @group string_funcs
 * @since 2.1.0
 */
def regexp_replace(e: Column, pattern: Column, replacement: Column): Column = {
  Column.fn("regexp_replace", e, pattern, replacement)
}
/**
 * Returns the substring within the string `str` that matches the regular expression `regexp`.
 * When the regular expression is not found, the result is null.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def regexp_substr(str: Column, regexp: Column): Column = {
  Column.fn("regexp_substr", str, regexp)
}
/**
 * Searches a string for a regular expression and returns an integer giving the beginning
 * position of the matched substring. Positions are 1-based, not 0-based. Returns 0 when no
 * match is found.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def regexp_instr(str: Column, regexp: Column): Column = {
  Column.fn("regexp_instr", str, regexp)
}
/**
 * Searches a string for a regular expression and returns an integer giving the beginning
 * position of the matched substring. Positions are 1-based, not 0-based. Returns 0 when no
 * match is found.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def regexp_instr(str: Column, regexp: Column, idx: Column): Column = {
  Column.fn("regexp_instr", str, regexp, idx)
}
/**
 * Decodes a BASE64 encoded string column and returns it as a binary column. This is the
 * inverse of base64.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def unbase64(e: Column): Column = {
  Column.fn("unbase64", e)
}
/**
 * Right-pads the string column with pad up to a length of len. When the string column is
 * longer than len, the returned value is shortened to len characters.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def rpad(str: Column, len: Int, pad: String): Column = {
  // Wrap the plain values as literal columns and defer to the Column-based overload.
  val targetLength = lit(len)
  val padding = lit(pad)
  rpad(str, targetLength, padding)
}
/**
 * Right-pads the binary column with pad up to a byte length of len. When the binary column is
 * longer than len, the returned value is shortened to len bytes.
 *
 * @group string_funcs
 * @since 3.3.0
 */
def rpad(str: Column, len: Int, pad: Array[Byte]): Column = {
  // Wrap the plain values as literal columns and defer to the Column-based overload.
  val targetLength = lit(len)
  val padding = lit(pad)
  rpad(str, targetLength, padding)
}
/**
 * Right-pads the string column with pad up to a length of len. When the string column is
 * longer than len, the returned value is shortened to len characters.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def rpad(str: Column, len: Column, pad: Column): Column = {
  Column.fn("rpad", str, len, pad)
}
/**
 * Repeats a string column n times and returns the result as a new string column.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def repeat(str: Column, n: Int): Column = {
  val times = lit(n)
  Column.fn("repeat", str, times)
}
/**
 * Repeats a string column n times and returns the result as a new string column.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def repeat(str: Column, n: Column): Column = {
  Column.fn("repeat", str, n)
}
/**
 * Trims the spaces from the right end of the specified string value.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def rtrim(e: Column): Column = {
  Column.fn("rtrim", e)
}
/**
 * Trims the specified character string from the right end of the specified string column.
 *
 * @group string_funcs
 * @since 2.3.0
 */
def rtrim(e: Column, trimString: String): Column = {
  // Wrap the plain string as a literal column and defer to the Column-based overload.
  val trimLiteral = lit(trimString)
  rtrim(e, trimLiteral)
}
/**
 * Trims the specified character string from the right end of the specified string column.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def rtrim(e: Column, trim: Column): Column = {
  // Argument order handed to `Column.fn`: the trim string first, then the source column.
  Column.fn("rtrim", trim, e)
}
/**
 * Returns the soundex code of the specified expression.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def soundex(e: Column): Column = {
  Column.fn("soundex", e)
}
/**
 * Splits str around matches of the given pattern.
 *
 * @param str
 *   a string expression to split
 * @param pattern
 *   a string holding a regular expression, which should be a Java regular expression.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def split(str: Column, pattern: String): Column = {
  val patternLiteral = lit(pattern)
  Column.fn("split", str, patternLiteral)
}
/**
 * Splits str around matches of the given pattern.
 *
 * @param str
 *   a string expression to split
 * @param pattern
 *   a column of strings holding a regular expression, which should be a Java regular
 *   expression.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def split(str: Column, pattern: Column): Column = {
  Column.fn("split", str, pattern)
}
/**
 * Splits str around matches of the given pattern.
 *
 * @param str
 *   a string expression to split
 * @param pattern
 *   a string holding a regular expression, which should be a Java regular expression.
 * @param limit
 *   an integer expression which controls the number of times the regex is applied. <ul>
 *   <li>limit greater than 0: The resulting array's length will not be more than limit, and the
 *   resulting array's last entry will contain all input beyond the last matched regex.</li>
 *   <li>limit less than or equal to 0: `regex` will be applied as many times as possible, and
 *   the resulting array can be of any size.</li> </ul>
 *
 * @group string_funcs
 * @since 3.0.0
 */
def split(str: Column, pattern: String, limit: Int): Column = {
  val patternLiteral = lit(pattern)
  val limitLiteral = lit(limit)
  Column.fn("split", str, patternLiteral, limitLiteral)
}
/**
 * Splits str around matches of the given pattern.
 *
 * @param str
 *   a string expression to split
 * @param pattern
 *   a column of strings holding a regular expression, which should be a Java regular
 *   expression.
 * @param limit
 *   a column of integer expression which controls the number of times the regex is applied.
 *   <ul> <li>limit greater than 0: The resulting array's length will not be more than limit,
 *   and the resulting array's last entry will contain all input beyond the last matched
 *   regex.</li> <li>limit less than or equal to 0: `regex` will be applied as many times as
 *   possible, and the resulting array can be of any size.</li> </ul>
 *
 * @group string_funcs
 * @since 4.0.0
 */
def split(str: Column, pattern: Column, limit: Column): Column = {
  Column.fn("split", str, pattern, limit)
}
/**
 * When str is of String type, returns the substring that starts at `pos` and has length `len`.
 * When str is of Binary type, returns the slice of the byte array that starts at byte `pos`
 * and has length `len`.
 *
 * @note
 *   The position is not zero based, but 1 based index.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def substring(str: Column, pos: Int, len: Int): Column = {
  val start = lit(pos)
  val count = lit(len)
  Column.fn("substring", str, start, count)
}
/**
 * When str is of String type, returns the substring that starts at `pos` and has length `len`.
 * When str is of Binary type, returns the slice of the byte array that starts at byte `pos`
 * and has length `len`.
 *
 * @note
 *   The position is not zero based, but 1 based index.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def substring(str: Column, pos: Column, len: Column): Column = {
  Column.fn("substring", str, pos, len)
}
/**
 * Returns the substring from string str before count occurrences of the delimiter delim. If
 * count is positive, everything to the left of the final delimiter (counting from the left) is
 * returned. If count is negative, everything to the right of the final delimiter (counting
 * from the right) is returned. substring_index performs a case-sensitive match when searching
 * for delim.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def substring_index(str: Column, delim: String, count: Int): Column =
  Column.fn("substring_index", str, lit(delim), lit(count))
/**
 * Overlays the specified portion of `src` with `replace`, beginning at byte position `pos` of
 * `src` and continuing for `len` bytes.
 *
 * @group string_funcs
 * @since 3.0.0
 */
def overlay(src: Column, replace: Column, pos: Column, len: Column): Column = {
  Column.fn("overlay", src, replace, pos, len)
}
/**
 * Overlays the specified portion of `src` with `replace`, beginning at byte position `pos` of
 * `src`.
 *
 * @group string_funcs
 * @since 3.0.0
 */
def overlay(src: Column, replace: Column, pos: Column): Column = {
  Column.fn("overlay", src, replace, pos)
}
/**
 * Splits a string into arrays of sentences, each sentence being an array of words.
 *
 * @group string_funcs
 * @since 3.2.0
 */
def sentences(string: Column, language: Column, country: Column): Column = {
  Column.fn("sentences", string, language, country)
}
/**
 * Splits a string into arrays of sentences, each sentence being an array of words. The default
 * `country`('') is used.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def sentences(string: Column, language: Column): Column = {
  Column.fn("sentences", string, language)
}
/**
 * Splits a string into arrays of sentences, each sentence being an array of words. The default
 * locale is used.
 *
 * @group string_funcs
 * @since 3.2.0
 */
def sentences(string: Column): Column = {
  Column.fn("sentences", string)
}
/**
 * Translates any character in src by a character in replaceString. The characters in
 * replaceString correspond to the characters in matchingString. The translation happens
 * whenever a character in the string matches a character in `matchingString`.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def translate(src: Column, matchingString: String, replaceString: String): Column = {
  val matching = lit(matchingString)
  val replacing = lit(replaceString)
  Column.fn("translate", src, matching, replacing)
}
/**
 * Trims the spaces from both ends of the specified string column.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def trim(e: Column): Column = {
  Column.fn("trim", e)
}
/**
 * Trims the specified character from both ends of the specified string column.
 *
 * @group string_funcs
 * @since 2.3.0
 */
def trim(e: Column, trimString: String): Column = {
  // Wrap the plain string as a literal column and defer to the Column-based overload.
  val trimLiteral = lit(trimString)
  trim(e, trimLiteral)
}
/**
 * Trims the specified character from both ends of the specified string column.
 *
 * @group string_funcs
 * @since 4.0.0
 */
def trim(e: Column, trim: Column): Column = {
  // Argument order handed to `Column.fn`: the trim string first, then the source column.
  Column.fn("trim", trim, e)
}
/**
 * Turns a string column into upper case.
 *
 * @group string_funcs
 * @since 1.3.0
 */
def upper(e: Column): Column = {
  Column.fn("upper", e)
}
/**
 * Turns the input `e` into a binary value according to the supplied format `f`. The format
 * can be a case-insensitive string literal of "hex", "utf-8", "utf8", or "base64". When the
 * format is omitted, the binary format used for conversion is "hex". The function returns NULL
 * if at least one of the input parameters is NULL.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def to_binary(e: Column, f: Column): Column = {
  Column.fn("to_binary", e, f)
}
/**
 * Turns the input `e` into a binary value using the default format "hex". The function
 * returns NULL if at least one of the input parameters is NULL.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def to_binary(e: Column): Column = {
  Column.fn("to_binary", e)
}
// scalastyle:off line.size.limit
/**
 * Renders `e` as a string according to `format`; an exception is thrown when the conversion
 * fails. The format can consist of the following characters, case insensitive:
 *   - '0' or '9': an expected digit between 0 and 9. A sequence of 0 or 9 in the format string
 *     matches a sequence of digits in the input value and generates a result string of the
 *     same length as the corresponding sequence in the format string. The result string is
 *     left-padded with zeros if the 0/9 sequence comprises more digits than the matching part
 *     of the decimal value, starts with 0, and is before the decimal point. Otherwise, it is
 *     padded with spaces.
 *   - '.' or 'D': the position of the decimal point (optional, only allowed once).
 *   - ',' or 'G': the position of the grouping (thousands) separator (,). There must be a 0 or
 *     9 to the left and right of each grouping separator.
 *   - '$': the location of the $ currency sign. This character may only be specified once.
 *   - 'S' or 'MI': the position of a '-' or '+' sign (optional, only allowed once at the
 *     beginning or end of the format string). Note that 'S' prints '+' for positive values but
 *     'MI' prints a space.
 *   - 'PR': only allowed at the end of the format string; the result string is wrapped by
 *     angle brackets if the input value is negative.
 *
 * If `e` is a datetime, `format` shall be a valid datetime pattern, see <a
 * href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime
 * Patterns</a>. If `e` is a binary, it is converted to a string in one of the formats:
 *   - 'base64': a base 64 string.
 *   - 'hex': a string in the hexadecimal format.
 *   - 'utf-8': the input binary is decoded to UTF-8 string.
 *
 * @group string_funcs
 * @since 3.5.0
 */
// scalastyle:on line.size.limit
def to_char(e: Column, format: Column): Column = {
  Column.fn("to_char", e, format)
}
// scalastyle:off line.size.limit
/**
 * Renders `e` as a string according to `format`; an exception is thrown when the conversion
 * fails. The format can consist of the following characters, case insensitive:
 *   - '0' or '9': an expected digit between 0 and 9. A sequence of 0 or 9 in the format string
 *     matches a sequence of digits in the input value and generates a result string of the
 *     same length as the corresponding sequence in the format string. The result string is
 *     left-padded with zeros if the 0/9 sequence comprises more digits than the matching part
 *     of the decimal value, starts with 0, and is before the decimal point. Otherwise, it is
 *     padded with spaces.
 *   - '.' or 'D': the position of the decimal point (optional, only allowed once).
 *   - ',' or 'G': the position of the grouping (thousands) separator (,). There must be a 0 or
 *     9 to the left and right of each grouping separator.
 *   - '$': the location of the $ currency sign. This character may only be specified once.
 *   - 'S' or 'MI': the position of a '-' or '+' sign (optional, only allowed once at the
 *     beginning or end of the format string). Note that 'S' prints '+' for positive values but
 *     'MI' prints a space.
 *   - 'PR': only allowed at the end of the format string; the result string is wrapped by
 *     angle brackets if the input value is negative.
 *
 * If `e` is a datetime, `format` shall be a valid datetime pattern, see <a
 * href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime
 * Patterns</a>. If `e` is a binary, it is converted to a string in one of the formats:
 *   - 'base64': a base 64 string.
 *   - 'hex': a string in the hexadecimal format.
 *   - 'utf-8': the input binary is decoded to UTF-8 string.
 *
 * @group string_funcs
 * @since 3.5.0
 */
// scalastyle:on line.size.limit
def to_varchar(e: Column, format: Column): Column = {
  Column.fn("to_varchar", e, format)
}
/**
 * Parses string `e` into a number according to the string format `format`; an exception is
 * thrown when the conversion fails. The format can consist of the following characters, case
 * insensitive:
 *   - '0' or '9': an expected digit between 0 and 9. A sequence of 0 or 9 in the format string
 *     matches a sequence of digits in the input string. If the 0/9 sequence starts with 0 and
 *     is before the decimal point, it can only match a digit sequence of the same size.
 *     Otherwise, if the sequence starts with 9 or is after the decimal point, it can match a
 *     digit sequence that has the same or smaller size.
 *   - '.' or 'D': the position of the decimal point (optional, only allowed once).
 *   - ',' or 'G': the position of the grouping (thousands) separator (,). There must be a 0 or
 *     9 to the left and right of each grouping separator. `e` must match the grouping
 *     separator relevant for the size of the number.
 *   - '$': the location of the $ currency sign. This character may only be specified once.
 *   - 'S' or 'MI': the position of a '-' or '+' sign (optional, only allowed once at the
 *     beginning or end of the format string). Note that 'S' allows '-' but 'MI' does not.
 *   - 'PR': only allowed at the end of the format string; specifies that `e` indicates a
 *     negative number with wrapping angled brackets.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def to_number(e: Column, format: Column): Column = {
  Column.fn("to_number", e, format)
}
/**
 * Substitutes every occurrence of `search` with `replace`.
 *
 * @param src
 *   A column of string to be replaced
 * @param search
 *   A column of string. If `search` is not found in `src`, `src` is returned unchanged.
 * @param replace
 *   A column of string. If `replace` is not specified or is an empty string, nothing replaces
 *   the string that is removed from `src`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def replace(src: Column, search: Column, replace: Column): Column = {
  val replaceArgs = Seq(src, search, replace)
  Column.fn("replace", replaceArgs: _*)
}
/**
 * Substitutes every occurrence of `search`; no replacement string is supplied to this
 * overload.
 *
 * @param src
 *   A column of string to be replaced
 * @param search
 *   A column of string. If `search` is not found in `src`, `src` is returned unchanged.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def replace(src: Column, search: Column): Column = {
  Column.fn("replace", src, search)
}
/**
 * Splits `str` by `delimiter` and returns the requested part of the split (1-based).
 *   - If any input is null, returns null.
 *   - If `partNum` is out of range of split parts, returns empty string.
 *   - If `partNum` is 0, throws an error.
 *   - If `partNum` is negative, the parts are counted backward from the end of the string.
 *   - If the `delimiter` is an empty string, the `str` is not split.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def split_part(str: Column, delimiter: Column, partNum: Column): Column = {
  val splitArgs = Seq(str, delimiter, partNum)
  Column.fn("split_part", splitArgs: _*)
}
/**
 * Yields the substring of `str` beginning at `pos` with length `len`, or the slice of a byte
 * array beginning at `pos` with length `len`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def substr(str: Column, pos: Column, len: Column): Column = {
  Column.fn("substr", str, pos, len)
}
/**
 * Yields the substring of `str` beginning at `pos`, or the slice of a byte array beginning
 * at `pos`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def substr(str: Column, pos: Column): Column = {
  Column.fn("substr", str, pos)
}
/**
 * Pulls a part out of a URL.
 *
 * @group url_funcs
 * @since 4.0.0
 */
def try_parse_url(url: Column, partToExtract: Column, key: Column): Column = {
  val urlArgs = Seq(url, partToExtract, key)
  Column.fn("try_parse_url", urlArgs: _*)
}
/**
 * Pulls a part out of a URL.
 *
 * @group url_funcs
 * @since 4.0.0
 */
def try_parse_url(url: Column, partToExtract: Column): Column = {
  Column.fn("try_parse_url", url, partToExtract)
}
/**
 * Pulls a part out of a URL.
 *
 * @group url_funcs
 * @since 3.5.0
 */
def parse_url(url: Column, partToExtract: Column, key: Column): Column = {
  val urlArgs = Seq(url, partToExtract, key)
  Column.fn("parse_url", urlArgs: _*)
}
/**
 * Pulls a part out of a URL.
 *
 * @group url_funcs
 * @since 3.5.0
 */
def parse_url(url: Column, partToExtract: Column): Column = {
  Column.fn("parse_url", url, partToExtract)
}
/**
 * Formats the arguments printf-style and returns the result as a string column.
 *
 * @group string_funcs
 * @since 3.5.0
 */
@scala.annotation.varargs
def printf(format: Column, arguments: Column*): Column = {
  // The format column goes first, followed by the values to be formatted.
  val formatThenArgs = format +: arguments
  Column.fn("printf", formatThenArgs: _*)
}
/**
 * Decodes `str`, given in 'application/x-www-form-urlencoded' format, using a specific
 * encoding scheme.
 *
 * @group url_funcs
 * @since 3.5.0
 */
def url_decode(str: Column): Column = {
  Column.fn("url_decode", str)
}
/**
 * A special version of `url_decode`: it performs the same operation, but returns a NULL value
 * instead of raising an error when the decoding cannot be performed.
 *
 * @group url_funcs
 * @since 4.0.0
 */
def try_url_decode(str: Column): Column = {
  Column.fn("try_url_decode", str)
}
/**
 * Translates a string into 'application/x-www-form-urlencoded' format using a specific
 * encoding scheme.
 *
 * @group url_funcs
 * @since 3.5.0
 */
def url_encode(str: Column): Column = {
  Column.fn("url_encode", str)
}
/**
 * Finds the position of the first occurrence of `substr` in `str` after position `start`.
 * Both the given `start` and the return value are 1-based.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def position(substr: Column, str: Column, start: Column): Column = {
  Column.fn("position", substr, str, start)
}
/**
 * Finds the position of the first occurrence of `substr` in `str` after position `1`. The
 * return value is 1-based.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def position(substr: Column, str: Column): Column = {
  Column.fn("position", substr, str)
}
/**
 * Returns a boolean: True if `str` ends with `suffix`, NULL if either input expression is
 * NULL, and False otherwise. Both `str` and `suffix` must be of STRING or BINARY type.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def endswith(str: Column, suffix: Column): Column = {
  Column.fn("endswith", str, suffix)
}
/**
 * Returns a boolean: True if `str` starts with `prefix`, NULL if either input expression is
 * NULL, and False otherwise. Both `str` and `prefix` must be of STRING or BINARY type.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def startswith(str: Column, prefix: Column): Column = {
  Column.fn("startswith", str, prefix)
}
/**
 * Gives the ASCII character whose binary equivalent is `n`. If `n` is larger than 256 the
 * result is equivalent to char(n % 256).
 *
 * @group string_funcs
 * @since 3.5.0
 */
def char(n: Column): Column = {
  Column.fn("char", n)
}
/**
 * Strips the leading and trailing space characters from `str`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def btrim(str: Column): Column = {
  Column.fn("btrim", str)
}
/**
 * Strips the leading and trailing `trim` characters from `str`.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def btrim(str: Column, trim: Column): Column = {
  Column.fn("btrim", str, trim)
}
/**
 * A special version of `to_binary` taking the input `e` and the format `f`: it performs the
 * same operation, but returns a NULL value instead of raising an error when the conversion
 * cannot be performed.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def try_to_binary(e: Column, f: Column): Column = {
  Column.fn("try_to_binary", e, f)
}
/**
 * A special version of `to_binary`: it performs the same operation, but returns a NULL value
 * instead of raising an error when the conversion cannot be performed.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def try_to_binary(e: Column): Column = {
  Column.fn("try_to_binary", e)
}
/**
 * Parses string `e` into a number according to the string format `format`. Returns NULL if
 * the string `e` does not match the expected format. The format follows the same semantics as
 * the to_number function.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def try_to_number(e: Column, format: Column): Column = {
  Column.fn("try_to_number", e, format)
}
/**
 * Gives the character length of string data or the number of bytes of binary data. Trailing
 * spaces count towards the length of string data, and binary zeros count towards the length
 * of binary data.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def char_length(str: Column): Column = {
  Column.fn("char_length", str)
}
/**
 * Gives the character length of string data or the number of bytes of binary data. Trailing
 * spaces count towards the length of string data, and binary zeros count towards the length
 * of binary data.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def character_length(str: Column): Column = {
  Column.fn("character_length", str)
}
/**
 * Gives the ASCII character whose binary equivalent is `n`. If `n` is larger than 256 the
 * result is equivalent to chr(n % 256).
 *
 * @group string_funcs
 * @since 3.5.0
 */
def chr(n: Column): Column = {
  Column.fn("chr", n)
}
/**
 * Returns a boolean: True if `right` is found inside `left`, NULL if either input expression
 * is NULL, and False otherwise. Both `left` and `right` must be of STRING or BINARY type.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def contains(left: Column, right: Column): Column = {
  Column.fn("contains", left, right)
}
/**
 * Picks the `n`-th input, e.g., returns `input2` when `n` is 2. When the index exceeds the
 * length of the array, the function returns NULL if `spark.sql.ansi.enabled` is set to false,
 * and throws ArrayIndexOutOfBoundsException for invalid indices if `spark.sql.ansi.enabled` is
 * set to true.
 *
 * @group string_funcs
 * @since 3.5.0
 */
@scala.annotation.varargs
def elt(inputs: Column*): Column = {
  Column.fn("elt", inputs: _*)
}
/**
 * Gives the index (1-based) of the given string (`str`) in the comma-delimited list
 * (`strArray`). The result is 0 if the string was not found or if the given string (`str`)
 * contains a comma.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def find_in_set(str: Column, strArray: Column): Column = {
  Column.fn("find_in_set", str, strArray)
}
/**
 * Evaluates to true if `str` matches `pattern` with `escapeChar`, null if any arguments are
 * null, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def like(str: Column, pattern: Column, escapeChar: Column): Column = {
  Column.fn("like", str, pattern, escapeChar)
}
/**
 * Evaluates to true if `str` matches `pattern` with `escapeChar`('\'), null if any arguments
 * are null, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def like(str: Column, pattern: Column): Column = {
  Column.fn("like", str, pattern)
}
/**
 * Evaluates to true if `str` matches `pattern` with `escapeChar` case-insensitively, null if
 * any arguments are null, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def ilike(str: Column, pattern: Column, escapeChar: Column): Column = {
  Column.fn("ilike", str, pattern, escapeChar)
}
/**
 * Evaluates to true if `str` matches `pattern` with `escapeChar`('\') case-insensitively,
 * null if any arguments are null, and false otherwise.
 *
 * @group predicate_funcs
 * @since 3.5.0
 */
def ilike(str: Column, pattern: Column): Column = {
  Column.fn("ilike", str, pattern)
}
/**
 * Gives `str` with every character changed to lowercase.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def lcase(str: Column): Column = {
  Column.fn("lcase", str)
}
/**
 * Gives `str` with every character changed to uppercase.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def ucase(str: Column): Column = {
  Column.fn("ucase", str)
}
/**
 * Gives the leftmost `len` (`len` can be string type) characters of the string `str`. If
 * `len` is less than or equal to 0 the result is an empty string.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def left(str: Column, len: Column): Column = {
  Column.fn("left", str, len)
}
/**
 * Gives the rightmost `len` (`len` can be string type) characters of the string `str`. If
 * `len` is less than or equal to 0 the result is an empty string.
 *
 * @group string_funcs
 * @since 3.5.0
 */
def right(str: Column, len: Column): Column = {
  Column.fn("right", str, len)
}
/**
 * Gives `str` enclosed by single quotes, with each single quote inside it preceded by a
 * backslash.
 *
 * @group string_funcs
 * @since 4.1.0
 */
def quote(str: Column): Column = {
  Column.fn("quote", str)
}
//////////////////////////////////////////////////////////////////////////////////////////////
// DateTime functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Computes the date that is `numMonths` after `startDate`.
 *
 * @param startDate
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param numMonths
 *   The number of months to add to `startDate`, can be negative to subtract months
 * @return
 *   A date, or null if `startDate` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def add_months(startDate: Column, numMonths: Int): Column = {
  // Delegates to the Column-based overload with the month count wrapped as a literal.
  val monthsLit = lit(numMonths)
  add_months(startDate, monthsLit)
}
/**
 * Computes the date that is `numMonths` after `startDate`.
 *
 * @param startDate
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param numMonths
 *   A column of the number of months to add to `startDate`, can be negative to subtract months
 * @return
 *   A date, or null if `startDate` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.0.0
 */
def add_months(startDate: Column, numMonths: Column): Column = {
  Column.fn("add_months", startDate, numMonths)
}
/**
 * Gives the current date at the start of query evaluation as a date column. Every call of
 * current_date within the same query returns the same value.
 *
 * @group datetime_funcs
 * @since 3.5.0
 */
def curdate(): Column = {
  Column.fn("curdate")
}
/**
 * Gives the current date at the start of query evaluation as a date column. Every call of
 * current_date within the same query returns the same value.
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def current_date(): Column = {
  Column.fn("current_date")
}
/**
 * Gives the current session local timezone.
 *
 * @group datetime_funcs
 * @since 3.5.0
 */
def current_timezone(): Column = {
  Column.fn("current_timezone")
}
/**
 * Gives the current timestamp at the start of query evaluation as a timestamp column. Every
 * call of current_timestamp within the same query returns the same value.
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def current_timestamp(): Column = {
  Column.fn("current_timestamp")
}
/**
 * Gives the current timestamp at the start of query evaluation.
 *
 * @group datetime_funcs
 * @since 3.5.0
 */
def now(): Column = {
  Column.fn("now")
}
/**
 * Gives the current timestamp without time zone at the start of query evaluation as a
 * timestamp without time zone column. Every call of localtimestamp within the same query
 * returns the same value.
 *
 * @group datetime_funcs
 * @since 3.3.0
 */
def localtimestamp(): Column = {
  Column.fn("localtimestamp")
}
/**
 * Turns a date/timestamp/string into a string value, using the date format given by the
 * second argument.
 *
 * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
 * Patterns</a> for valid date and time format patterns
 *
 * @param dateExpr
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param format
 *   A pattern `dd.MM.yyyy` would return a string like `18.03.1993`
 * @return
 *   A string, or null if `dateExpr` was a string that could not be cast to a timestamp
 * @note
 *   Use specialized functions like [[year]] whenever possible as they benefit from a
 *   specialized implementation.
 * @throws IllegalArgumentException
 *   if the `format` pattern is invalid
 * @group datetime_funcs
 * @since 1.5.0
 */
def date_format(dateExpr: Column, format: String): Column = {
  // The pattern string is wrapped as a literal column before being handed to the function.
  val patternLit = lit(format)
  Column.fn("date_format", dateExpr, patternLit)
}
/**
 * Computes the date that is `days` days after `start`.
 *
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days
 *   The number of days to add to `start`, can be negative to subtract days
 * @return
 *   A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def date_add(start: Column, days: Int): Column = {
  // Delegates to the Column-based overload with the day count wrapped as a literal.
  val daysLit = lit(days)
  date_add(start, daysLit)
}
/**
 * Computes the date that is `days` days after `start`.
 *
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days
 *   A column of the number of days to add to `start`, can be negative to subtract days
 * @return
 *   A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.0.0
 */
def date_add(start: Column, days: Column): Column = {
  Column.fn("date_add", start, days)
}
/**
 * Computes the date that is `days` days after `start`.
 *
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days
 *   A column of the number of days to add to `start`, can be negative to subtract days
 * @return
 *   A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.5.0
 */
def dateadd(start: Column, days: Column): Column = {
  Column.fn("dateadd", start, days)
}
/**
 * Computes the date that is `days` days before `start`.
 *
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days
 *   The number of days to subtract from `start`, can be negative to add days
 * @return
 *   A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def date_sub(start: Column, days: Int): Column = {
  // Delegates to the Column-based overload with the day count wrapped as a literal.
  val daysLit = lit(days)
  date_sub(start, daysLit)
}
/**
 * Computes the date that is `days` days before `start`.
 *
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days
 *   A column of the number of days to subtract from `start`, can be negative to add days
 * @return
 *   A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.0.0
 */
def date_sub(start: Column, days: Column): Column = {
  Column.fn("date_sub", start, days)
}
/**
 * Computes the number of days from `start` to `end`.
 *
 * Only the date part of the input is considered. For example:
 * {{{
 * datediff("2018-01-10 00:00:00", "2018-01-09 23:59:59")
 * // returns 1
 * }}}
 *
 * @param end
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return
 *   An integer, or null if either `end` or `start` were strings that could not be cast to a
 *   date. Negative if `end` is before `start`
 * @group datetime_funcs
 * @since 1.5.0
 */
def datediff(end: Column, start: Column): Column = {
  Column.fn("datediff", end, start)
}
/**
 * Computes the number of days from `start` to `end`.
 *
 * Only the date part of the input is considered. For example:
 * {{{
 * date_diff("2018-01-10 00:00:00", "2018-01-09 23:59:59")
 * // returns 1
 * }}}
 *
 * @param end
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param start
 *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
 *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return
 *   An integer, or null if either `end` or `start` were strings that could not be cast to a
 *   date. Negative if `end` is before `start`
 * @group datetime_funcs
 * @since 3.5.0
 */
def date_diff(end: Column, start: Column): Column = {
  Column.fn("date_diff", end, start)
}
/**
 * Builds a date from the number of `days` since 1970-01-01.
 *
 * @group datetime_funcs
 * @since 3.5.0
 */
def date_from_unix_date(days: Column): Column = {
  Column.fn("date_from_unix_date", days)
}
/**
 * Pulls the year, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def year(e: Column): Column = {
  Column.fn("year", e)
}
/**
 * Pulls the quarter, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def quarter(e: Column): Column = {
  Column.fn("quarter", e)
}
/**
 * Pulls the month, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def month(e: Column): Column = {
  Column.fn("month", e)
}
/**
 * Pulls the day of the week, as an integer, out of a given date/timestamp/string. The value
 * ranges from 1 for a Sunday through to 7 for a Saturday.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 2.3.0
 */
def dayofweek(e: Column): Column = {
  Column.fn("dayofweek", e)
}
/**
 * Pulls the day of the month, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def dayofmonth(e: Column): Column = {
  Column.fn("dayofmonth", e)
}
/**
 * Pulls the day of the month, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.5.0
 */
def day(e: Column): Column = {
  Column.fn("day", e)
}
/**
 * Pulls the day of the year, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def dayofyear(e: Column): Column = {
  Column.fn("dayofyear", e)
}
/**
 * Pulls the hours, as an integer, out of a given date/timestamp/string.
 *
 * @return
 *   An integer, or null if the input was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 1.5.0
 */
def hour(e: Column): Column = {
  Column.fn("hour", e)
}
/**
 * Pulls a part out of the date/timestamp or interval source.
 *
 * @param field
 *   selects which part of the source should be extracted.
 * @param source
 *   a date/timestamp or interval column from where `field` should be extracted.
 * @return
 *   a part of the date/timestamp or interval source
 * @group datetime_funcs
 * @since 3.5.0
 */
def extract(field: Column, source: Column): Column =
  Column.fn("extract", field, source)