[Spark--sql]--所有函数举例(spark-2.x版本)
! expr - Logical not.
%
expr1 % expr2 - Returns the remainder after expr1 / expr2.
Examples:
> SELECT 2 % 1.8; 0.2 > SELECT MOD(2, 1.8); 0.2
&
&
expr1 & expr2 - Returns the result of bitwise AND of expr1 and expr2.
Examples:
> SELECT 3 & 5; 1
*
*
expr1 * expr2 - Returns
expr1*
expr2.
Examples:
> SELECT 2 * 3; 6
+
+
expr1 + expr2 - Returns
expr1+
expr2.
Examples:
> SELECT 1 + 2; 3
-
-
expr1 - expr2 - Returns
expr1-
expr2.
Examples:
> SELECT 2 - 1; 1
/
/
expr1 / expr2 - Returns
expr1/
expr2. It always performs floating point division.
Examples:
> SELECT 3 / 2; 1.5 > SELECT 2L / 2L; 1.0
<
<
expr1 < expr2 - Returns true if
expr1is less than
expr2.
<=
<=
expr1 <= expr2 - Returns true if
expr1is less than or equal to
expr2.
<=>
<=>
expr1 <=> expr2 - Returns the same result as the EQUAL(=) operator for non-null operands, but returns true if both are null and false if one of them is null.
=
=
expr1 = expr2 - Returns true if
expr1equals
expr2, or false otherwise.
==
==
expr1 == expr2 - Returns true if
expr1equals
expr2, or false otherwise.
>
>
expr1 > expr2 - Returns true if
expr1is greater than
expr2.
>=
>=
expr1 >= expr2 - Returns true if
expr1is greater than or equal to
expr2.
^
^
expr1 ^ expr2 - Returns the result of bitwise exclusive OR of expr1 and expr2.
Examples:
> SELECT 3 ^ 5; 2
abs
abs
abs(expr) - Returns the absolute value of the numeric value.
Examples:
> SELECT abs(-1); 1
acos
acos
acos(expr) - Returns the inverse cosine (a.k.a. arccosine) of expr if -1 <= expr <= 1, or NaN otherwise.
Examples:
> SELECT acos(1); 0.0 > SELECT acos(2); NaN
add_months
add_months
add_months(start_date, num_months) - Returns the date that is
num_monthsafter
start_date.
Examples:
> SELECT add_months('2016-08-31', 1); 2016-09-30
and
and
expr1 and expr2 - Logical AND.
approx_count_distinct
approx_count_distinct
approx_count_distinct(expr[, relativeSD]) - Returns the estimated cardinality by HyperLogLog++. relativeSD defines the maximum estimation error allowed.
approx_percentile
approx_percentile
approx_percentile(col, percentage [, accuracy]) - Returns the approximate percentile value of numeric column col at the given percentage. The value of percentage must be between 0.0 and 1.0. The accuracy parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. A higher value of accuracy yields better accuracy; 1.0/accuracy is the relative error of the approximation. When percentage is an array, each value of the percentage array must be between 0.0 and 1.0. In this case, returns the approximate percentile array of column col at the given percentage array.
Examples:
> SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100); [10.0,10.0,10.0] > SELECT approx_percentile(10.0, 0.5, 100); 10.0
array
array
array(expr, ...) - Returns an array with the given elements.
Examples:
> SELECT array(1, 2, 3); [1,2,3]
array_contains
array_contains
array_contains(array, value) - Returns true if the array contains the value.
Examples:
> SELECT array_contains(array(1, 2, 3), 2); true
ascii
ascii
ascii(str) - Returns the numeric value of the first character of
str.
Examples:
> SELECT ascii('222'); 50 > SELECT ascii(2); 50
asin
asin
asin(expr) - Returns the inverse sine (a.k.a. arcsine) of expr if -1 <= expr <= 1, or NaN otherwise.
Examples:
> SELECT asin(0); 0.0 > SELECT asin(2); NaN
assert_true
assert_true
assert_true(expr) - Throws an exception if
expris not true.
Examples:
> SELECT assert_true(0 < 1); NULL
atan
atan
atan(expr) - Returns the inverse tangent (a.k.a. arctangent).
Examples:
> SELECT atan(0); 0.0
atan2
atan2
atan2(expr1, expr2) - Returns the angle in radians between the positive x-axis of a plane and the point given by the coordinates (
expr1,
expr2).
Examples:
> SELECT atan2(0, 0); 0.0
avg
avg
avg(expr) - Returns the mean calculated from values of a group.
base64
base64
base64(bin) - Converts the argument from a binary
binto a base 64 string.
Examples:
> SELECT base64('Spark SQL'); U3BhcmsgU1FM
bigint
bigint
bigint(expr) - Casts the value
exprto the target data type
bigint.
bin
bin
bin(expr) - Returns the string representation of the long value
exprrepresented in binary.
Examples:
> SELECT bin(13); 1101 > SELECT bin(-13); 1111111111111111111111111111111111111111111111111111111111110011 > SELECT bin(13.3); 1101
binary
binary
binary(expr) - Casts the value
exprto the target data type
binary.
bit_length
bit_length
bit_length(expr) - Returns the bit length of
expror number of bits in binary data.
Examples:
> SELECT bit_length('Spark SQL'); 72
boolean
boolean
boolean(expr) - Casts the value
exprto the target data type
boolean.
bround
bround
bround(expr, d) - Returns
exprrounded to
ddecimal places using HALF_EVEN rounding mode.
Examples:
> SELECT bround(2.5, 0); 2.0
cast
cast
cast(expr AS type) - Casts the value
exprto the target data type
type.
Examples:
> SELECT cast('10' as int); 10
cbrt
cbrt
cbrt(expr) - Returns the cube root of
expr.
Examples:
> SELECT cbrt(27.0); 3.0
ceil
ceil
ceil(expr) - Returns the smallest integer not smaller than
expr.
Examples:
> SELECT ceil(-0.1); 0 > SELECT ceil(5); 5
ceiling
ceiling
ceiling(expr) - Returns the smallest integer not smaller than
expr.
Examples:
> SELECT ceiling(-0.1); 0 > SELECT ceiling(5); 5
char
char
char(expr) - Returns the ASCII character having the binary equivalent to expr. If expr is larger than 256, the result is equivalent to chr(expr % 256).
Examples:
> SELECT char(65); A
char_length
char_length
char_length(expr) - Returns the character length of
expror number of bytes in binary data.
Examples:
> SELECT char_length('Spark SQL'); 9 > SELECT CHAR_LENGTH('Spark SQL'); 9 > SELECT CHARACTER_LENGTH('Spark SQL'); 9
character_length
character_length
character_length(expr) - Returns the character length of
expror number of bytes in binary data.
Examples:
> SELECT character_length('Spark SQL'); 9 > SELECT CHAR_LENGTH('Spark SQL'); 9 > SELECT CHARACTER_LENGTH('Spark SQL'); 9
chr
chr
chr(expr) - Returns the ASCII character having the binary equivalent to expr. If expr is larger than 256, the result is equivalent to chr(expr % 256).
Examples:
> SELECT chr(65); A
coalesce
coalesce
coalesce(expr1, expr2, ...) - Returns the first non-null argument if exists. Otherwise, null.
Examples:
> SELECT coalesce(NULL, 1, NULL); 1
collect_list
collect_list
collect_list(expr) - Collects and returns a list of non-unique elements.
collect_set
collect_set
collect_set(expr) - Collects and returns a set of unique elements.
concat
concat
concat(str1, str2, ..., strN) - Returns the concatenation of str1, str2, ..., strN.
Examples:
> SELECT concat('Spark', 'SQL'); SparkSQL
concat_ws
concat_ws
concat_ws(sep, [str | array(str)]+) - Returns the concatenation of the strings separated by
sep.
Examples:
> SELECT concat_ws(' ', 'Spark', 'SQL'); Spark SQL
conv
conv
conv(num, from_base, to_base) - Convert
numfrom
from_baseto
to_base.
Examples:
> SELECT conv('100', 2, 10); 4 > SELECT conv(-10, 16, -10); -16
corr
corr
corr(expr1, expr2) - Returns Pearson coefficient of correlation between a set of number pairs.
cos
cos
cos(expr) - Returns the cosine of
expr.
Examples:
> SELECT cos(0); 1.0
cosh
cosh
cosh(expr) - Returns the hyperbolic cosine of
expr.
Examples:
> SELECT cosh(0); 1.0
cot
cot
cot(expr) - Returns the cotangent of
expr.
Examples:
> SELECT cot(1); 0.6420926159343306
count
count
count(*) - Returns the total number of retrieved rows, including rows containing null.
count(expr) - Returns the number of rows for which the supplied expression is non-null.
count(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-null.
count_min_sketch
count_min_sketch
count_min_sketch(col, eps, confidence, seed) - Returns a count-min sketch of a column with the given eps, confidence and seed. The result is an array of bytes, which can be deserialized to a CountMinSketch before usage. Count-min sketch is a probabilistic data structure used for cardinality estimation using sub-linear space.
covar_pop
covar_pop
covar_pop(expr1, expr2) - Returns the population covariance of a set of number pairs.
covar_samp
covar_samp
covar_samp(expr1, expr2) - Returns the sample covariance of a set of number pairs.
crc32
crc32
crc32(expr) - Returns a cyclic redundancy check value of the
expras a bigint.
Examples:
> SELECT crc32('Spark'); 1557323817
cube
cube
cume_dist
cume_dist
cume_dist() - Computes the position of a value relative to all values in the partition.
current_database
current_database
current_database() - Returns the current database.
Examples:
> SELECT current_database(); default
current_date
current_date
current_date() - Returns the current date at the start of query evaluation.
current_timestamp
current_timestamp
current_timestamp() - Returns the current timestamp at the start of query evaluation.
date
date
date(expr) - Casts the value
exprto the target data type
date.
date_add
date_add
date_add(start_date, num_days) - Returns the date that is
num_daysafter
start_date.
Examples:
> SELECT date_add('2016-07-30', 1); 2016-07-31
date_format
date_format
date_format(timestamp, fmt) - Converts
timestampto a value of string in the format specified by the date format
fmt.
Examples:
> SELECT date_format('2016-04-08', 'y'); 2016
date_sub
date_sub
date_sub(start_date, num_days) - Returns the date that is
num_daysbefore
start_date.
Examples:
> SELECT date_sub('2016-07-30', 1); 2016-07-29
datediff
datediff
datediff(endDate, startDate) - Returns the number of days from
startDateto
endDate.
Examples:
> SELECT datediff('2009-07-31', '2009-07-30'); 1 > SELECT datediff('2009-07-30', '2009-07-31'); -1
day
day
day(date) - Returns the day of month of the date/timestamp.
Examples:
> SELECT day('2009-07-30'); 30
dayofmonth
dayofmonth
dayofmonth(date) - Returns the day of month of the date/timestamp.
Examples:
> SELECT dayofmonth('2009-07-30'); 30
dayofweek
dayofweek
dayofweek(date) - Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, ..., 7 = Saturday).
Examples:
> SELECT dayofweek('2009-07-30'); 5
dayofyear
dayofyear
dayofyear(date) - Returns the day of year of the date/timestamp.
Examples:
> SELECT dayofyear('2016-04-09'); 100
decimal
decimal
decimal(expr) - Casts the value
exprto the target data type
decimal.
decode
decode
decode(bin, charset) - Decodes the first argument using the second argument character set.
Examples:
> SELECT decode(encode('abc', 'utf-8'), 'utf-8'); abc
degrees
degrees
degrees(expr) - Converts radians to degrees.
Examples:
> SELECT degrees(3.141592653589793); 180.0
dense_rank
dense_rank
dense_rank() - Computes the rank of a value in a group of values. The result is one plus the previously assigned rank value. Unlike the function rank, dense_rank will not produce gaps in the ranking sequence.
double
double
double(expr) - Casts the value
exprto the target data type
double.
e
e
e() - Returns Euler's number, e.
Examples:
> SELECT e(); 2.718281828459045
elt
elt
elt(n, str1, str2, ...) - Returns the n-th string, e.g., returns str2 when n is 2.
Examples:
> SELECT elt(1, 'scala', 'java'); scala
encode
encode
encode(str, charset) - Encodes the first argument using the second argument character set.
Examples:
> SELECT encode('abc', 'utf-8'); abc
exp
exp
exp(expr) - Returns e to the power of
expr.
Examples:
> SELECT exp(0); 1.0
explode
explode
explode(expr) - Separates the elements of array
exprinto multiple rows, or the elements of map
exprinto multiple rows and columns.
Examples:
> SELECT explode(array(10, 20)); 10 20
explode_outer
explode_outer
explode_outer(expr) - Separates the elements of array
exprinto multiple rows, or the elements of map
exprinto multiple rows and columns.
Examples:
> SELECT explode_outer(array(10, 20)); 10 20
expm1
expm1
expm1(expr) - Returns exp(
expr) - 1.
Examples:
> SELECT expm1(0); 0.0
factorial
factorial
factorial(expr) - Returns the factorial of
expr.
expris [0..20]. Otherwise, null.
Examples:
> SELECT factorial(5); 120
find_in_set
find_in_set
find_in_set(str, str_array) - Returns the index (1-based) of the given string (str) in the comma-delimited list (str_array). Returns 0 if the string was not found or if the given string (str) contains a comma.
Examples:
> SELECT find_in_set('ab','abc,b,ab,c,def'); 3
first
first
first(expr[, isIgnoreNull]) - Returns the first value of
exprfor a group of rows. If
isIgnoreNullis true, returns only non-null values.
first_value
first_value
first_value(expr[, isIgnoreNull]) - Returns the first value of
exprfor a group of rows. If
isIgnoreNullis true, returns only non-null values.
float
float
float(expr) - Casts the value
exprto the target data type
float.
floor
floor
floor(expr) - Returns the largest integer not greater than
expr.
Examples:
> SELECT floor(-0.1); -1 > SELECT floor(5); 5
format_number
format_number
format_number(expr1, expr2) - Formats the number
expr1like '#,###,###.##', rounded to
expr2decimal places. If
expr2is 0, the result has no decimal point or fractional part. This is supposed to function like MySQL's FORMAT.
Examples:
> SELECT format_number(12332.123456, 4); 12,332.1235
format_string
format_string
format_string(strfmt, obj, ...) - Returns a formatted string from printf-style format strings.
Examples:
> SELECT format_string("Hello World %d %s", 100, "days"); Hello World 100 days
from_json
from_json
from_json(jsonStr, schema[, options]) - Returns a struct value with the given
jsonStrand
schema.
Examples:
> SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE'); {"a":1, "b":0.8} > SELECT from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); {"time":"2015-08-26 00:00:00.0"}
Since:2.2.0
from_unixtime
from_unixtime
from_unixtime(unix_time, format) - Returns
unix_timein the specified
format.
Examples:
> SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss'); 1970-01-01 00:00:00
from_utc_timestamp
from_utc_timestamp
from_utc_timestamp(timestamp, timezone) - Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp that corresponds to the same time of day in the given timezone.
Examples:
> SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul'); 2016-08-31 09:00:00
get_json_object
get_json_object
get_json_object(json_txt, path) - Extracts a json object from
path.
Examples:
> SELECT get_json_object('{"a":"b"}', '$.a'); b
greatest
greatest
greatest(expr, ...) - Returns the greatest value of all parameters, skipping null values.
Examples:
> SELECT greatest(10, 9, 2, 4, 3); 10
grouping
grouping
grouping_id
grouping_id
hash
hash
hash(expr1, expr2, ...) - Returns a hash value of the arguments.
Examples:
> SELECT hash('Spark', array(123), 2); -1321691492
hex
hex
hex(expr) - Converts
exprto hexadecimal.
Examples:
> SELECT hex(17); 11 > SELECT hex('Spark SQL'); 537061726B2053514C
hour
hour
hour(timestamp) - Returns the hour component of the string/timestamp.
Examples:
> SELECT hour('2009-07-30 12:58:59'); 12
hypot
hypot
hypot(expr1, expr2) - Returns $\sqrt{\mathrm{expr1}^2 + \mathrm{expr2}^2}$.
Examples:
> SELECT hypot(3, 4); 5.0
if
if
if(expr1, expr2, expr3) - If
expr1evaluates to true, then returns
expr2; otherwise returns
expr3.
Examples:
> SELECT if(1 < 2, 'a', 'b'); a
ifnull
ifnull
ifnull(expr1, expr2) - Returns
expr2if
expr1is null, or
expr1otherwise.
Examples:
> SELECT ifnull(NULL, array('2')); ["2"]
in
in
expr1 in(expr2, expr3, ...) - Returns true if expr1 equals any of expr2, expr3, ....
initcap
initcap
initcap(str) - Returns
strwith the first letter of each word in uppercase. All other letters are in lowercase. Words are delimited by white space.
Examples:
> SELECT initcap('sPark sql'); Spark Sql
inline
inline
inline(expr) - Explodes an array of structs into a table.
Examples:
> SELECT inline(array(struct(1, 'a'), struct(2, 'b'))); 1 a 2 b
inline_outer
inline_outer
inline_outer(expr) - Explodes an array of structs into a table.
Examples:
> SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))); 1 a 2 b
input_file_block_length
input_file_block_length
input_file_block_length() - Returns the length of the block being read, or -1 if not available.
input_file_block_start
input_file_block_start
input_file_block_start() - Returns the start offset of the block being read, or -1 if not available.
input_file_name
input_file_name
input_file_name() - Returns the name of the file being read, or empty string if not available.
instr
instr
instr(str, substr) - Returns the (1-based) index of the first occurrence of
substrin
str.
Examples:
> SELECT instr('SparkSQL', 'SQL'); 6
int
int
int(expr) - Casts the value
exprto the target data type
int.
isnan
isnan
isnan(expr) - Returns true if
expris NaN, or false otherwise.
Examples:
> SELECT isnan(cast('NaN' as double)); true
isnotnull
isnotnull
isnotnull(expr) - Returns true if
expris not null, or false otherwise.
Examples:
> SELECT isnotnull(1); true
isnull
isnull
isnull(expr) - Returns true if
expris null, or false otherwise.
Examples:
> SELECT isnull(1); false
java_method
java_method
java_method(class, method[, arg1[, arg2 ..]]) - Calls a method with reflection.
Examples:
> SELECT java_method('java.util.UUID', 'randomUUID'); c33fb387-8500-4bfa-81d2-6e0e3e930df2 > SELECT java_method('java.util.UUID', 'fromString', 'a5cf6c42-0c85-418f-af6c-3e4e5b1328f2'); a5cf6c42-0c85-418f-af6c-3e4e5b1328f2
json_tuple
json_tuple
json_tuple(jsonStr, p1, p2, ..., pn) - Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.
Examples:
> SELECT json_tuple('{"a":1, "b":2}', 'a', 'b'); 1 2
kurtosis
kurtosis
kurtosis(expr) - Returns the kurtosis value calculated from values of a group.
lag
lag
lag(input[, offset[, default]]) - Returns the value of input at the offset-th row before the current row in the window. The default value of offset is 1 and the default value of default is null. If the value of input at the offset-th row is null, null is returned. If there is no such offset row (e.g., when the offset is 1, the first row of the window does not have any previous row), default is returned.
last
last
last(expr[, isIgnoreNull]) - Returns the last value of
exprfor a group of rows. If
isIgnoreNullis true, returns only non-null values.
last_day
last_day
last_day(date) - Returns the last day of the month which the date belongs to.
Examples:
> SELECT last_day('2009-01-12'); 2009-01-31
last_value
last_value
last_value(expr[, isIgnoreNull]) - Returns the last value of
exprfor a group of rows. If
isIgnoreNullis true, returns only non-null values.
lcase
lcase
lcase(str) - Returns
strwith all characters changed to lowercase.
Examples:
> SELECT lcase('SparkSql'); sparksql
lead
lead
lead(input[, offset[, default]]) - Returns the value of input at the offset-th row after the current row in the window. The default value of offset is 1 and the default value of default is null. If the value of input at the offset-th row is null, null is returned. If there is no such offset row (e.g., when the offset is 1, the last row of the window does not have any subsequent row), default is returned.
least
least
least(expr, ...) - Returns the least value of all parameters, skipping null values.
Examples:
> SELECT least(10, 9, 2, 4, 3); 2
left
left
left(str, len) - Returns the leftmost len (len can be string type) characters from the string str. If len is less than or equal to 0, the result is an empty string.
Examples:
> SELECT left('Spark SQL', 3); Spa
length
length
length(expr) - Returns the character length of
expror number of bytes in binary data.
Examples:
> SELECT length('Spark SQL'); 9 > SELECT CHAR_LENGTH('Spark SQL'); 9 > SELECT CHARACTER_LENGTH('Spark SQL'); 9
levenshtein
levenshtein
levenshtein(str1, str2) - Returns the Levenshtein distance between the two given strings.
Examples:
> SELECT levenshtein('kitten', 'sitting'); 3
like
like
str like pattern - Returns true if str matches pattern, null if any arguments are null, false otherwise.
Arguments:
str - a string expression
pattern - a string expression. The pattern is a string which is matched literally, with exception to the following special symbols:
_ matches any one character in the input (similar to . in posix regular expressions)
% matches zero or more characters in the input (similar to .* in posix regular expressions)
The escape character is '\'. If an escape character precedes a special symbol or another escape character, the following character is matched literally. It is invalid to escape any other character.
Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order to match "\abc", the pattern should be "\\abc".
When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc".
Examples:
> SELECT '%SystemDrive%\Users\John' like '\%SystemDrive\%\\Users%' true
Note:
Use RLIKE to match with standard regular expressions.
ln
ln
ln(expr) - Returns the natural logarithm (base e) of
expr.
Examples:
> SELECT ln(1); 0.0
locate
locate
locate(substr, str[, pos]) - Returns the position of the first occurrence of
substrin
strafter position
pos. The given
posand return value are 1-based.
Examples:
> SELECT locate('bar', 'foobarbar'); 4 > SELECT locate('bar', 'foobarbar', 5); 7 > SELECT POSITION('bar' IN 'foobarbar'); 4
log
log
log(base, expr) - Returns the logarithm of
exprwith
base.
Examples:
> SELECT log(10, 100); 2.0
log10
log10
log10(expr) - Returns the logarithm of
exprwith base 10.
Examples:
> SELECT log10(10); 1.0
log1p
log1p
log1p(expr) - Returns log(1 +
expr).
Examples:
> SELECT log1p(0); 0.0
log2
log2
log2(expr) - Returns the logarithm of
exprwith base 2.
Examples:
> SELECT log2(2); 1.0
lower
lower
lower(str) - Returns
strwith all characters changed to lowercase.
Examples:
> SELECT lower('SparkSql'); sparksql
lpad
lpad
lpad(str, len, pad) - Returns str, left-padded with pad to a length of len. If str is longer than len, the return value is shortened to len characters.
Examples:
> SELECT lpad('hi', 5, '??'); ???hi > SELECT lpad('hi', 1, '??'); h
ltrim
ltrim
ltrim(str) - Removes the leading space characters from str.
Examples:
> SELECT ltrim(' SparkSQL'); SparkSQL
map
map
map(key0, value0, key1, value1, ...) - Creates a map with the given key/value pairs.
Examples:
> SELECT map(1.0, '2', 3.0, '4'); {1.0:"2",3.0:"4"}
map_keys
map_keys
map_keys(map) - Returns an unordered array containing the keys of the map.
Examples:
> SELECT map_keys(map(1, 'a', 2, 'b')); [1,2]
map_values
map_values
map_values(map) - Returns an unordered array containing the values of the map.
Examples:
> SELECT map_values(map(1, 'a', 2, 'b')); ["a","b"]
max
max
max(expr) - Returns the maximum value of
expr.
md5
md5
md5(expr) - Returns an MD5 128-bit checksum as a hex string of
expr.
Examples:
> SELECT md5('Spark'); 8cde774d6f7333752ed72cacddb05126
mean
mean
mean(expr) - Returns the mean calculated from values of a group.
min
min
min(expr) - Returns the minimum value of
expr.
minute
minute
minute(timestamp) - Returns the minute component of the string/timestamp.
Examples:
> SELECT minute('2009-07-30 12:58:59'); 58
mod
mod
expr1 mod expr2 - Returns the remainder after
expr1/
expr2.
Examples:
> SELECT 2 mod 1.8; 0.2 > SELECT MOD(2, 1.8); 0.2
monotonically_increasing_id
monotonically_increasing_id
monotonically_increasing_id() - Returns monotonically increasing 64-bit integers. The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive. The current implementation puts the partition ID in the upper 31 bits, and the lower 33 bits represent the record number within each partition. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records.
month
month
month(date) - Returns the month component of the date/timestamp.
Examples:
> SELECT month('2016-07-30'); 7
months_between
months_between
months_between(timestamp1, timestamp2) - Returns number of months between
timestamp1and
timestamp2.
Examples:
> SELECT months_between('1997-02-28 10:30:00', '1996-10-30'); 3.94959677
named_struct
named_struct
named_struct(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values.
Examples:
> SELECT named_struct("a", 1, "b", 2, "c", 3); {"a":1,"b":2,"c":3}
nanvl
nanvl
nanvl(expr1, expr2) - Returns
expr1if it's not NaN, or
expr2otherwise.
Examples:
> SELECT nanvl(cast('NaN' as double), 123); 123.0
negative
negative
negative(expr) - Returns the negated value of
expr.
Examples:
> SELECT negative(1); -1
next_day
next_day
next_day(start_date, day_of_week) - Returns the first date which is later than
start_dateand named as indicated.
Examples:
> SELECT next_day('2015-01-14', 'TU'); 2015-01-20
not
not
not expr - Logical not.
now
now
now() - Returns the current timestamp at the start of query evaluation.
ntile
ntile
ntile(n) - Divides the rows for each window partition into n buckets ranging from 1 to at most n.
nullif
nullif
nullif(expr1, expr2) - Returns null if expr1 equals expr2, or expr1 otherwise.
Examples:
> SELECT nullif(2, 2); NULL
nvl
nvl
nvl(expr1, expr2) - Returns
expr2if
expr1is null, or
expr1otherwise.
Examples:
> SELECT nvl(NULL, array('2')); ["2"]
nvl2
nvl2
nvl2(expr1, expr2, expr3) - Returns
expr2if
expr1is not null, or
expr3otherwise.
Examples:
> SELECT nvl2(NULL, 2, 1); 1
octet_length
octet_length
octet_length(expr) - Returns the byte length of
expror number of bytes in binary data.
Examples:
> SELECT octet_length('Spark SQL'); 9
or
or
expr1 or expr2 - Logical OR.
parse_url
parse_url
parse_url(url, partToExtract[, key]) - Extracts a part from a URL.
Examples:
> SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST'); spark.apache.org > SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY'); query=1 > SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY', 'query'); 1
percent_rank
percent_rank
percent_rank() - Computes the percentage ranking of a value in a group of values.
percentile
percentile
percentile(col, percentage [, frequency]) - Returns the exact percentile value of numeric column col at the given percentage. The value of percentage must be between 0.0 and 1.0. The value of frequency should be a positive integer.
percentile(col, array(percentage1 [, percentage2]...) [, frequency]) - Returns the exact percentile value array of numeric column col at the given percentage(s). Each value of the percentage array must be between 0.0 and 1.0. The value of frequency should be a positive integer.
percentile_approx
percentile_approx
percentile_approx(col, percentage [, accuracy]) - Returns the approximate percentile value of numeric column col at the given percentage. The value of percentage must be between 0.0 and 1.0. The accuracy parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. A higher value of accuracy yields better accuracy; 1.0/accuracy is the relative error of the approximation. When percentage is an array, each value of the percentage array must be between 0.0 and 1.0. In this case, returns the approximate percentile array of column col at the given percentage array.
Examples:
> SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100); [10.0,10.0,10.0] > SELECT percentile_approx(10.0, 0.5, 100); 10.0
pi
pi
pi() - Returns pi.
Examples:
> SELECT pi(); 3.141592653589793
pmod
pmod
pmod(expr1, expr2) - Returns the positive value of
expr1mod
expr2.
Examples:
> SELECT pmod(10, 3); 1 > SELECT pmod(-10, 3); 2
posexplode
posexplode
posexplode(expr) - Separates the elements of array
exprinto multiple rows with positions, or the elements of map
exprinto multiple rows and columns with positions.
Examples:
> SELECT posexplode(array(10,20)); 0 10 1 20
posexplode_outer
posexplode_outer
posexplode_outer(expr) - Separates the elements of array
exprinto multiple rows with positions, or the elements of map
exprinto multiple rows and columns with positions.
Examples:
> SELECT posexplode_outer(array(10,20)); 0 10 1 20
position
position
position(substr, str[, pos]) - Returns the position of the first occurrence of
substrin
strafter position
pos. The given
posand return value are 1-based.
Examples:
> SELECT position('bar', 'foobarbar'); 4 > SELECT position('bar', 'foobarbar', 5); 7 > SELECT POSITION('bar' IN 'foobarbar'); 4
positive
positive
positive(expr) - Returns the value of
expr.
pow
pow
pow(expr1, expr2) - Raises
expr1to the power of
expr2.
Examples:
> SELECT pow(2, 3); 8.0
power
power
power(expr1, expr2) - Raises
expr1to the power of
expr2.
Examples:
> SELECT power(2, 3); 8.0
printf
printf
printf(strfmt, obj, ...) - Returns a formatted string from printf-style format strings.
Examples:
> SELECT printf("Hello World %d %s", 100, "days"); Hello World 100 days
quarter
quarter
quarter(date) - Returns the quarter of the year for date, in the range 1 to 4.
Examples:
> SELECT quarter('2016-08-31'); 3
radians
radians
radians(expr) - Converts degrees to radians.
Examples:
> SELECT radians(180); 3.141592653589793
rand
rand
rand([seed]) - Returns a random value with independent and identically distributed (i.i.d.) uniformly distributed values in [0, 1).
Examples:
> SELECT rand(); 0.9629742951434543 > SELECT rand(0); 0.8446490682263027 > SELECT rand(null); 0.8446490682263027
randn
randn
randn([seed]) - Returns a random value with independent and identically distributed (i.i.d.) values drawn from the standard normal distribution.
Examples:
> SELECT randn(); -0.3254147983080288 > SELECT randn(0); 1.1164209726833079 > SELECT randn(null); 1.1164209726833079
rank
rank
rank() - Computes the rank of a value in a group of values. The result is one plus the number of rows preceding or equal to the current row in the ordering of the partition. The values will produce gaps in the sequence.
reflect
reflect
reflect(class, method[, arg1[, arg2 ..]]) - Calls a method with reflection.
Examples:
> SELECT reflect('java.util.UUID', 'randomUUID'); c33fb387-8500-4bfa-81d2-6e0e3e930df2 > SELECT reflect('java.util.UUID', 'fromString', 'a5cf6c42-0c85-418f-af6c-3e4e5b1328f2'); a5cf6c42-0c85-418f-af6c-3e4e5b1328f2
regexp_extract
regexp_extract
regexp_extract(str, regexp[, idx]) - Extracts a group that matches
regexp.
Examples:
> SELECT regexp_extract('100-200', '(\d+)-(\d+)', 1); 100
regexp_replace
regexp_replace
regexp_replace(str, regexp, rep) - Replaces all substrings of
strthat match
regexpwith
rep.
Examples:
> SELECT regexp_replace('100-200', '(\d+)', 'num'); num-num
repeat
repeat