Spark Functions

The semantics of Spark functions align with Spark 3.5. In the function descriptions, a function is marked as ANSI compliant if it adheres to the ANSI standard, subject to the spark.ansi_enabled configuration. Otherwise, the function simply follows Spark’s semantics in ANSI OFF mode.

Here is a list of all scalar, aggregate, and window Spark functions available in Velox. Each function name links to its description. See the coverage maps for an overview of all functions.

Scalar Functions

Aggregate Functions

Window Functions

abs()

divide_deny_precision_loss()

not()

avg()

dense_rank()

acos()

doy()

overlay()

bit_xor()

nth_value()

acosh()

element_at()

pmod()

bloom_filter_agg()

ntile()

add()

empty2null()

power()

collect_list()

rank()

add_deny_precision_loss()

endswith()

quarter()

collect_set()

row_number()

add_months()

equalnullsafe()

raise_error()

corr()

aggregate()

equalto()

rand()

covar_samp()

array()

exists()

random()

first()

array_append()

exp()

regexp_extract()

first_ignore_null()

array_compact()

expm1()

regexp_extract_all()

kurtosis()

array_contains()

factorial()

regexp_replace()

last()

array_distinct()

filter()

remainder()

last_ignore_null()

array_except()

find_in_set()

repeat()

max()

array_insert()

flatten()

replace()

max_by()

array_intersect()

floor()

reverse()

min()

array_join()

forall()

rint()

min_by()

array_max()

from_unixtime()

rlike()

mode()

array_min()

from_utc_timestamp()

round()

regr_replacement()

array_position()

get()

rpad()

skewness()

array_prepend()

get_json_object()

rtrim()

stddev()

array_remove()

get_timestamp()

sec()

stddev_samp()

array_repeat()

greaterthan()

second()

sum()

array_sort()

greaterthanorequal()

sha1()

var_samp()

array_union()

greatest()

sha2()

variance()

arrays_zip()

hash()

shiftleft()

ascii()

hash_with_seed()

shiftright()

asin()

hex()

shuffle()

asinh()

hour()

sign()

atan()

hypot()

sinh()

atan2()

in()

size()

atanh()

instr()

slice()

between()

isnan()

sort_array()

bin()

isnotnull()

soundex()

bit_count()

isnull()

spark_partition_id()

bit_get()

json_array_length()

split()

bit_length()

json_object_keys()

sqrt()

bitwise_and()

last_day()

startswith()

bitwise_not()

least()

str_to_map()

bitwise_or()

left()

substring()

bitwise_xor()

length()

substring_index()

cbrt()

lessthan()

subtract()

ceil()

lessthanorequal()

subtract_deny_precision_loss()

checked_add()

levenshtein()

timestamp_micros()

checked_divide()

like()

timestamp_millis()

checked_multiply()

locate()

to_unix_timestamp()

checked_subtract()

log()

to_utc_timestamp()

chr()

log10()

transform()

concat()

log1p()

translate()

contains()

log2()

trim()

conv()

lower()

trunc()

cos()

lpad()

unaryminus()

cosh()

ltrim()

unbase64()

cot()

luhn_check()

unhex()

crc32()

make_date()

unix_date()

csc()

make_timestamp()

unix_micros()

date_add()

make_ym_interval()

unix_millis()

date_format()

map()

unix_seconds()

date_from_unix_date()

map_concat()

unix_timestamp()

date_sub()

map_entries()

unscaled_value()

date_trunc()

map_filter()

upper()

datediff()

map_from_arrays()

url_decode()

day()

map_keys()

url_encode()

dayofmonth()

map_values()

uuid()

dayofweek()

map_zip_with()

varchar_type_write_side_check()

dayofyear()

mask()

week_of_year()

decimal_equalto()

md5()

weekday()

decimal_greaterthan()

might_contain()

width_bucket()

decimal_greaterthanorequal()

minute()

xxhash64()

decimal_lessthan()

monotonically_increasing_id()

xxhash64_with_seed()

decimal_lessthanorequal()

month()

year()

decimal_notequalto()

multiply()

year_of_week()

degrees()

multiply_deny_precision_loss()

zip_with()

divide()

next_day()