spark-sql 笔记
1、get_json_object 返回json键值
select get_json_object('{"a":"dd"}','$.a');
dd
select get_json_object('{"b":"c"}','$.a');//json 键 “b” ≠ a 这里没有返回值
➜
select get_json_object('{"c":"d"}','$.c');//json 键 “c” = c 这里返回值 d
➜
get_json_object({"c":"d"}, $.c)
d
select get_json_object('{"ac":"d"}','$.ac'); //主键 ac
➜
get_json_object({"ac":"d"}, $.ac)
d
get_json_object({"a":{"B":"c"}}, $.a)
{"B":"c"}
2、substring_index
select substring_index('www.33.vv','',3);
空串返回空;
select substring_index('www.33.vv','e',3);
不存在,就返回 原始串 www.33.vv
select substring_index('www.33.vv','.',3);
超出,就返回 原始串 www.33.vv
select substring_index('www.abcd.ory','.',1);
正常情况:www
3、hex 、unhex 、decode(... , 'utf-8')
select hex('spark sql');
hex(spark sql)
返回:737061726B2073716C
select decode(unhex('737061726B2073716C'),'utf-8');
返回:
decode(unhex(737061726B2073716C), utf-8)
spark sql
4、to_json
select to_json(named_struct('a','1','b','1'));
➜
structstojson(named_struct(a, 1, b, 1))
{"a":"1","b":"1"}
----------------
select to_json(named_struct('a','b','c')); //键值不成对 就会报错
➜
StatementCallback; uncategorized SQLException for SQL [select to_json(named_struct('a','b','c'))]; SQL state [null]; error code [0];
----------------
select to_json(named_struct('time',to_timestamp('2015-08-26','yyyy-MM-dd')),map('timestampFormat','dd/MM/yyyy'));
➜
structstojson(named_struct(time, to_timestamp('2015-08-26', 'yyyy-MM-dd')))
{"time":"26/08/2015"}
------------------------------------
structstojson(named_struct(time, to_timestamp('2015-11-12 22:10:30', 'yyyy-MM-dd HH:mm:ss'))) |
---|
{"time":"2015-11-12T22:10:30.000+08:00"} |
-------------------------
structstojson(named_struct(time, to_timestamp('2015-12-11 10:23:01', 'yyyy-MM-dd HH:mm:ss'))) |
---|
{"time":"2015-10-11 10:23:01"} |
map('timestampFormat','yyyy-HH HH:mm:ss') ); map 函数可以有选择性的显示日期格式
{"time":"2015-10 10:23:01"},输出,年月 和 时分秒;
-------------------------
两项报错:
to_timestamp('2015-11-10 XXXXX',日期时间格式 ),函数,如果没有写日期格式就会报错
select to_json(named_struct('time',to_timestamp('2015-11-11 12:23:12')) , map('timestampFormat','yyyy-MM-dd HH:mm:ss') );
select to_json(named_struct('time',to_timestamp('2015-11-11') ) );
报错:StatementCallback; uncategorized SQLException for SQL [select
---------------------
5 、current_timestamp/now()获取当前时间
a | b |
---|---|
1627527952763 | 1627527952763 |
select current_date ;获取日期的年月日
current_date() |
---|
2021-07-29 |
select day(current_date) 或者 select day('2021-07-29')返回29,代表29号;
如果current_date 获取的日期是 2021-07-29 ,那么day(current_date)返回的结果:29;
------------------
select dayofweek('2021-07-29') ,
报错信息:StatementCallback; uncategorized SQLException for SQL
如果日期直接输入字符串,这样查询就会报错;
---------------
获取年份下当前第几周,从当年的第一天开始到当天之间的总天数 除以 7 得出的一个数据 N ;余数如果大于3 ,那么返回 N+ 1 ;如果余数小于3 那么返回 N;
select weekofyear(current_date); 或者 select weekofyear('2021-07-29');
返回结果:30;
d2 |
---|
1 |
d2 |
---|
1 |
d2 |
---|
2 |
select weekofyear('2021-01-15') as d2;
d2 |
---|
2 |
d2 |
---|
3 |
------------
select dayofyear('');
dd | de |
---|---|
210 | 210 |
------------
CAST(month(current_date()) AS STRING) 数值转换为字符串;
---------------------------------
年月日 , 时分秒;
select
year(current_date) as year,
month(current_date) as month,
day(current_date) as day,
hour(current_timestamp) as hour,
minute(current_timestamp) as minute,
second(current_timestamp) as second;
select
year(current_date) as year,
(
case length(cast( month(current_date) as string) )
when 1
then concat( '0' , cast( month(current_date) as string) )
else month(current_date)
end
) as month,
day(current_date) as day,
(
case length(hour(current_timestamp))
when 1
then concat( '0' , cast ( hour(current_timestamp) as string) )
else hour(current_timestamp)
end
) as hour,
(
case length(minute(current_timestamp))
when 1
then concat( '0' , cast( minute(current_timestamp) as string) )
else minute(current_timestamp)
end
) as minute,
(
case length(second(current_timestamp))
when 1
then concat( '0' , cast( second(current_timestamp) as string) )
else second(current_timestamp)
end
) as second;
--------------------
select date_format(current_date,'yyyy') as year,
date_format(current_date,'MM') as MM,
date_format(current_date,'dd') as DD,
date_format(current_timestamp,'HH') as HH,
date_format(current_timestamp,'mm') as monute,
date_format(current_timestamp,'ss') as ss
;
---------------------
当年到当天的天数 和 周数;
select dayofyear(current_date) as days,weekofyear(current_date) as weeks;
-----------------------
获取当前时间的年月日 时分秒 ;
select
from_unixtime(CAST(current_timestamp() AS BIGINT), 'yyyy-MM-dd HH:mm:ss') as ymd1 ,
from_unixtime(CAST(current_timestamp() AS BIGINT)) as ydm2
;
-------------------------------------
日期格式转换timestamp
SELECT
unix_timestamp(current_date, 'yyyy-MM-dd') as c1 ,
unix_timestamp('2021-07-29 12:12:12', 'yyyy-MM-dd HH:mm:ss') as c2
;
-------------------------
可以返回 { 年,月,日,时,分,秒 } 的任一元素值;
select from_unixtime(cast(current_timestamp() as bigint),'yyyy-MM-dd HH:mm:dd');
SELECT to_date('2016-04-08', 'yyyy-MM-dd');
SELECT to_date(current_date, 'yyyy-MM-dd');
quarter 将1年4等分(range 1 to 4)
SELECT quarter('2016-08-31');
日期、时间计算
SELECT months_between('2021-02-28 10:30:00', '2021-03-30');
.
.