Spark

官方文档

spark sql 函数

表结构相关

显示该表的建表语句

SHOW CREATE TABLE `database`.`tableName`

显示该表的列名、列的类型、列的注释

DESC `database`.`tableName`

显示该表的列名

SHOW COLUMNS FROM `database`.`tableName`

读相关

字符串

-- 将字符串按某个字符分割成数组
select split('a,b,c', ',')[0]; -> a

-- 将字符串中的特定字符串 替换为 指定字符串
select replace('abc bca test', ' ',',') -> abc,bca,test

-- 截取字符串
select subStr('123456abcd',1,5) -> 12345

case when

(case `col` when xxx then  xxx  else xxx end ) as xxx;
select  
str, 
(case (split(str,'-')[0]) when split(str,'-')[0] then split(str,'-')[0]  else '' end )  as first_id,
 (case (split(str,'-')[1]) when split(str,'-')[1]  then split(str,'-')[1]  else '' end )  as sec_id,
  (case (split(str,'-')[2]) when split(str,'-')[2]  then split(str,'-')[2]  else '' end )  as third_id
  from (
  select '123-456' as str
  ) a
str     first_id sec_id third_id 
123-456 123      456

日期、时间

-- 当前时间,type:timeStamp
SELECT now();

-- 将字符串 转为 date 类型
SELECT to_date('2022-11-10 11:00:01')

-- 计算两个时间相差月份,type: double 
-- months_between(CAST(2022-11-20 AS TIMESTAMP), CAST(2022-05-02 AS TIMESTAMP), true)
select months_between('2022-11-20','2022-05-02')

-- 计算两个时间相差天数
-- DATEDIFF(now(), '2023-10-08 00:00:00')  
select DATEDIFF('2023-10-08 00:00:00', now())

-- timestamp
-- to_timestamp(String,formatter)
select to_timestamp('202211211100','yyyyMMddHHmm') time  2022-11-21 11:00:00.0

Map

-- 转为Map<K,V> 类型 
select map('key','value')

Array

--  判断数组是否包含'a'
select array_contains (split('a,b,c', ','),'a'); -> true
-- 转为 Array 类型
select array("1","2","3") 

Struct

-- 转为struct<col1:string,col2:string,col3:string> 类型
select struct('a','b','c');

Agg

// 把多行聚合成一行,type:list
collect_list(`col`)
// 同上,type: set
collect_set(`col`)

explode

// 将数组拆成多行
select explode(array("1","2","3")) as col
col
1
2
3

lateral view + explode

-- 将列名拼接起来
select id,  number from
(
select 1 as id , array("1","2","3") as arr
) a  lateral view explode (arr) as number

id number
1  1
1  2 
1  3

Json

-- 转为json 字符串
select to_json(struct('a','b','c')); -> {"col1":"a","col2":"b","col3":"c"}
select to_json(map('key','value')); -> {"key":"value"}
select to_json(split('a,b,c', ',')); -> ["a","b","c"]

-- 解析json
select get_json_object('["a","b","c"]','$[0]'); -> a
select get_json_object('["a","b","c"]','$[*]'); -> a,b,c

select get_json_object('{"key":"value"}','$.key')  -> value

写相关

插入数据进某表

INSERT OVERWRITE TABLE  `database`.`tableName`
(
  SELECT * FROM `tableName2`
2
posted @   Eiffelzero  阅读(15)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
点击右上角即可分享
微信分享提示