Teradata SQL programming
Teradata的SQL设计和Oracle真不是一个水平, 一点美感的没有.
上个世纪它靠着MPP一招鲜吃变天, 居然做了十多年数据仓库的老大, 时过境迁, 现在有不少SQL On Hadoop 产品已经出来了, 考虑到scale out的成本和能力, Teradata 数据仓库优势荡然全无. 将来必将会被SQL on Hadoop/Spark替代.
毕竟在Teradata上做了几年, 也该写点总结. 下面是我常用的一些编程知识
--字符串函数
SELECT 'FirstName' || ' ' || 'LastName' as Full_Name;
CHAR2HEXINT ('A')
--would result in the value ‘0041’.
LOWER()
substr()
TRIM()
UPPER()
CHARACTERS() 或 character_length() 得到字符串的长度
SELECT position ('a' in 'Name')
oracle version replace(), no way. write your code
---type() 可返回 字段的类型,
SELECT
CAST(100.00 AS DECIMAL(15,2)) * CAST(100.00 AS DECIMAL(15,2)) AS C1
, TYPE(C1)
--获取字段类型
select type('abc')
--get current date
select current_date
select current_time
select current_timestamp(0) --不带毫秒
select current_timestamp --带6位毫秒, 最高精度
--日期加减
select current_date+1 --得到明天
select add_months(current_timestamp,1) --得到下一个月
select add_months(current_date ,1)-current_date --两个日期相差多少天
select add_months(current_timestamp,1) -current_timestamp day(4) to second --两个时间戳相减, 仍是时间戳
--使用INTERVAL进行时间日期的增减
Select current_date + interval '1' year
Select current_date - interval '1' year
Select current_date + interval '1' month
Select current_date + interval '1' day
Select current_timestamp + interval '1' hour
Select current_timestamp + interval '1' minute
Select current_timestamp + interval '1' second
--两个 timestamp 相减, 结果仍是一个timestamp, 而日期相减, 结果为相差的天数
select cast (endtime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') -
cast (starttime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') DAY(4) TO SECOND ,
a. * from PETL.ETL_JOB_STATUS A
where 1=1
and jobstatus='Done';
The DAY(4) specifies four digits of precision, and allows for a maximum of 9999 days, or
approximately 27 years.
-- timestamp 相减, 前3个写法, 要求时间跨度不能太大, 会溢出的
select ((current_timestamp + interval '1' hour ) -current_timestamp) day(4) --得到的差多少天
select ((current_timestamp + interval '1' hour ) -current_timestamp) hour(4) --得到的差多少小时
select ((current_timestamp + interval '25' hour ) -current_timestamp) minute(4) --得到的差多少分
select ((current_timestamp + interval '1' hour ) -current_timestamp) day(4) to second --得到的是timestamp
--统计执行总时长
select txdate
,sum(extract(DAY from duration)) * 24.000
+sum(extract(HOUR from duration)) *1.000
+sum(extract(MINUTE from duration)) /60.000
+sum(extract(SECOND from duration)) /3600.000
as duration_hours from
(
select cast (endtime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') -
cast (starttime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') day(4) TO SECOND duration,
txdate from PETL.ETL_JOB_STATUS A
where 1=1
and jobstatus='Done'
and A.txdate>=date'2012-06-01'
and A.txdate<date'2012-07-19'
) xx
group by xx.txdate
order by xx.txdate
;
--从日期中提取年月日
select EXTRACT(YEAR FROM current_date)
YEAR
MONTH
DAY
HOUR
MINUTE
SECOND
TIMEZONE_HOUR
TIMEZONE_MINUTE
--几个有用的查询
select user
select session
select role
select * from sys_calendar.calendar
--字符转日期
select date'2012-05-12'
select CAST('20120512' AS DATE FORMAT 'YYYYMMDD')
--字符转 timestamp
select cast ('20120512 231056' as timestamp(0) FORMAT 'YYYYMMDDBHHMISS' )
--日期 转 字符串
--recommended usage, statement 1
SELECT CAST(cast(current_date AS FORMAT 'yyyymmdd') as varchar(8))
select current_date (format 'YYYYMMDD') (varchar(8))
--timestamp 转 字符串
--recommended usage, statement 1
SELECT CAST(CAST(TIMESTAMP'2010-03-12 14:32:45' AS FORMAT 'yyyymmddbhh:mi:ssbt') AS varchar(20));
select current_timestamp (format 'YYYYMMDDBHHMISS') (varchar(15))
SELECT CAST(TIMESTAMP'2010-03-12 14:32:45' AS FORMAT 'yyyymmddbhh:mi:ssbt') (varchar(20));
--error when runing
select current_date (format 'YYYYMMDD') (varchar(8))||'abc'
--how to achieve
select CAST(cast(current_date AS FORMAT 'yyyymmdd') as varchar(8))||'abc'
--合并日期和时间 为 timestamp
SELECT CAST(CAST(CURRENT_DATE AS FORMAT 'YYYY-MM-DD') || ' ' || CAST(CAST(CURRENT_TIME AS FORMAT 'HH:MI:SS') AS CHAR(8)) AS TIMESTAMP(0));
--searched case 语句
case
when AA=v1 then r1
when AA=v2 then r2
else null
end
--value case 语句
case AA
when v1 then r1
when v2 then r2
else null
end
--case 变种 NULLIF
NULLIF returns NULL if its arguments are equal. Otherwise, it returns its first argument,
scalar_expression_1.
--case 变种 COALESCE
COALESCE returns NULL if all its arguments evaluate to null. Otherwise, it returns the value
of the first non-null argument in the scalar_expression list.
oracle:nvl(f1,f2...),teradata:coalesce(f1,f2...)
=====================================
建表
=====================================
--创建 MULTISET 表
MULTISET:默认为 SET;
NO LOG:默认为 LOG,LOG指示维护交易日志,NO LOG 的性能更好;
create MULTISET table t1
(f1 integer, f2 integer) PRIMARY INDEX ( f1 ) ;
;
--创建临时表
CREATE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS PDATA.EQP_PERF_HIS
WITH NO DATA
PRIMARY index (fab_code)
;
CREATE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS (select * from PDATA.EQP_PERF_HIS)
WITH NO DATA
--WITH DATA
PRIMARY index (fab_code)
;
真正的临时表 volatile , session 结束后自动drop, 也可以手动删除
CREATE VOLATILE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS (select * from PDATA.EQP_PERF_HIS)
WITH NO DATA
--WITH DATA
PRIMARY index (fab_code)
还有一种 GLOBAL TEMPORARY TABLE, 不常用.
大小写敏感
--default, 大小写不敏感
select f from (select 'a' f ) dual_a where 1=1 and f = 'A'
select f from (select 'a' f ) dual_a where 1=1 and f like '%A%'
--大小写敏感的写法
select f from (select 'a' f ) dual_a where 1=1 and f(casespecific) like '%A%'
select * from scott.emp where ename(CASESPECIFIC) = 'FAN';--使用关键字CASESPECIFIC将区分大小写
转义 _ 字符, 下例是使用\取转义_
select * from like_escape_test where some_text like '%\_%' escape '\';
top n 语句
select top 10 * from table_a order by field_a;
UPDATE Specifying a Correlation Name for the Updated Table in the FROM Clause
需要说明的是, SET 子句中的目标字段不能加表名alias.
UPDATE e
FROM employee AS e, department AS d
SET salary = salary * 1.05
WHERE e.emp_no = d.emp_no
AND d.name LIKE '%Support%'
update join 语句
UPDATE employee
SET salary_amount=salary_amount * 1.10
WHERE employee.dep_no = department.dep_no
AND department.name LIKE '%Support%'
;
Note: In an update, you can't use the ON clause,
so the join condition is specified in the WHERE clause.
在SP中, 可以使用变量, 但在Macro中, 是不能使用变量. 声明变量必须放在SP的开头部分. 语法为:
DECLARE vcount INTEGER DEFAULT 0;
DECLARE temp1, par1 VARCHAR(40) DEFAULT NULL;
Teradata没有 oracle的打印功能, 下面的语句并不能输出
PRINT 'EmpNo:', vcount;
使用游标 cursor
FOR loopvar AS cur1 CURSOR FOR
SELECT employee_number, department_number FROM employee
DO
PRINT 'EmpNo:', loopvar.employee_number;
END FOR;
上面的例子中示范了游标的规则:
1.声明游标,需要使用 FOR 语句。
2.要赋予游标一个名字,例子中的名字为 cur1
3.要给循环赋一个名字 loopvar
定义SP
REPLACE PROCEDURE PDATA.SP_WAT_PARAMETERS()
BEGIN
END;
定义Macro
REPLACE MACRO MARTVIEW_KPI.EIS_INDEX_RESULT_MACRO(SQL_Date VARCHAR(20))
AS
(
);
-- 动态执行sql语句
--===========================
SET Sql_text = 'DELETE FROM temp_Table' ;
CALL DBC.SYSEXECSQL(:Sql_text) ;
=====================================
-- bteq 命令行工具
=====================================
bteq工具的调用方法是:
bteq <sql_file.btq >log.txt
下面是sql_file.btq文件的内容
.LOGON ip/userid,pwd
drop table SMCTA.SS1SDBSDB_TB_INFO_EQP4687 ;
;
.IF ERRORCODE <> 0 THEN .GOTO QUITWITHERROR;
SELECT 1 ;
.IF ACTIVITYCOUNT = 0 THEN .GOTO QUITWITHNOERROR;
.GOTO QUITWITHNOERROR;
.LABEL QUITWITHERROR
.LOGOFF;
.QUIT 12;
.LABEL QUITWITHNOERROR
.LOGOFF;
.QUIT 0;
.LOGOFF
上面是sql_file.btq文件的内容
=====================================
-- jdbc访问
=====================================
JDBC connection string:
url="jdbc:teradata://edwprd/TMODE=TERA,CHARSET=ASCII,CLIENT_CHARSET=cp936,DATABASE=TESTDB,lob_support=off"
edwprd为IP, 考虑到Teradata是多节点一体机, 最好是在hosts中, 定义一个域名解析规则, 这样就有了多节点冗余的功能.
Hosts:
153.65.129.189 edwprd dbccop1
153.65.129.190 edwprd dbccop2
153.65.129.191 edwprd dbccop3
153.65.129.192 edwprd dbccop4
上个世纪它靠着MPP一招鲜吃变天, 居然做了十多年数据仓库的老大, 时过境迁, 现在有不少SQL On Hadoop 产品已经出来了, 考虑到scale out的成本和能力, Teradata 数据仓库优势荡然全无. 将来必将会被SQL on Hadoop/Spark替代.
毕竟在Teradata上做了几年, 也该写点总结. 下面是我常用的一些编程知识
--字符串函数
SELECT 'FirstName' || ' ' || 'LastName' as Full_Name;
CHAR2HEXINT ('A')
--would result in the value ‘0041’.
LOWER()
substr()
TRIM()
UPPER()
CHARACTERS() 或 character_length() 得到字符串的长度
SELECT position ('a' in 'Name')
oracle version replace(), no way. write your code
---type() 可返回 字段的类型,
SELECT
CAST(100.00 AS DECIMAL(15,2)) * CAST(100.00 AS DECIMAL(15,2)) AS C1
, TYPE(C1)
--获取字段类型
select type('abc')
--get current date
select current_date
select current_time
select current_timestamp(0) --不带毫秒
select current_timestamp --带6位毫秒, 最高精度
--日期加减
select current_date+1 --得到明天
select add_months(current_timestamp,1) --得到下一个月
select add_months(current_date ,1)-current_date --两个日期相差多少天
select add_months(current_timestamp,1) -current_timestamp day(4) to second --两个时间戳相减, 仍是时间戳
--使用INTERVAL进行时间日期的增减
Select current_date + interval '1' year
Select current_date - interval '1' year
Select current_date + interval '1' month
Select current_date + interval '1' day
Select current_timestamp + interval '1' hour
Select current_timestamp + interval '1' minute
Select current_timestamp + interval '1' second
--两个 timestamp 相减, 结果仍是一个timestamp, 而日期相减, 结果为相差的天数
select cast (endtime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') -
cast (starttime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') DAY(4) TO SECOND ,
a. * from PETL.ETL_JOB_STATUS A
where 1=1
and jobstatus='Done';
The DAY(4) specifies four digits of precision, and allows for a maximum of 9999 days, or
approximately 27 years.
-- timestamp 相减, 前3个写法, 要求时间跨度不能太大, 会溢出的
select ((current_timestamp + interval '1' hour ) -current_timestamp) day(4) --得到的差多少天
select ((current_timestamp + interval '1' hour ) -current_timestamp) hour(4) --得到的差多少小时
select ((current_timestamp + interval '25' hour ) -current_timestamp) minute(4) --得到的差多少分
select ((current_timestamp + interval '1' hour ) -current_timestamp) day(4) to second --得到的是timestamp
--统计执行总时长
select txdate
,sum(extract(DAY from duration)) * 24.000
+sum(extract(HOUR from duration)) *1.000
+sum(extract(MINUTE from duration)) /60.000
+sum(extract(SECOND from duration)) /3600.000
as duration_hours from
(
select cast (endtime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') -
cast (starttime as timestamp(0) format 'yyyy-mm-ddbhh:mi:ss') day(4) TO SECOND duration,
txdate from PETL.ETL_JOB_STATUS A
where 1=1
and jobstatus='Done'
and A.txdate>=date'2012-06-01'
and A.txdate<date'2012-07-19'
) xx
group by xx.txdate
order by xx.txdate
;
--从日期中提取年月日
select EXTRACT(YEAR FROM current_date)
YEAR
MONTH
DAY
HOUR
MINUTE
SECOND
TIMEZONE_HOUR
TIMEZONE_MINUTE
--几个有用的查询
select user
select session
select role
select * from sys_calendar.calendar
--字符转日期
select date'2012-05-12'
select CAST('20120512' AS DATE FORMAT 'YYYYMMDD')
--字符转 timestamp
select cast ('20120512 231056' as timestamp(0) FORMAT 'YYYYMMDDBHHMISS' )
--日期 转 字符串
--recommended usage, statement 1
SELECT CAST(cast(current_date AS FORMAT 'yyyymmdd') as varchar(8))
select current_date (format 'YYYYMMDD') (varchar(8))
--timestamp 转 字符串
--recommended usage, statement 1
SELECT CAST(CAST(TIMESTAMP'2010-03-12 14:32:45' AS FORMAT 'yyyymmddbhh:mi:ssbt') AS varchar(20));
select current_timestamp (format 'YYYYMMDDBHHMISS') (varchar(15))
SELECT CAST(TIMESTAMP'2010-03-12 14:32:45' AS FORMAT 'yyyymmddbhh:mi:ssbt') (varchar(20));
--error when runing
select current_date (format 'YYYYMMDD') (varchar(8))||'abc'
--how to achieve
select CAST(cast(current_date AS FORMAT 'yyyymmdd') as varchar(8))||'abc'
--合并日期和时间 为 timestamp
SELECT CAST(CAST(CURRENT_DATE AS FORMAT 'YYYY-MM-DD') || ' ' || CAST(CAST(CURRENT_TIME AS FORMAT 'HH:MI:SS') AS CHAR(8)) AS TIMESTAMP(0));
--searched case 语句
case
when AA=v1 then r1
when AA=v2 then r2
else null
end
--value case 语句
case AA
when v1 then r1
when v2 then r2
else null
end
--case 变种 NULLIF
NULLIF returns NULL if its arguments are equal. Otherwise, it returns its first argument,
scalar_expression_1.
--case 变种 COALESCE
COALESCE returns NULL if all its arguments evaluate to null. Otherwise, it returns the value
of the first non-null argument in the scalar_expression list.
oracle:nvl(f1,f2...),teradata:coalesce(f1,f2...)
=====================================
建表
=====================================
--创建 MULTISET 表
MULTISET:默认为 SET;
NO LOG:默认为 LOG,LOG指示维护交易日志,NO LOG 的性能更好;
create MULTISET table t1
(f1 integer, f2 integer) PRIMARY INDEX ( f1 ) ;
;
--创建临时表
CREATE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS PDATA.EQP_PERF_HIS
WITH NO DATA
PRIMARY index (fab_code)
;
CREATE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS (select * from PDATA.EQP_PERF_HIS)
WITH NO DATA
--WITH DATA
PRIMARY index (fab_code)
;
真正的临时表 volatile , session 结束后自动drop, 也可以手动删除
CREATE VOLATILE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS (select * from PDATA.EQP_PERF_HIS)
WITH NO DATA
--WITH DATA
PRIMARY index (fab_code)
还有一种 GLOBAL TEMPORARY TABLE, 不常用.
大小写敏感
--default, 大小写不敏感
select f from (select 'a' f ) dual_a where 1=1 and f = 'A'
select f from (select 'a' f ) dual_a where 1=1 and f like '%A%'
--大小写敏感的写法
select f from (select 'a' f ) dual_a where 1=1 and f(casespecific) like '%A%'
select * from scott.emp where ename(CASESPECIFIC) = 'FAN';--使用关键字CASESPECIFIC将区分大小写
转义 _ 字符, 下例是使用\取转义_
select * from like_escape_test where some_text like '%\_%' escape '\';
top n 语句
select top 10 * from table_a order by field_a;
UPDATE Specifying a Correlation Name for the Updated Table in the FROM Clause
需要说明的是, SET 子句中的目标字段不能加表名alias.
UPDATE e
FROM employee AS e, department AS d
SET salary = salary * 1.05
WHERE e.emp_no = d.emp_no
AND d.name LIKE '%Support%'
update join 语句
UPDATE employee
SET salary_amount=salary_amount * 1.10
WHERE employee.dep_no = department.dep_no
AND department.name LIKE '%Support%'
;
Note: In an update, you can't use the ON clause,
so the join condition is specified in the WHERE clause.
在SP中, 可以使用变量, 但在Macro中, 是不能使用变量. 声明变量必须放在SP的开头部分. 语法为:
DECLARE vcount INTEGER DEFAULT 0;
DECLARE temp1, par1 VARCHAR(40) DEFAULT NULL;
Teradata没有 oracle的打印功能, 下面的语句并不能输出
PRINT 'EmpNo:', vcount;
使用游标 cursor
FOR loopvar AS cur1 CURSOR FOR
SELECT employee_number, department_number FROM employee
DO
PRINT 'EmpNo:', loopvar.employee_number;
END FOR;
上面的例子中示范了游标的规则:
1.声明游标,需要使用 FOR 语句。
2.要赋予游标一个名字,例子中的名字为 cur1
3.要给循环赋一个名字 loopvar
定义SP
REPLACE PROCEDURE PDATA.SP_WAT_PARAMETERS()
BEGIN
END;
定义Macro
REPLACE MACRO MARTVIEW_KPI.EIS_INDEX_RESULT_MACRO(SQL_Date VARCHAR(20))
AS
(
);
-- 动态执行sql语句
--===========================
SET Sql_text = 'DELETE FROM temp_Table' ;
CALL DBC.SYSEXECSQL(:Sql_text) ;
=====================================
-- bteq 命令行工具
=====================================
bteq工具的调用方法是:
bteq <sql_file.btq >log.txt
下面是sql_file.btq文件的内容
.LOGON ip/userid,pwd
drop table SMCTA.SS1SDBSDB_TB_INFO_EQP4687 ;
;
.IF ERRORCODE <> 0 THEN .GOTO QUITWITHERROR;
SELECT 1 ;
.IF ACTIVITYCOUNT = 0 THEN .GOTO QUITWITHNOERROR;
.GOTO QUITWITHNOERROR;
.LABEL QUITWITHERROR
.LOGOFF;
.QUIT 12;
.LABEL QUITWITHNOERROR
.LOGOFF;
.QUIT 0;
.LOGOFF
上面是sql_file.btq文件的内容
=====================================
-- jdbc访问
=====================================
JDBC connection string:
url="jdbc:teradata://edwprd/TMODE=TERA,CHARSET=ASCII,CLIENT_CHARSET=cp936,DATABASE=TESTDB,lob_support=off"
edwprd为IP, 考虑到Teradata是多节点一体机, 最好是在hosts中, 定义一个域名解析规则, 这样就有了多节点冗余的功能.
Hosts:
153.65.129.189 edwprd dbccop1
153.65.129.190 edwprd dbccop2
153.65.129.191 edwprd dbccop3
153.65.129.192 edwprd dbccop4