SAS--SQL过程(基础版)
本文总结SAS中的SQL过程编程技术(基础版)
Part 1:【select语句综述】
子句顺序(select和from是必要的其他可选):
select
from
where
group by
having
order by
1.建立逻辑库访问数据
libname resdat "F:\SAS\ResDat"; /*resdat是一个已经包含本文所用数据的SAS格式的数据集,通过该文件路径建立逻辑库来实现数据集访问*/
2.选择所有列
proc sql outobs=3; /*outobs=规定输出的观测个数*/
select* /*符号*表示选择所有列*/
from ResDat.dret; /*from+源数据表*/
run;
3.选择特定列
proc sql outobs=3;
title'股票代码和名称'; /*title设置表名*/
select stkcd,lstknm /*select+列名*/
from resdat.lstkinfo;
quit;
4.查看表属性
proc sql;
describe table resdat.lstkinfo;
quit;
5.创建说明列
proc sql outobs=3;
select 'stock code for',lstknm,'is',stkcd
from resdat.lstkinfo;
quit;
6.创建新列值
proc sql outobs=3;
title'trading sum';
select stkcd,lstknm,clpr*trdvol format=12.2 /*创建新列值,format用于设定输出格式*/
from resdat.qttndist;
quit;
proc sql outobs=3;
title 'trading sum';
select stkcd,lstknm,clpr*mcfacpr as adjpr format 8.2, /*为列分配别名*/
(calculated adjpr*trdvol) as trdsum format 12.2 /*使用别名引用一个计算过的列值时须使用calculated关键词*/
from resdat.qttndist;
quit;
7.case语句
proc sql outobs=3; /*case可以设定带有逻辑条件的语句*/
select stkcd,lstknm,lstdt,
case
when exchflg='1' then '上交所'
when exchflg='2' then '深交所'
else'所有交易所'
end as exchflg /*case最后必须使用end关键词*/
from resdat.lstkinfo
order by lstdt;
quit;
proc sql outobs=3; /*case的另一种表达*/
select stkcd,lstknm,lstdt,
case exchflg
when'1' then '上交所'
when'2' then '深交所'
else'所有交易所'
end as newexchflg
from resdat.lstkinfo;
quit;
8.order by语句
proc sql outobs=3;
select stkcd,lstknm,date,clpr*mcfacpr as adjpr format 8.2
from resdat.qttndist
order by adjpr desc; /*按照adjpr降序排列*/
quit;
proc sql outobs=3;
select stkcd,lstknm,date,clpr*mcfacpr as adjpr format 8.2
from resdat.qttndist
order by 4 desc; /*与上式效果同,按照第4列(adjpr)降序排列*/
quit;
9.where语句
proc sql outobs=3;
select lstknm,stkcd from resdat.lstkinfo
where stkcd in('000001' '600651' '000004'); /*in算符:找出括号内对应的观测*/
quit;
proc sql outobs=3;
select lstknm,stkcd from resdat.lstkinfo
where lzipsd is missing; /*is missing找出含缺失值的观测,与is null等价*/
quit;
proc sql;
create table stkinfo1991 as select*from resdat.lstkinfo
where lstdt between'1jan1991'd and '31dec1991'd; /*between and 算符:返回1991年的数据*/
quit;
proc sql;
select stkcd,lstknm from resdat.lstkinfo
where lstknm like'ST%'; /*like算符:匹配查询*/
quit;
proc sql;
select stkcd,lstknm from resdat.lstkinfo
where lstknm eqt'ST'; /*截短算符,与上等价*/
quit;
10.汇总函数
注:汇总函数计算时一般不考虑缺失值
proc sql;
select sum(dividend) format=8.2 as totledv /*sum函数:对dividend变量求和,生成新列totledv*/
from resdat.stk000001;
quit;
proc sql;
select count(*)as number /*count函数:汇总所有观测个数*/
from resdat.lstkinfo;
quit;
proc sql;
select count(distinct stkcd)as count /*存在重复观测值时的观测数汇总,所有相同值只汇总一次,忽略缺失值*/
from resdat.qttndist;
quit;
proc sql;
select avg(trdsum) as avgtrdsum format 13.2 /*avg函数:返回非空值的平均值*/
from resdat.idx000001;
quit;
若想考虑进来缺失值,可进行如下操作:
proc sql;
select
case
when trdsum is missing then 0 /*将缺失值置为0*/
else trdsum
end as newtrdsum,
avg(calculated newtrdsum)as avgtrdsum format 13.2
from resdat.idx000001;
quit;
proc sql outobs=3;
title'2005年股票月收益平均值';
select stkcd,avg(monret) as amret from resdat.monret
where'1jan2005'd<=date<='31dec2005'd
group by stkcd /*group by: 按stkcd进行分组*/
order by calculated amret desc; /*order by: 按amret进行排序*/
quit;
proc sql; /*列出2005年交易天数不小于240天的股票*/
select stkcd,count(*) as trday from resdat.dret
where '1jan2005'd<=date<='31dec2005'd
group by stkcd
having calculated trday>=240; /*group by+having语句*/
quit;
Part 2:【多表检索(横向连接)】
china:
usa:
1.内连接
proc sql;
select* from resdat.china,resdat.usa /*简单的内连接,返回where设定的匹配条件的连接行*/
where china.level=usa.level;
quit;
proc sql;
select* from resdat.china as a,resdat.usa as b /*使用别名,as可不写*/
where a.level=b,level;
quit;
proc sql;
select * from resdat.china a inner join resdat.china b /*使用关键词inner join的内连接,这里相当于自连接*/
on a.level=b.level
order by level;
quit;
proc sql outobs=3; /*多表连接*/
select a.stkcd,b.lstknm,c.clpr
from resdat.sampstk a,resdat.lstkinfo b,resdat.qttndist c
where a.stkcd=b.stkcd and b.stkcd=c.stkcd and a.stkcd=c.stkcd;
quit;
2.外连接
proc sql;
select * from resdat.china a left join resdat.usa b /*左外连接,以左边表为匹配条件,不匹配的在右边列为空值*/
on a.level=b.level;
quit;
proc sql;
select * from resdat.china a right join resdat.usa b /*右外连接:与上相反*/
on a.level=b.level;
quit;
proc sql;
select * from resdat.china a full join resdat.usa b /*完全外连接:包括所有不匹配的行(不管左表右表)*/
on a.level=b.level;
quit;
/*merge语句:需要对数据预排序*/
data merge;
merge a b;
by code;
run;
Part 3:【子查询语句】
proc sql;
select*from a
where code eq (select code from b where assistant='chen'); /*单值子查询*/
quit;
proc sql;
select stkcd,lstknm,lstdt from resdat.lstkinfo
where stkcd in (select stkcd from resdat.sampstk); /*多值子查询*/
quit;
proc sql;
select stkcd,yrret from resdat.yrret a
where (select stktype from resdat.lstkinfo b /*混合子查询*/
where a.stkcd=b.stkcd)='A'
and'1jan2005'd<=date<='31dec2005'd;
proc sql;
select stkcd,yrret from resdat.yrret a
where exists(select stktype from resdat.lstkinfo b /*exists语句:检测一组数据存在性,与上式等价*/
where a.stkcd=b.stkcd and stktype='A')
and'1jan2005'd<=date<='31dec2005'd;
quit;
Part 4:【SET算符:合并多个查询结果(纵向连接)】
a:
b:
proc sql; /*union:并集,合并两个查询结果,返回所有非重复观测*/
select*from a
union /*若使用unoin all:返回所有观测*/
select*from b;
quit;
proc sql; /*except:差集返回只属于第一个查询(a)的观测*/
select*from a
except
select*from b;
quit;
proc sql; /*intersect:交集,返回两个查询的公共部分*/
select*from a
intersect
select*from b;
quit;
proc sql; /*outer union:直接连接查询结果(横向),不合并相同名称和数据类型的列*/
select*from a
outer union /*若使用outer union corr:合并两表中相同名称的列*/
select*from b;
quit;
Part 5:【创建与更新表】
1.创建表
1)列定义方式创建表
proc sql;
create table newstkinfo
(
stkcd char(6),
lstknm char(20),
lstdt num informat=date9. /*规定输入和输出格式*/
format=date9.
);
quit;
2)从查询结果创建表
proc sql outobs=3;
create table stkinfo as
select stkcd,lstknm,lstdt
from resdat.lstkinfo;
quit;
3)通过复制创建表
proc sql;
create table stkinfo2 as select*from stkinfo;
quit;
4)/*通过like复制已有表的结构属性来创建空表*/
proc sql;
create table stkinfo3 like stkinfo2;
5)查看表格属性
proc sql;
describe table stkinfo3;
2.在表中插入行
1)
proc sql; /*set子句插入观测,赋值时漏掉某个列,默认缺失*/
insert into stkinfo3
set stkcd='000002',lstknm='万科A',lstdt='29jan1991'd
set stkcd='000004',lstknm='ST国农',lstdt='14jun1991'd;
quit;
2)
proc sql; /*values插入观测,每列都需要赋值,.和''可赋空值*/
insert into stkinfo3
values('000001','s深发展A',.)
values('000005','','10dec1990'd);
quit;
3)
proc sql; /*在表中插入查询结果*/
create table stkinfo4 like stkinfo3;
proc sql;
insert into stkinfo4
select* from stkinfo3 where lstdt is not missing;
quit;
3.更新表数据(update)
data invest;
input stkcd$ stknm $ inv;
cards;
000001 深发展 100000
000002 万科A 50000
000003 方正科技 40000
;
run;
1)同种方式更新
proc sql;
update invest
set inv=inv*1.1; /*把inv提高10%*/
quit;
2)不同方式更新
proc sql;
update invest set inv=inv*1.5 where stkcd='000001';
update invest set inv=inv*1.4 where stkcd='000002';
update invest set inv=inv-10000 where stkcd='000003';
select * from invest;
quit;
3)删除观测行
proc sql;
delete from invest where stkcd='000003';
quit;
4.列修改(alter)
1)add增加列
proc sql;
alter table invest
add total num label='总股本',ratio num label='投资比例';
quit;
2)利用update补充新增列的值
proc sql;
update invest set total=400000;
update invest set ratio=inv/total;
quit;
3)modify用于改变列的长度/输入格式/输出格式/标签
proc sql;
alter table invest
modify stkcd char(10) format=$ 10.;
update invest set stkcd='res'||stkcd; /*连接符*/
select*from invest (rename=(stkcd=resstkcd)); /*改变列名*/
quit;
4) 删除列
proc sql;
alter table invest
drop stknm;
5)删除表
proc sql;
drop table invest;
附件:本文所用数据集resdat