【SAS NOTES】实际分析应用

 1 data guanhui.visit_8_12; /* Import the customer visit log ('|'-delimited text, first line skipped) and build the numeric datetime visit_time. */
 2     infile 'E:\****.txt' dlm='|' firstobs=2 truncover; /* truncover: a record with too few fields yields missing values instead of pulling data from the next line */
 3     input user :$11. serv_number :$11. log_time :$16. log_IP :$15. content :$100.;
 4     log_date=scan(log_time,1,' '); /* part of log_time before the blank */
 5         log_year=scan(log_date,1,'-');
 6         log_month=scan(log_date,2,'-');
 7         log_day=scan(log_date,3,'-');
 8     log_clock=scan(log_time,2,' '); /* part of log_time after the blank */
 9         log_hour=scan(log_clock,1,':');
10         log_minute=scan(log_clock,2,':');
11     visit_time=DHMS(mdy(input(log_month,best12.),input(log_day,best12.),input(log_year,best12.)),input(log_hour,best12.),input(log_minute,best12.),0); /* scan() returns text: convert explicitly rather than relying on automatic char-to-numeric conversion; seconds fixed at 0 */
12 run;
13 proc print data=guanhui.visit_8_12 (firstobs=1 obs=12); /* preview the first 12 observations */
14     format visit_time datetime18.; /* display-only format; it is not stored in the data set */
15 run;
16 data guanhui.buydetail; /* Import the purchase detail file ('|'-delimited text, first line skipped) and build the numeric datetime buy_time. */
17     infile 'E:\****.txt' dlm='|' firstobs=2 truncover; /* truncover: a record with too few fields yields missing values instead of pulling data from the next line */
18     input serv_number :$11. pay_way :$50. fee pay_date :$19. pay_month :$10. goods_name :$100. goods_detail :$100.;
19     buy_date=scan(pay_date,1,' '); /* part of pay_date before the blank */
20     buy_clock=scan(pay_date,2,' '); /* part of pay_date after the blank */
21         buy_year=scan(buy_date,1,'-');
22         buy_month=scan(buy_date,2,'-');
23         buy_day=scan(buy_date,3,'-');
24         buy_hour=scan(buy_clock,1,':');
25         buy_minute=scan(buy_clock,2,':');
26         buy_second=scan(buy_clock,3,':');
27     buy_time=DHMS(mdy(input(buy_month,best12.),input(buy_day,best12.),input(buy_year,best12.)),input(buy_hour,best12.),input(buy_minute,best12.),input(buy_second,best12.)); /* scan() returns text: convert explicitly rather than relying on automatic char-to-numeric conversion */
28 run;
29 proc print data=guanhui.buydetail (firstobs=1 obs=12); /* preview the first 12 observations */
30     format buy_time datetime18.; /* display-only format; it is not stored in the data set */
31 run;
32 data guanhui.existuser; /* Import the existing-customer list ('|'-delimited text, first line skipped) and build the numeric date logon_time. */
33     infile 'E:\****.txt' dlm='|' firstobs=2 truncover; /* truncover: a record with too few fields yields missing values instead of pulling data from the next line */
34     input user :$20. serv_number :$11. logon_date :$15.;
35         logon_year=scan(logon_date,1,'-');
36         logon_month=scan(logon_date,2,'-');
37         logon_day=scan(logon_date,3,'-');
38         logon_time=mdy(input(logon_month,best12.),input(logon_day,best12.),input(logon_year,best12.)); /* a SAS date value despite the name; scan() returns text, so convert explicitly */
39 run;
40 proc print data=guanhui.existuser (firstobs=1 obs=10); /* preview the first 10 observations */
41     format logon_time mmddyy8.; /* display-only format; it is not stored in the data set */
42 run;
43 data guanhui.activeuser; /* Import the 2012 active-user list (tab-delimited text, first line skipped) and build the numeric datetime lastlog_t. */
44     infile 'E:\*****.txt' dlm='09'x firstobs=2 truncover; /* '09'x is the tab character; truncover: a short record yields missing values instead of pulling data from the next line */
45     input user :$15. serv_number :$11. lastlog_time :$20.;
46     lastlog_date=scan(lastlog_time,1,' '); /* part of lastlog_time before the blank */
47         lastlog_year=scan(lastlog_date,1,'-');
48         lastlog_month=scan(lastlog_date,2,'-');
49         lastlog_day=scan(lastlog_date,3,'-');
50     lastlog_clock=scan(lastlog_time,2,' '); /* part of lastlog_time after the blank */
51         lastlog_hour=scan(lastlog_clock,1,':');
52         lastlog_minute=scan(lastlog_clock,2,':');
53     lastlog_t=DHMS(mdy(input(lastlog_month,best12.),input(lastlog_day,best12.),input(lastlog_year,best12.)),input(lastlog_hour,best12.),input(lastlog_minute,best12.),0); /* scan() returns text: convert explicitly rather than relying on automatic char-to-numeric conversion; seconds fixed at 0 */
54 run;
55 proc print data=guanhui.activeuser (firstobs=1 obs=10); /* preview the first 10 observations */
56     format lastlog_t datetime18.; /* display-only format; it is not stored in the data set */
57 run;
58 /*以上部分为数据导入部分,下面为数据分析部分*/
59 data work.buedetail_same;
60     set guanhui.buydetail;
61     drop buy_date buy_clock buy_year buy_month buy_day buy_hour buy_minute buy_second pay_date pay_month;
62     format buy_time datetime18.;
63     month=MONTH(datepart(buy_time));
64     day=DAY(datepart(buy_time));
65     hour=HOUR(buy_time);
66     weekday=WEEKDAY(datepart(buy_time));
67     quter=QTR(datepart(buy_time));
68 run;
69 proc print data=work.buedetail_same (firstobs=1 obs=100);
70 run;
71 proc tabulate data=work.buedetail_same out=guanhui.buydetail_evedetailfee; /* summarise fee along four separate dimensions; out= also saves the summary as a data set */
72     class month weekday hour goods_name; /* grouping variables, one per table below */
73     var fee; /* the analysis variable */
74     table month,(max min mean sum median)*fee; /* rows: month; columns: five statistics of fee */
75     table weekday,(max min mean sum median)*fee; /* rows: weekday; same statistics */
76     table hour,(max min mean sum median)*fee; /* rows: hour; same statistics */
77     table goods_name,(max min mean sum median)*fee; /* rows: goods_name; same statistics */
78 run;
79 proc print data=guanhui.buydetail_evedetailfee; /* list the saved summary data set in full */
80 run;

数据导入部分主要运用了 input 语句中的字符输入格式($w.),以及 scan、mdy、dhms 等字符处理和日期时间函数。

在检查导入结果时,proc print 可以用数据集选项 (firstobs=1 obs=10) 只显示部分观测。

在分析处理部分中:

1、用 data 步的 set 语句从源数据集中取数,并生成新变量用于分析处理。

2、此次常规分析汇总功能用proc tabulate过程。

3、在常规分析中经常需要处理日期:先在源数据中生成一个完整的日期时间(datetime)变量,分析时要注意各函数所要求的值类型。例如 MONTH、DAY、WEEKDAY、QTR 需要日期(date)值,所以必须先用 datepart() 取出 datetime 变量中的日期部分,否则结果会出错。

 

posted on 2013-01-30 11:11  colipso  阅读(372)  评论(0)  编辑  收藏  举报

导航