【SAS NOTES】实际分析应用
/*
 * Customer visit / purchase analysis.
 *
 * Part 1 imports four delimited text files into the GUANHUI library and
 * builds a numeric SAS date or datetime variable from the timestamp text.
 * Part 2 derives calendar fields from the purchase datetime and summarises
 * FEE with PROC TABULATE.
 *
 * NOTE(review): the libref GUANHUI is not assigned in this listing; it is
 * assumed to be defined before this code runs.
 *
 * INFILE options used on every import:
 *   dsd       - consecutive delimiters mean an empty field (without it they
 *               collapse into one and every later column shifts left).
 *   truncover - a record with fewer fields than the INPUT statement yields
 *               missing values instead of pulling data from the next record.
 *
 * The pieces returned by SCAN() are character values, so they are converted
 * explicitly with INPUT() before being passed to MDY()/DHMS(); this avoids
 * the implicit character-to-numeric conversion and its log NOTE.
 */

/* ---------- Part 1: data import ---------- */

/* Import customer visit log ('|'-delimited, first line is a header). */
data guanhui.visit_8_12;
    infile 'E:\****.txt' dlm='|' dsd truncover firstobs=2;
    input user :$11. serv_number :$11. log_time :$16. log_IP :$15. content :$100.;

    /* Split "date time" text into its components. */
    log_date   = scan(log_time, 1, ' ');
    log_year   = scan(log_date, 1, '-');
    log_month  = scan(log_date, 2, '-');
    log_day    = scan(log_date, 3, '-');
    log_clock  = scan(log_time, 2, ' ');
    log_hour   = scan(log_clock, 1, ':');
    log_minute = scan(log_clock, 2, ':');

    /* Numeric SAS datetime; seconds are fixed at 0. */
    visit_time = dhms(
        mdy(input(log_month, best12.), input(log_day, best12.), input(log_year, best12.)),
        input(log_hour, best12.),
        input(log_minute, best12.),
        0
    );
run;

proc print data=guanhui.visit_8_12 (firstobs=1 obs=12);
    format visit_time datetime18.;
run;

/* Import purchase (transaction) detail ('|'-delimited, header on line 1). */
data guanhui.buydetail;
    infile 'E:\****.txt' dlm='|' dsd truncover firstobs=2;
    input serv_number :$11. pay_way :$50. fee pay_date :$19. pay_month :$10.
          goods_name :$100. goods_detail :$100.;

    /* Split "date time" text into its components. */
    buy_date   = scan(pay_date, 1, ' ');
    buy_clock  = scan(pay_date, 2, ' ');
    buy_year   = scan(buy_date, 1, '-');
    buy_month  = scan(buy_date, 2, '-');
    buy_day    = scan(buy_date, 3, '-');
    buy_hour   = scan(buy_clock, 1, ':');
    buy_minute = scan(buy_clock, 2, ':');
    buy_second = scan(buy_clock, 3, ':');

    /* Numeric SAS datetime of the purchase. */
    buy_time = dhms(
        mdy(input(buy_month, best12.), input(buy_day, best12.), input(buy_year, best12.)),
        input(buy_hour, best12.),
        input(buy_minute, best12.),
        input(buy_second, best12.)
    );
run;

proc print data=guanhui.buydetail (firstobs=1 obs=12);
    format buy_time datetime18.;
run;

/* Import existing-customer detail ('|'-delimited, header on line 1). */
data guanhui.existuser;
    infile 'E:\****.txt' dlm='|' dsd truncover firstobs=2;
    input user :$20. serv_number :$11. logon_date :$15.;

    logon_year  = scan(logon_date, 1, '-');
    logon_month = scan(logon_date, 2, '-');
    logon_day   = scan(logon_date, 3, '-');

    /* Despite its name, logon_time is a SAS date value (no time part). */
    logon_time = mdy(
        input(logon_month, best12.),
        input(logon_day, best12.),
        input(logon_year, best12.)
    );
run;

proc print data=guanhui.existuser (firstobs=1 obs=10);
    format logon_time mmddyy8.;
run;

/* Import 2012 active users (tab-delimited, header on line 1). */
data guanhui.activeuser;
    infile 'E:\*****.txt' dlm='09'x dsd truncover firstobs=2;
    input user :$15. serv_number :$11. lastlog_time :$20.;

    /* Split "date time" text into its components. */
    lastlog_date   = scan(lastlog_time, 1, ' ');
    lastlog_year   = scan(lastlog_date, 1, '-');
    lastlog_month  = scan(lastlog_date, 2, '-');
    lastlog_day    = scan(lastlog_date, 3, '-');
    lastlog_clock  = scan(lastlog_time, 2, ' ');
    lastlog_hour   = scan(lastlog_clock, 1, ':');
    lastlog_minute = scan(lastlog_clock, 2, ':');

    /* Numeric SAS datetime of the last login; seconds are fixed at 0. */
    lastlog_t = dhms(
        mdy(input(lastlog_month, best12.), input(lastlog_day, best12.), input(lastlog_year, best12.)),
        input(lastlog_hour, best12.),
        input(lastlog_minute, best12.),
        0
    );
run;

proc print data=guanhui.activeuser (firstobs=1 obs=10);
    format lastlog_t datetime18.;
run;

/* ---------- Part 2: data analysis ---------- */

/*
 * Keep only the analysis columns of the purchase detail and derive calendar
 * fields from buy_time. MONTH/DAY/WEEKDAY/QTR expect a date value, so the
 * date part is extracted with DATEPART() first; HOUR() is applied to the
 * datetime value directly.
 */
data work.buedetail_same;
    set guanhui.buydetail;
    drop buy_date buy_clock buy_year buy_month buy_day
         buy_hour buy_minute buy_second pay_date pay_month;
    format buy_time datetime18.;

    month   = month(datepart(buy_time));
    day     = day(datepart(buy_time));
    hour    = hour(buy_time);
    weekday = weekday(datepart(buy_time));
    quter   = qtr(datepart(buy_time));
run;

proc print data=work.buedetail_same (firstobs=1 obs=100);
run;

/*
 * Summarise FEE (max, min, mean, sum, median) by month, weekday, hour and
 * goods name, and save the summary to guanhui.buydetail_evedetailfee.
 * Note: by PROC TABULATE default, an observation with a missing value in any
 * CLASS variable is excluded from every table.
 */
proc tabulate data=work.buedetail_same out=guanhui.buydetail_evedetailfee;
    class month weekday hour goods_name;
    var fee;
    table month,      (max min mean sum median)*fee;
    table weekday,    (max min mean sum median)*fee;
    table hour,       (max min mean sum median)*fee;
    table goods_name, (max min mean sum median)*fee;
run;

proc print data=guanhui.buydetail_evedetailfee;
run;
数据导入部分主要运用了 input 语句中的 `:$` 格式修饰符读入字符变量,以及 scan、mdy、dhms 等函数进行字符拆分和日期时间值的构造。
在检查中print可以用(firstobs=1 obs=10)来显示部分数据。
在分析处理部分中:
1、用 data 步中的 set 语句从源数据集中取数,并生成新变量来进行分析处理。
2、此次常规分析汇总功能用proc tabulate过程。
3、在常规分析中经常遇到对日期的处理。通常先在源数据中生成一个完整的日期时间(datetime)变量;分析时要注意,不同函数要求的取值类型不同:MONTH、DAY、WEEKDAY、QTR 等函数需要日期(date)值,所以必须先用 datepart() 取出 datetime 变量中的日期部分,否则结果会出错;而 HOUR 等时间函数可以直接作用于 datetime 值。