1、先将QQ群的消息记录导出为.txt格式的文件,保存路径及文件名可自行定义(本文中导出到Y盘,文件命名为test.txt,即Y:\test.txt)。
2、程序如下:
/*---------------------------------------------------------------------------
  QQ group chat statistics.

  Reads an exported QQ group message log (Y:\test.txt), extracts the sender
  of every message header line, counts messages per sender and writes a
  frequency table to Y:\statistics.rtf.

  Inputs : Y:\test.txt        - exported chat log (plain text)
  Outputs: Y:\statistics.rtf  - report (name, message count, percentage)
  Macro variables created: _qunname (group name), _nobs (total messages)
  Requires: an ODS style named Mystyles must already be available.
---------------------------------------------------------------------------*/

/* Step 1: senders identified by a numeric QQ id, e.g. " nick-name(12345678)". */
data statistics1;
  /* Compile the regular expressions once and keep the ids across rows. */
  if _n_ = 1 then do;
    patternid1 = prxparse("/\s\w*-*\w*-*\w*\(\d+\)/");
    patternid2 = prxparse("/\(\d+\)/");
  end;
  retain patternid1 patternid2;

  infile 'Y:\test.txt' truncover;
  input string $200.;
  /* qq is $64 in both extraction steps so that concatenating them later
     never truncates an e-mail style identifier. */
  length name $50. qq $64.;

  call prxsubstr(patternid1, string, start1, length1);
  call prxsubstr(patternid2, string, start2, length2);

  /* prxsubstr returns position 0 when there is no match. */
  if start1 > 0 and start2 > 0 then do;
    name = substrn(string, start1 + 1, length1 - 1);  /* skip the leading blank   */
    qq   = substrn(string, start2 + 1, length2 - 2);  /* strip the enclosing ( )  */
    output;
  end;
run;

/* Step 2: senders identified by an e-mail address, e.g. " nick<user@vip.qq.com>". */
data statistics2;
  if _n_ = 1 then do;
    /* The domain separator is an escaped literal dot, and one or more domain
       labels are accepted so that addresses such as user@vip.qq.com or
       first.last@example.com are not silently skipped. */
    patternid1 = prxparse("/\s\w*-*\w*-*\w*<[\w.+-]+@[\w-]+(\.[\w-]+)+>/");
    patternid2 = prxparse("/<[\w.+-]+@[\w-]+(\.[\w-]+)+>/");
  end;
  retain patternid1 patternid2;

  infile 'Y:\test.txt' truncover;
  input string $200.;
  length name $50. qq $64.;

  call prxsubstr(patternid1, string, start1, length1);
  call prxsubstr(patternid2, string, start2, length2);

  if start1 > 0 and start2 > 0 then do;
    name = substrn(string, start1 + 1, length1 - 1);  /* skip the leading blank   */
    qq   = substrn(string, start2 + 1, length2 - 2);  /* strip the enclosing < >  */
    output;
  end;
run;

/* Step 3: the group name is read from line 6 of the export, after the
   marker text. Formatted input with an explicit length is used because plain
   list input would cut the name at 8 bytes or at the first blank. */
data qunname;
  infile 'Y:\test.txt' truncover firstobs=6 obs=6;
  length qunname $200;
  input @'消息对象:' qunname $200.;
  /* symputx strips leading/trailing blanks before storing the value. */
  call symputx('_qunname', qunname);
run;

/* Step 4: combine both sender kinds; id 10000 is excluded.
   n keeps the concatenation order so the latest nickname can be picked. */
data statistics(drop=patternid1 patternid2 string start1 start2 length1 length2);
  set statistics1 statistics2;
  where qq ^= '10000';
  n = _n_;
run;

proc sort data=statistics;
  by qq n;
run;

/* Step 5: one row per sender, carrying the last nickname seen for that id. */
data match;
  set statistics;
  by qq n;
  if last.qq then output;
  drop n;
run;

/* Step 6: total number of messages. "if 0 then set" fills t at compile time,
   so _nobs is defined (as 0) even when the dataset is empty; symputx avoids
   the numeric-to-character note and leading blanks of symput. */
data _null_;
  if 0 then set statistics nobs=t;
  call symputx('_nobs', t);
  stop;
run;

/* Step 7: message count and percentage share per sender. */
proc sql;
  create table rtf as
    select qq,
           n(qq)                  as frequency,
           n(qq) / &_nobs. * 100  as rate
      from statistics
     group by qq
     order by 1;
quit;

/* Attach the display name to each sender. */
data rtf;
  merge rtf match;
  by qq;
run;

/* Most active senders first. */
proc sort data=rtf;
  by descending frequency;
run;

/* Step 8: write the report to RTF. */
options nodate nonumber;
ods results=off;
title;
footnote;
ods listing close;
ods rtf file='Y:\statistics.rtf' style=Mystyles bodytitle;

proc report data=rtf nowindows
     style(report)={font_size=10.5pt
                    pretext="QQ群(&_qunname.)成员发言次数及频率统计分析表"
                    posttext="Author:liyongzhao,Created Date:2013-9-12."
                    just=left}
     style(column)={font=('times new roman',12pt)};
  column name frequency rate;
  define name      / center style(column)={cellwidth=15%} 'QQ群成员';
  define frequency / center style(column)={cellwidth=5%}  '发言次数';
  define rate      / center style(column)={cellwidth=5%}  format=6.3 '发言频率(%)';
run;

ods rtf close;
3、程序运行结束后,打开Y:\statistics.rtf即可查看结果。
结果类似下图(截取开头部分,隐去QQ群名称和个人QQ号码):
有道无术,术尚可进;有术无道,止于术也!