巧用二进制

当我们筛选数据中的可用信息时,有时候需要对某几个变量进行组合,并根据特定组合事件的发生概率进行筛选。例如有A,B,C,...,N种事件,每种事件发生错误的次数事先未知,规定错误发生概率低于0.1%的为安全事件(下面的CODE中实际使用的阈值是0.005,即0.5%)。如果我们要判断哪几种事件所组成的事件集合为安全事件,那我们就必须要用到排列组合了。

 

思路如下:

要从N种事件中找出符合条件的事件组合,就要先列出N种事件的所有可能的组合。每种事件只有“选”或“不选”两种状态,所以所有可能的组合共有2的N次方种(去掉一个都不选的空集后,为2的N次方减1种)。用这么多个观测逐一遍历,每一个观测对应并计算一种组合,问题即解。

SAS里面有BINARYw.格式(format),配合PUT/PUTN函数可以将数值(NUMERIC)转换成二进制字符串。二进制串的第j位为1表示选中第j种事件,为0表示不选;再根据0/1进行判别,问题即解。

 

贴上CODE:

  /*---------------------------------------------------------------------------
    TMF safe-combination finder.

    For every submitter id, enumerate every non-empty combination of that
    submitter's files and keep the combinations whose pooled error rate
    (flagged records / total records) is at or below a threshold in every
    requested month. Combinations are enumerated by counting from 1 to
    2**N - 1 and reading each number's BINARYw. representation as an
    include/exclude mask over the N files.

    NOTE(review): the original post defined the path and libref under the
    placeholder name XXX while the macro itself reads the libref tmf and the
    macro variable tmf. The two are named tmf here so the program is
    self-consistent - confirm the real path before running.
  ---------------------------------------------------------------------------*/
  %let tmf=C:\Users\yant07\XXX;
  libname tmf "&tmf";

  /*---------------------------------------------------------------------------
    Parameters
      date    = month numbers to analyse, separated by | (default 1|2|3)
      data    = input dataset name inside the tmf library
      average = minimum mean number of records per month that an id/file pair
                must have to be analysed
      maxrate = highest pooled error rate that is still reported
                (default 0.005, the value that was previously hard-coded)

    Output
      WORK.QQQ and a PDF listing, one row per id and qualifying combination,
      with rate1..rateN holding the pooled rate for each requested month.
  ---------------------------------------------------------------------------*/
  %macro tmf(
               date=1|2|3
              ,data=DATA
              ,average=20
              ,maxrate=0.005
            );

    %local d i nid id idlen countfile nfile ntfile nffile;

    /* Number of requested months, plus a digits-only tag for the PDF name. */
    /* The scope argument must be the quoted letter G; unquoted it is read  */
    /* as an uninitialized data step variable.                              */
    data _null_;
      daten=count("&date","|")+1;
      call symputx("daten",daten,"G");
      datec=compress("&date",,"dk");
      call symputx("datec",datec,"G");
    run;

    /* ---- Step 1: one summary table per requested month ------------------ */
    %do d=1 %to &daten;
      %let date&d=%scan(%bquote(&date),&d,%str(|));

      /* Records of the month; y=1 when m_rule is populated, else 0. */
      data a&&date&d.;
        set tmf.&data;
        keep did doc_submitter_NTID_from_Design tmf_item_id date y;
        rename doc_submitter_NTID_from_Design=id
               tmf_item_id=file;
        y=^missing(m_rule);
        date=datepart(QC_Task_Completed_Date);
        if month(date)=&&date&d.;
        format date date9.;
      run;

      proc sort data=a&&date&d. nodupkey;
        by did id file;
      run;

      /* nT = records per id/file, nF = flagged records per id/file. */
      proc sql noprint;
        create table aa&&date&d. as
          select *
                ,count(y) as nT&d
                ,sum(y)   as nF&d
          from a&&date&d.
          group by id, file;
      quit;

      /* Keep a single row per id/file: the one with the latest date. */
      proc sort data=aa&&date&d.;
        by id file descending date;
      run;
      proc sort data=aa&&date&d. nodupkey;
        by id file;
      run;
    %end;

    /* ---- Step 2: id/file pairs present in every month with enough volume - */
    /* The monthly tables are listed explicitly; a name-prefix list would     */
    /* also pick up leftover WORK tables from an earlier run.                 */
    data Merged_file;
      merge
        %do d=1 %to &daten;
          aa&&date&d.
        %end;
      ;
      by id file;
      if 1
        %do d=1 %to &daten;
          and ^missing(nT&d)
        %end;
        and sum(0
        %do d=1 %to &daten;
          ,nT&d
        %end;
        )/&daten>=&average.
      ;
    run;

    /* ---- Step 3: list of qualifying ids ---------------------------------- */
    %let nid=0;
    %let id=;
    proc sql noprint;
      select distinct id, count(distinct id)
        into :id separated by ",", :nid
        from Merged_file;
    quit;
    %let nid=&nid;

    %if &nid=0 %then %do;
      %put NOTE: tmf - no id/file pair satisfies the volume rule, nothing to report.;
      %return;
    %end;

    /* Split the id list once and remember the longest id so that every */
    /* per-id table declares id with the same length.                    */
    %let idlen=1;
    %do i=1 %to &nid;
      %let id&i=%scan(%bquote(&id),&i,%str(,));
      %if %length(&&id&i)>&idlen %then %let idlen=%length(&&id&i);
    %end;

    /* Number of distinct files per id, largest first (informational). */
    proc sql noprint;
      create table file_id as
        select id, count(distinct file) as file_id
        from Merged_file
        group by id;
    quit;
    proc sort data=file_id;
      by descending file_id;
    run;

    /* ---- Step 4: enumerate file combinations per month and id ------------ */
    %do d=1 %to &daten;
      %do i=1 %to &nid;

        data &&id&i.&&date&d.;
          set Merged_file;
          where id="&&id&i";
          keep file id nt&d nf&d;
        run;
        proc sort data=&&id&i.&&date&d. nodupkey;
          by file id nt&d nf&d;
        run;

        /* File names and their counts as comma lists. One ordered query */
        /* fills all three lists so position j always refers to one file. */
        proc sql noprint;
          select count(file) into :countfile
            from &&id&i.&&date&d.;
          select file, nt&d, nf&d
            into :nfile  separated by ","
                ,:ntfile separated by ","
                ,:nffile separated by ","
            from &&id&i.&&date&d.
            order by file, nt&d, nf&d;
        quit;
        %let countfile=&countfile;

        data q_&&id&i.&&date&d.;
          length combine $88 id $&idlen;
          array fname[&countfile] $10 _temporary_;
          array ftot [&countfile] 8   _temporary_;
          array ferr [&countfile] 8   _temporary_;

          /* Parse the lists once instead of once per combination. */
          do j=1 to &countfile;
            fname[j]=scan("&nfile",j,",");
            ftot[j]=input(scan("&ntfile",j,","),32.);
            ferr[j]=input(scan("&nffile",j,","),32.);
          end;

          id="&&id&i.";
          fmt="binary&countfile..";

          /* i runs over every non-empty subset; character j of the */
          /* reversed bit string says whether file j is in the subset. */
          do i=1 to 2**&countfile-1;
            binary=reverse(strip(putn(i,fmt)));
            nf=0;
            nt=0;
            combine="";
            do j=1 to &countfile;
              if substr(binary,j,1)="1" then do;
                nf=nf+ferr[j];
                nt=nt+ftot[j];
                combine=compress(combine)||"|"||compress(fname[j]);
              end;
            end;
            count=count(binary,"1");
            if nt>0 then do;
              rate=nf/nt;
              if rate<=&maxrate then output;
            end;
          end;

          keep id binary rate combine count;
          rename count=count&d.
                 binary=binary&d.
                 rate=rate&d.
          ;
        run;
        proc sort data=q_&&id&i.&&date&d.;
          by id combine;
        run;

      %end;
    %end;

    /* ---- Step 5: keep combinations that qualify in every month ----------- */
    %do i=1 %to &nid;
      data qq_&&id&i.;
        merge
          %do d=1 %to &daten;
            q_&&id&i.&&date&d.(in=a&d)
          %end;
        ;
        by id combine;
        if 1
          %do d=1 %to &daten;
            and a&d
          %end;
        ;
      run;
    %end;

    /* Stack the per-id results. The inputs are listed explicitly because a */
    /* qq name-prefix list would also match QQQ itself on a second run.     */
    data qqq;
      set
        %do i=1 %to &nid;
          qq_&&id&i.
        %end;
      ;
      keep id combine rate:;
      if ^missing(combine);
      rename combine=combination;
    run;
    proc sort data=qqq;
      by id combination;
    run;

    /* ---- Step 6: report --------------------------------------------------- */
    ods printer pdf file="&tmf\TMF_Combinations_&datec._average&average..pdf";
    proc print data=qqq;
      by id;
    run;
    ods printer close;

  %mend tmf;

  %tmf;

  /* Optional cleanup of WORK after the run (left disabled, as in the original):
     proc datasets library=work memtype=data kill nolist;
     quit;
  */

 

by yant07

posted @ 2016-10-24 17:33  yant07  阅读(409)  评论(0)  编辑  收藏  举报