数据挖掘:Apriori 算法的 Python 实现
该算法主要用于关联分析(关联规则挖掘)。
大多数教材中都有详细介绍,这里就不再赘述了。
# Sample transaction database: each inner list is one transaction of item ids.
dataset = [
    [1, 2, 5],
    [2, 4],
    [2, 3],
    [1, 2, 4],
    [1, 3],
    [2, 3],
    [1, 3],
    [1, 2, 3, 5],
    [1, 2, 3],
]


def init(dataset):
    """Build the candidate 1-itemsets (C1) from the transactions.

    Args:
        dataset: Sequence of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-element frozensets, one per distinct item, ordered
        by ascending item value.
    """
    # A set de-duplicates in O(1) per item instead of scanning a list.
    distinct_items = {item for transaction in dataset for item in transaction}
    return [frozenset([item]) for item in sorted(distinct_items)]


def scan(D, Ck, minsupport):
    """Count candidate itemsets over the data and keep the frequent ones.

    Args:
        D: Sequence of transactions (each an iterable of items).
        Ck: Iterable of candidate itemsets (frozensets).
        minsupport: Minimum support, as a fraction of ``len(D)``.

    Returns:
        A tuple ``(Lk, supportdata)``. ``Lk`` lists the candidates whose
        support is at least ``minsupport``, in the order they were first
        matched. ``supportdata`` maps every candidate that occurred in at
        least one transaction to its support.
    """
    # Convert each transaction to a set once rather than once per candidate.
    transactions = [set(transaction) for transaction in D]

    counts = {}
    for transaction in transactions:
        for candidate in Ck:
            if candidate.issubset(transaction):
                counts[candidate] = counts.get(candidate, 0) + 1

    # If D is empty, counts is empty too, so the division below never runs.
    total = len(D)
    Lk = []  # frequent k-itemsets
    supportdata = {}
    for itemset, count in counts.items():
        support = count / total
        if support >= minsupport:
            Lk.append(itemset)
        supportdata[itemset] = support
    return Lk, supportdata


def Link(Lk, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in sorted
    order, are equal.

    Args:
        Lk: List of frequent (k-1)-itemsets (frozensets of comparable items).
        k: Size of the candidate itemsets to generate.

    Returns:
        A list of candidate k-itemsets (frozensets).
    """
    # Sort BEFORE slicing: a frozenset iterates in hash order, so slicing the
    # unsorted items can pick the wrong "prefix" and drop valid candidates.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    Ck = []
    size = len(Lk)
    for i in range(size):
        for j in range(i + 1, size):
            if prefixes[i] == prefixes[j]:
                Ck.append(Lk[i] | Lk[j])  # set union
    return Ck


def AprioriAlgo(dataset, minsupport):
    """Run the Apriori algorithm and return all frequent itemsets.

    Prints the candidate list ("ck") and the frequent list ("lk") of every
    level as it goes.

    Args:
        dataset: Sequence of transactions (each an iterable of items).
        minsupport: Minimum support, as a fraction of the transaction count.

    Returns:
        A tuple ``(L, supportdata)``. ``L[i]`` is the list of frequent
        (i+1)-itemsets; the final entry is always an empty list, because the
        loop stops at the first level with no frequent itemsets.
        ``supportdata`` maps each counted itemset to its support.
    """
    candidates_1 = init(dataset)
    L1, supportdata = scan(dataset, candidates_1, minsupport)
    L = [L1]
    k = 2
    # Keep growing the itemset size until a level yields nothing frequent.
    while L[k - 2]:
        ck = Link(L[k - 2], k)
        print("ck: ", ck)
        lk, supk = scan(dataset, ck, minsupport)
        supportdata.update(supk)
        print("lk: ", lk)  # frequent k-itemsets
        L.append(lk)
        k += 1
    return L, supportdata


L, supportdata = AprioriAlgo(dataset, minsupport=0.2)
EPFL - Fighting