Python实现简单关联规则Apriori算法
from itertools import combinations
from copy import deepcopy  # no longer used below; kept so existing imports from this module keep working


def _resolve_setting(name, value):
    """Return *value*, or the module-level global called *name* when it is None.

    The original script configured ``min_support`` and ``data_set`` as module
    globals.  This fallback keeps that calling style working while letting
    callers pass the values explicitly instead.

    Raises:
        NameError: if *value* is None and no such module-level global exists.
    """
    if value is not None:
        return value
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            f"{name!r} was not passed and no module-level {name!r} is defined"
        ) from None


# Load the transactions and drop 1-itemsets whose support count is below
# min_support.
def load_data(data, min_support=None):
    """Count per-item support and keep only the frequent single items.

    Args:
        data: iterable of transactions, each an iterable of hashable items.
        min_support: minimum support count (inclusive).  When omitted, the
            module-level ``min_support`` is used.

    Returns:
        dict mapping each frequent item to its support count, in order of
        first appearance in *data*.
    """
    min_support = _resolve_setting("min_support", min_support)
    counts = {}
    for transaction in data:
        # Support is "number of transactions containing the item", so an item
        # repeated inside one transaction must only be counted once.
        # dict.fromkeys de-duplicates while preserving first-seen order.
        for item in dict.fromkeys(transaction):
            counts[item] = counts.get(item, 0) + 1
    return {item: count for item, count in counts.items() if count >= min_support}


# Keep the candidate itemsets whose support count reaches min_support.
def get_support_set(p_set, data_set=None, min_support=None):
    """Return ``[itemset, count]`` pairs for the frequent candidates in *p_set*.

    Args:
        p_set: iterable of candidate itemsets (``set`` objects).
        data_set: list of transactions as sets.  When omitted, the
            module-level ``data_set`` is used.
        min_support: minimum support count (inclusive).  When omitted, the
            module-level ``min_support`` is used.

    Returns:
        list of ``[itemset, count]`` pairs, in the order the candidates were
        given.
    """
    data_set = _resolve_setting("data_set", data_set)
    min_support = _resolve_setting("min_support", min_support)
    item_supp_set = []
    for item in p_set:
        count = sum(1 for transaction in data_set if item.issubset(transaction))
        if count >= min_support:
            item_supp_set.append([item, count])
    return item_supp_set


# Find all frequent itemsets of size k and larger,
# starting from the frequent 2-itemsets.
def get_all_items(two_set, k=3, data_set=None, min_support=None):
    """Grow frequent itemsets level by level, starting from *two_set*.

    Args:
        two_set: list of ``[itemset, count]`` pairs holding the frequent
            (k-1)-itemsets to start from (2-itemsets for the default ``k``).
        k: size of the first level of itemsets to generate.
        data_set: list of transactions as sets.  When omitted, the
            module-level ``data_set`` is used.
        min_support: minimum support count (inclusive).  When omitted, the
            module-level ``min_support`` is used.

    Returns:
        list of ``[itemset, count]`` pairs for every frequent itemset of size
        ``k`` or larger.  The order within a level follows set iteration
        order and is therefore not guaranteed.
    """
    data_set = _resolve_setting("data_set", data_set)
    min_support = _resolve_setting("min_support", min_support)
    all_frequent = []
    current = two_set
    # Stop as soon as a level yields no frequent itemsets: no larger itemset
    # can then pass the pruning test below.
    while current:
        previous_sets = [entry[0] for entry in current]
        # Distinct items still present in the previous level, first-seen order.
        items = list(
            dict.fromkeys(item for itemset in previous_sets for item in itemset)
        )
        # Apriori pruning: a k-candidate is kept only when exactly k itemsets
        # of the previous level are contained in it, i.e. all of its
        # (k-1)-subsets are frequent.
        candidates = [
            candidate
            for candidate in map(set, combinations(items, k))
            if sum(1 for prev in previous_sets if candidate.issuperset(prev)) == k
        ]
        current = get_support_set(
            candidates, data_set=data_set, min_support=min_support
        )
        all_frequent.extend(current)
        k += 1
    return all_frequent


def main():
    """Run Apriori on the sample transactions and print every frequent itemset."""
    data = [['I1', 'I2', 'I5'],
            ['I2', 'I4'],
            ['I2', 'I3'],
            ['I1', 'I2', 'I4'],
            ['I1', 'I3'],
            ['I2', 'I3'],
            ['I1', 'I3'],
            ['I1', 'I2', 'I3', 'I5'],
            ['I1', 'I2', 'I3']]
    data_set = [set(d) for d in data]
    min_support = 1
    # Compute the frequent 1-itemsets once and reuse them for both levels.
    frequent_items = load_data(data, min_support=min_support)
    one = [[{item}, count] for item, count in frequent_items.items()]
    two = [set(pair) for pair in combinations(frequent_items, 2)]
    two_f_set = get_support_set(two, data_set=data_set, min_support=min_support)
    all_frequent_set = one + two_f_set + get_all_items(
        two_f_set, data_set=data_set, min_support=min_support
    )
    for afs in all_frequent_set:
        print(afs)


if __name__ == '__main__':
    main()
输出结果: