Apriori算法(C#)
AprioriMethod.cs
using System; using System.Collections.Generic; using System.Linq; using System.Web; /// <summary> ///AprioriMethod 的摘要说明 /// </summary> public class AprioriMethod { private readonly static int support = 2; // 支持度阈值 private readonly static double confidence = 0.7; // 置信度阈值 private readonly static char[] item_Split = { ';' }; // 项之间的分隔符 private readonly static string itemSplit = ";"; private readonly static String CON = "->"; // 项之间的分隔符 private readonly static List<String> transList = new List<String>(); //所有交易 public AprioriMethod() { // //TODO: 在此处添加构造函数逻辑 // //初始化交易记录 transList.Add("移动硬盘;电脑;手机;优盘"); transList.Add("电脑;优盘;"); transList.Add("电脑;优盘;"); transList.Add("手机;电脑;移动硬盘;"); transList.Add("移动硬盘;手机;"); transList.Add("电脑;手机;"); transList.Add("移动硬盘;手机;"); transList.Add("移动硬盘;电脑;手机;优盘;"); transList.Add("移动硬盘;电脑;手机;"); } public Dictionary<String, int> getFC() //计算所有频繁项集 { Dictionary<String, int> frequentCollections = new Dictionary<String, int>();//所有的频繁集 foreach (KeyValuePair<string, int> item in getItem1FC()) { if (frequentCollections.ContainsKey(item.Key)) { frequentCollections.Remove(item.Key); } frequentCollections.Add(item.Key, item.Value); } Dictionary<String, int> itemkFcMap = new Dictionary<String, int>(); foreach (KeyValuePair<string, int> item in getItem1FC()) { itemkFcMap.Add(item.Key, item.Value); } while (itemkFcMap != null && itemkFcMap.Count != 0) { Dictionary<String, int> candidateCollection = getCandidateCollection(itemkFcMap); List<String> ccKeySet = candidateCollection.Keys.ToList(); //对候选集项进行累加计数 foreach (String trans in transList) { foreach (String candidate in ccKeySet) { bool flag = true; // 用来判断交易中是否出现该候选项,如果出现,计数加1 String[] candidateItems = candidate.Split(item_Split, StringSplitOptions.RemoveEmptyEntries); foreach (String candidateItem in candidateItems) { if (trans.IndexOf(candidateItem + itemSplit) == -1) { flag = false; break; } } if (flag) { int count = candidateCollection[candidate]; candidateCollection.Remove(candidate); candidateCollection.Add(candidate, count + 1); } } } //从候选集中找到符合支持度的频繁集项 itemkFcMap.Clear(); foreach (String candidate in ccKeySet) { int count = candidateCollection[candidate]; if (count >= support) { itemkFcMap.Add(candidate, count); } } //合并所有频繁集 foreach (KeyValuePair<string, int> item in itemkFcMap) { if (frequentCollections.ContainsKey(item.Key)) { frequentCollections.Remove(item.Key); } frequentCollections.Add(item.Key, item.Value); } } return frequentCollections; } private Dictionary<String, int> getItem1FC() //计算所有频繁1项集 { Dictionary<String, int> sItem1Fc = new Dictionary<String, int>(); Dictionary<String, int> rItem1Fc = new Dictionary<String, int>(); //频繁1项集 foreach (String trans in transList) { String[] items = trans.Split(item_Split, StringSplitOptions.RemoveEmptyEntries); foreach (String item in items) { int count; if (sItem1Fc.ContainsKey(item + itemSplit)) { count = sItem1Fc[item + itemSplit]; sItem1Fc.Remove(item + itemSplit); sItem1Fc.Add(item + itemSplit, count + 1); } else { sItem1Fc.Add(item + itemSplit, 1); } } } List<String> keySet = sItem1Fc.Keys.ToList(); foreach (String key in keySet) { int count = sItem1Fc[key]; if (count >= support) { rItem1Fc.Add(key, count); } } return rItem1Fc; } private Dictionary<String, int> getCandidateCollection(Dictionary<String, int> itemkFcMap) //生成候选项集 { Dictionary<String, int> candidateCollection = new Dictionary<String, int>(); List<String> itemkSet1 = itemkFcMap.Keys.ToList(); List<String> itemkSet2 = itemkFcMap.Keys.ToList(); foreach (String itemk1 in itemkSet1) { foreach (String itemk2 in itemkSet2) { //进行连接 String[] tmp1 = itemk1.Split(item_Split, StringSplitOptions.RemoveEmptyEntries); String[] tmp2 = itemk2.Split(item_Split, StringSplitOptions.RemoveEmptyEntries); String c = ""; if (tmp1.Length == 1) { if (tmp1[0].CompareTo(tmp2[0]) < 0) { c = tmp1[0] + itemSplit + tmp2[0] + itemSplit; } } else { bool flag = true; for (int i = 0; i < tmp1.Length - 1; i++) { if (!tmp1[i].Equals(tmp2[i])) { flag = false; break; } } if (flag && (tmp1[tmp1.Length - 1].CompareTo(tmp2[tmp2.Length - 1]) < 0)) { c = itemk1 + tmp2[tmp2.Length - 1] + itemSplit; } } //进行剪枝 bool hasInfrequentSubSet = false; if (!c.Equals("")) { String[] tmpC = c.Split(item_Split, StringSplitOptions.RemoveEmptyEntries); for (int i = 0; i < tmpC.Length; i++) { String subC = ""; for (int j = 0; j < tmpC.Length; j++) { if (i != j) { subC = subC + tmpC[j] + itemSplit; } } if (!itemkFcMap.ContainsKey(subC)) { hasInfrequentSubSet = true; break; } } } else { hasInfrequentSubSet = true; } if (!hasInfrequentSubSet) { candidateCollection.Add(c, 0); } } } return candidateCollection; } public Dictionary<String, Double> getRelationRules(Dictionary<String, int> frequentCollection) //计算关联规则 { Dictionary<String, Double> relationRules = new Dictionary<String, Double>(); List<String> keySet = frequentCollection.Keys.ToList(); foreach (String key in keySet) { double countAll = frequentCollection[key]; String[] keyItems = key.Split(item_Split, StringSplitOptions.RemoveEmptyEntries); if (keyItems.Length > 1) { List<String> source = keyItems.ToList(); //Collections.addAll(source, keyItems); List<List<String>> result = new List<List<String>>(); buildSubSet(source, result); //获得source的所有非空子集 foreach (List<String> itemList in result) { if (itemList.Count < source.Count) { //只处理真子集 List<String> otherList = new List<String>(); foreach (String sourceItem in source) { if (!itemList.Contains(sourceItem)) { otherList.Add(sourceItem); } } String reasonStr = "";//前置 String resultStr = "";//结果 foreach (String item in itemList) { reasonStr = reasonStr + item + itemSplit; } foreach (String item in otherList) { resultStr = resultStr + item + itemSplit; } double countReason = frequentCollection[reasonStr]; double itemConfidence = countAll / countReason;//计算置信度 if (itemConfidence >= confidence) { String rule = reasonStr + CON + resultStr; //relationRules.Remove(rule); relationRules.Add(rule, itemConfidence); } } } } } return relationRules; } private void buildSubSet(List<String> sourceSet, List<List<String>> result) //建立频繁项集的子集 { // 仅有一个元素时,递归终止。此时非空子集仅为其自身,所以直接添加到result中 if (sourceSet.Count == 1) { List<String> set = new List<String>(); set.Add(sourceSet[0]); result.Add(set); } else if (sourceSet.Count > 1) { // 当有n个元素时,递归求出前n-1个子集,在于result中 buildSubSet(sourceSet.Take(sourceSet.Count - 1).ToList(), result); int size = result.Count;// 求出此时result的长度,用于后面的追加第n个元素时计数 // 把第n个元素加入到集合中 List<String> single = new List<String>(); single.Add(sourceSet[sourceSet.Count - 1]); result.Add(single); // 在保留前面的n-1子集的情况下,把第n个元素分别加到前n个子集中,并把新的集加入到result中; // 为保留原有n-1的子集,所以需要先对其进行复制 List<String> clone; for (int i = 0; i < size; i++) { clone = new List<String>(); foreach (String str in result[i]) { clone.Add(str); } clone.Add(sourceSet[sourceSet.Count - 1]); result.Add(clone); } } } }
Default.aspx.cs
AprioriMethod apriori = new AprioriMethod(); Dictionary<String, int> frequentCollection = apriori.getFC(); Response.Write("----------------频繁集" + "----------------"); Response.Write("<br/>"); foreach (var item in frequentCollection) { Response.Write(item.Key + "-----" + item.Value); Response.Write("<br/>"); } Dictionary<String, Double> relationRules = apriori.getRelationRules(frequentCollection); Response.Write("----------------关联规则" + "----------------"); Response.Write("<br/>"); foreach (var item in relationRules) { Response.Write(item.Key + "-----" + item.Value); Response.Write("<br/>"); }
结果:
----------------频繁集----------------
移动硬盘;-----6
电脑;-----7
手机;-----7
优盘;-----4
电脑;移动硬盘;-----4
电脑;手机;-----5
电脑;优盘;-----3
手机;移动硬盘;-----6
电脑;手机;移动硬盘;-----4
----------------关联规则----------------
电脑;->手机;-----0.714285714285714
手机;->电脑;-----0.714285714285714
优盘;->电脑;-----0.75
手机;->移动硬盘;-----0.857142857142857
移动硬盘;->手机;-----1
电脑;手机;->移动硬盘;-----0.8
电脑;移动硬盘;->手机;-----1