关联规则-R语言实现

本文旨在演示R语言arules包的关联规则用法,以及如何利用arulesViz包对挖掘结果进行可视化。

关联规则是形如X→Y的蕴涵式,其中X和Y分别称为关联规则的先导(antecedent或left-hand-side, LHS)和后继(consequent或right-hand-side, RHS)。关联规则X→Y有两个基本度量:支持度(support)和置信度(confidence)。更多细节参见"关联规则"。

R语言arules包提供了有效处理稀疏二元数据的数据结构,并提供了执行Apriori和Eclat算法的函数,用于挖掘频繁项集、最大频繁项集、闭频繁项集和关联规则,详见arules包文档。

蘑菇数据data下载

R语言代码

library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## 
## The following objects are masked from 'package:base':
## 
##     %in%, abbreviate, write
# Load the raw table from a CSV file picked interactively.
# header = FALSE: the file has no header row, so columns are named V1, V2, ...
# stringsAsFactors = TRUE: since R 4.0 read.csv() keeps text columns as
# character, but the coercion to transactions below only turns factor (or
# logical) columns into items.
data <- read.csv(file.choose(), header = FALSE, stringsAsFactors = TRUE)
# Convert the data frame into an arules transactions object; each
# column/value pair becomes one item (e.g. "V17=p").
trans <- as(data, "transactions")

# inspect(trans)  # uncomment to print every transaction
# Item matrix image of the first 50 transactions (or of all of them when the
# data set holds fewer than 50, which trans[1:50] would reject).
image(trans[seq_len(min(50, length(trans)))])

# Bar chart of the items whose support is at least 0.5.
itemFrequencyPlot(trans, support = 0.5)

# Horizontal bar chart of the 10 most frequent items.
itemFrequencyPlot(trans, topN = 10, horiz = TRUE)

# Number of items in each transaction.
basketSize <- size(trans)
summary(basketSize)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      23      23      23      23      23      23
# Relative frequency (support) of every item.
itemFreq <- itemFrequency(trans)
# Absolute occurrence count of every item. Asking arules for the counts
# directly avoids rescaling the relative frequencies by the total basket
# size, which yields the same numbers only up to floating-point error.
itemCount <- itemFrequency(trans, type = "absolute")
summary(itemCount)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       4     156     600    1570    2346    8124
# Items ordered from most to least frequent; show the top 10
# (head() also copes with fewer than 10 items).
orderedItem <- sort(itemCount, decreasing = TRUE)
head(orderedItem, 10)
## V17=p V18=w  V7=f V19=o  V8=c  V9=b V13=s V14=s  V5=f V11=t 
##  8124  7924  7914  7488  6812  5612  5176  4936  4748  4608
# Mine association rules with Apriori, keeping only rules with
# support >= 0.3 and confidence 1.
rules <- apriori(
  trans,
  parameter = list(support = 0.3, confidence = 1)
)
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##           1    0.1    1 none FALSE            TRUE     0.3      1     10
##  target   ext
##   rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09)        (c) 1996-2004   Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 8124 transaction(s)] done [0.00s].
## sorting and recoding items ... [28 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.00s].
## writing ... [4316 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Overview of the mined rule set: rule-length distribution and the range of
# the quality measures (support, confidence, lift).
summary(rules)
## set of 4316 rules
## 
## rule length distribution (lhs + rhs):sizes
##    1    2    3    4    5    6    7    8    9 
##    1   42  293  832 1244 1107  594  179   24 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    4.00    5.00    5.32    6.00    9.00 
## 
## summary of quality measures:
##     support         confidence      lift      
##  Min.   :0.3003   Min.   :1    Min.   :1.000  
##  1st Qu.:0.3112   1st Qu.:1    1st Qu.:1.000  
##  Median :0.3299   Median :1    Median :1.025  
##  Mean   :0.3540   Mean   :1    Mean   :1.141  
##  3rd Qu.:0.3712   3rd Qu.:1    3rd Qu.:1.027  
##  Max.   :1.0000   Max.   :1    Max.   :2.927  
## 
## mining info:
##   data ntransactions support confidence
##  trans          8124     0.3          1
# Print the first 10 rules. head() returns however many rules exist when
# there are fewer than 10, whereas rules[1:10] would fail in that case.
inspect(head(rules, n = 10))
##    lhs        rhs     support   confidence lift    
## 1  {}      => {V17=p} 1.0000000 1          1.000000
## 2  {V12=?} => {V17=p} 0.3052683 1          1.000000
## 3  {V9=n}  => {V19=o} 0.3092073 1          1.084936
## 4  {V9=n}  => {V7=f}  0.3092073 1          1.026535
## 5  {V9=n}  => {V17=p} 0.3092073 1          1.000000
## 6  {V3=s}  => {V17=p} 0.3146233 1          1.000000
## 7  {V20=e} => {V7=f}  0.3417036 1          1.026535
## 8  {V20=e} => {V17=p} 0.3417036 1          1.000000
## 9  {V23=d} => {V18=w} 0.3874938 1          1.025240
## 10 {V23=d} => {V17=p} 0.3874938 1          1.000000
# Keep only the rules whose right-hand side is the item "V1=e"
# (judging by the variable name, this marks edible mushrooms).
edible <- subset(rules, rhs %in% "V1=e")
# Print the first 10 of them. head() tolerates fewer than 10 matching rules,
# where edible[1:10] would fail.
inspect(head(edible, n = 10))
##      lhs                        rhs    support   confidence lift    
## 126  {V6=n,V11=t}            => {V1=e} 0.3072378 1          1.930608
## 578  {V6=n,V9=b,V11=t}       => {V1=e} 0.3072378 1          1.930608
## 581  {V6=n,V11=t,V19=o}      => {V1=e} 0.3072378 1          1.930608
## 583  {V6=n,V7=f,V11=t}       => {V1=e} 0.3072378 1          1.930608
## 585  {V6=n,V11=t,V18=w}      => {V1=e} 0.3072378 1          1.930608
## 587  {V6=n,V11=t,V17=p}      => {V1=e} 0.3072378 1          1.930608
## 590  {V6=n,V9=b,V19=o}       => {V1=e} 0.3308715 1          1.930608
## 1595 {V6=n,V9=b,V11=t,V19=o} => {V1=e} 0.3072378 1          1.930608
## 1599 {V6=n,V7=f,V9=b,V11=t}  => {V1=e} 0.3072378 1          1.930608
## 1603 {V6=n,V9=b,V11=t,V18=w} => {V1=e} 0.3072378 1          1.930608
# Save the rules.
# Export the mined rules to rules.csv as quoted, comma-separated text
# without row names.
write(
  rules,
  file = "rules.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
# Keep a data.frame copy of the rules for further processing in R.
rules_df <- as(rules, "data.frame")

利用arulesViz对结果进行可视化

#可视化
library(grid)
library(RColorBrewer)
library(arulesViz)
## 
## Attaching package: 'arulesViz'
## 
## The following object is masked from 'package:arules':
## 
##     abbreviate
## 
## The following object is masked from 'package:base':
## 
##     abbreviate
# Mine a second rule set with a stricter support threshold (0.8, confidence
# still 1); the log below reports 16 rules, used for the plots that follow.
mushroom.rules <- apriori(
  trans,
  parameter = list(support = 0.8, confidence = 1)
)
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##           1    0.1    1 none FALSE            TRUE     0.8      1     10
##  target   ext
##   rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09)        (c) 1996-2004   Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 8124 transaction(s)] done [0.02s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [16 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Colours 4 to 9 of the 9-step "Greens" Brewer palette, in reverse order;
# shared by both plots below.
green_shades <- rev(brewer.pal(9, "Greens")[4:9])

# Default rule plot with jitter, shaded by lift.
plot(
  mushroom.rules,
  shading = "lift",
  control = list(jitter = 2, col = green_shades)
)

# Grouped view of the same rules, allowing up to k = 100 groups.
plot(
  mushroom.rules,
  method = "grouped",
  control = list(k = 100, col = green_shades)
)

# Graph view of the first 20 edible rules, sized by confidence and shaded by
# lift. head() copes with fewer than 20 rules, where edible[1:20] would fail.
plot(
  head(edible, n = 20),
  measure = "confidence",
  method = "graph",
  control = list(type = "items"),
  shading = "lift"
)

# Parallel-coordinates view of all edible rules, with reordering enabled.
plot(edible, method = "paracoord", control = list(reorder = TRUE))

蘑菇数据的决策树分类介绍详见

posted @ 2016-10-27 21:01  马家寨香椿  阅读(1170)  评论(0)  编辑  收藏  举报