# Advanced data visualisation in R, part 6: box plots
#
# The examples below read "airpollution.csv" and "metals.csv" from the
# working directory and draw a series of box plots with base graphics.

# Adjusting the box width ----

air <- read.csv("airpollution.csv")
boxplot(air, las = 1)

# boxwex scales the width of every box; a suitable value depends on how many
# variables are drawn and should be less than 1.
boxplot(air, boxwex = 0.2, las = 1)

# width gives the relative width of each box: here the second box is drawn
# twice as wide as the first.
# NOTE(review): width needs one entry per box, so this assumes `air` has
# exactly two columns -- confirm against the data file.
boxplot(air, width = c(1, 2))

# Box plots by group ----

metals <- read.csv("metals.csv")

# Formula interface y ~ group: one box of Cu per level of Source.
boxplot(Cu ~ Source,
  data = metals,
  main = "Summary of Copper (Cu) concentrations by Site"
)

# One box per combination of Source and Expt.
boxplot(Cu ~ Source * Expt,
  data = metals,
  main = "Summary of Copper (Cu) concentrations by Site"
)

# Box width proportional to the sample size ----

# With varwidth = TRUE the box widths are roughly proportional to the square
# root of the number of observations in each group.
boxplot(Cu ~ Source,
  data = metals, varwidth = TRUE,
  main = "Summary of Copper concentrations by Site"
)

# Adding notches ----

# notch.frac (a value between 0 and 1) can additionally be supplied to
# control the width of the notch relative to the box.
boxplot(Cu ~ Source,
  data = metals, varwidth = TRUE, notch = TRUE,
  main = "Summary of Copper concentrations by Site"
)

# Including or excluding outliers ----

boxplot(metals[, -1],
  outline = FALSE,
  main = "Summary of metal concentrations by Site \n (without outliers)"
)

# Horizontal box plots ----

# Note: the argument is `horizontal`, not `horiz` as in barplot().
boxplot(metals[, -1],
  horizontal = TRUE, las = 1,
  main = "Summary of metal concentrations by Site"
)

# Colours, borders and other styling ----

# border also sets the colour of the median line unless medcol overrides it.
# medlwd: median line width; whiskcol: whisker colour; staplecol: colour of
# the short bars capping the whiskers; outcol / cex / outpch: colour, size
# and plotting symbol of the outliers.
boxplot(metals[, -1],
  border = "white", col = "black", boxwex = 0.3,
  medlwd = 1, whiskcol = "black", staplecol = "black",
  outcol = "red", cex = 0.3, outpch = 19,
  main = "Summary of metal concentrations by Site"
)
# Horizontal grid lines only (nx = NA suppresses the vertical ones).
grid(nx = NA, ny = NULL, col = "gray", lty = "dashed")
# Changing the whisker length (i.e. what counts as an outlier) ----
#
# NOTE: `metals` is expected to have been read from "metals.csv" earlier in
# this script.

# range defaults to 1.5 and must not be negative; the larger it is, the
# fewer points are drawn as outliers.
boxplot(metals[, -1],
  range = 1, border = "white", col = "black",
  boxwex = 0.3, medlwd = 1, whiskcol = "black",
  staplecol = "black", outcol = "red", cex = 0.3, outpch = 19,
  main = "Summary of metal concentrations by Site \n (range=1) "
)

# With range = 0 the whiskers reach the data extremes, so there are no
# outliers at all.
boxplot(metals[, -1],
  range = 0, border = "white", col = "black",
  boxwex = 0.3, medlwd = 1, whiskcol = "black",
  staplecol = "black", outcol = "red", cex = 0.3, outpch = 19,
  main = "Summary of metal concentrations by Site \n (range=0)"
)

# Showing the number of observations ----

# Draw the plot without an x axis first ...
b <- boxplot(metals[, -1],
  xaxt = "n", border = "white", col = "black",
  boxwex = 0.3, medlwd = 1, whiskcol = "black",
  staplecol = "black", outcol = "red", cex = 0.3, outpch = 19,
  main = "Summary of metal concentrations by Site"
)
# ... then add the axis by hand, with the sample size under each name.
# mgp moves the labels further from the axis so both lines fit.
axis(
  side = 1, at = seq_along(b$names),
  labels = paste0(b$names, "\n(n=", b$n, ")"),
  mgp = c(3, 2, 0)
)

# The gplots package offers the same annotation out of the box, although the
# counts may overlap the axis. Pass top = TRUE to put them at the top.
# Install only when the package is missing instead of on every run.
if (!requireNamespace("gplots", quietly = TRUE)) {
  install.packages("gplots")
}
library(gplots)
# boxplot2() is the current name of the function formerly called boxplot.n().
boxplot2(metals[, -1],
  border = "white", col = "black", boxwex = 0.3,
  medlwd = 1, whiskcol = "black", staplecol = "black",
  outcol = "red", cex = 0.3, outpch = 19,
  main = "Summary of metal concentrations by Site"
)

# Splitting one variable into ranges ----

cuts <- c(0, 40, 80) # interval boundaries
# findInterval() tags every value with the index of the interval it falls
# into (0 = below the first cut, length(cuts) = at or above the last cut).
# Making the tags a factor with all possible levels keeps empty intervals,
# so the number of boxes always matches the number of axis labels below.
Y <- split(
  x = metals$Cu,
  f = factor(findInterval(metals$Cu, cuts), levels = 0:length(cuts))
)
boxplot(Y,
  xaxt = "n",
  border = "white", col = "black", boxwex = 0.3,
  medlwd = 1, whiskcol = "black", staplecol = "black",
  outcol = "red", cex = 0.3, outpch = 19,
  main = "Summary of Copper concentrations",
  xlab = "Concentration ranges", las = 1
)
axis(1,
  at = seq_along(Y),
  labels = c("Below 0", "0 to 40", "40 to 80", "Above 80"),
  lwd = 0, lwd.ticks = 1, col = "gray"
)

# Wrapping it in a function ----

#' Box plot of a numeric vector split at given cut points
#'
#' Groups `y` with `findInterval()` and draws one box per interval, labelled
#' "Below a", "a to b", ..., "Above z".
#'
#' @param y Numeric vector to plot.
#' @param cuts Numeric vector of cut points in increasing order (at least
#'   one value).
#' @param ... Extra arguments passed on to `boxplot()` if required.
#' @return The value of the final `axis()` call, invisibly.
boxplot.cuts <- function(y, cuts, ...) {
  stopifnot(is.numeric(cuts), length(cuts) >= 1L)

  # Split the argument `y` (not a global data set) and keep empty intervals
  # so that boxes and labels stay aligned.
  groups <- factor(findInterval(y, cuts), levels = 0:length(cuts))
  Y <- split(y, f = groups)

  boxplot(Y,
    xaxt = "n",
    border = "white", col = "black", boxwex = 0.3,
    medlwd = 1, whiskcol = "black", staplecol = "black",
    outcol = "red", cex = 0.3, outpch = 19,
    main = "Summary of Copper concentrations",
    xlab = "Concentration ranges", las = 1, ...
  )

  # Labels for the inner intervals; none when there is a single cut point.
  inner <- if (length(cuts) > 1L) {
    paste(cuts[-length(cuts)], "to", cuts[-1L])
  } else {
    character(0)
  }
  clabels <- c(
    paste("Below", cuts[1L]),
    inner,
    paste("Above", cuts[length(cuts)])
  )

  axis(1,
    at = seq_along(clabels),
    labels = clabels, lwd = 0, lwd.ticks = 1, col = "gray"
  )
}

#' Box plot of a numeric vector split at given cut points (cut() variant)
#'
#' Alternative implementation that lets `cut()` build both the groups and
#' their labels. Defining it rebinds `boxplot.cuts`, replacing the
#' `findInterval()` version above.
#'
#' @param y Numeric vector to plot; missing values are ignored.
#' @param cuts Numeric vector of cut points.
#' @return The value of the final `axis()` call, invisibly.
boxplot.cuts <- function(y, cuts) {
  # Breaks are the data range plus the cut points. unique() avoids the
  # "'breaks' are not unique" error when a cut equals the minimum or maximum.
  breaks <- sort(unique(c(range(y, na.rm = TRUE), cuts)))
  # include.lowest = TRUE keeps the smallest observation, which would
  # otherwise fall outside the first (a, b] interval and be dropped.
  f <- cut(y, breaks = breaks, include.lowest = TRUE, ordered_result = TRUE)
  Y <- split(y, f = f)

  boxplot(Y,
    xaxt = "n",
    border = "white", col = "black", boxwex = 0.3,
    medlwd = 1, whiskcol = "black", staplecol = "black",
    outcol = "red", cex = 0.3, outpch = 19,
    main = "Summary of Copper concentrations",
    xlab = "Concentration ranges", las = 1
  )

  clabels <- levels(f)
  axis(1,
    at = seq_along(clabels),
    labels = clabels, lwd = 0, lwd.ticks = 1, col = "gray"
  )
}