R语言学习笔记(二)
第一章 R语言介绍 读取,设置当前工作区 setwd("E:\\Desktop\\R Language\\R") getwd() 特殊显示格式 > options(digits=3) #显示三位有效数字 > x<-runif(20) > x [1] 0.329 0.499 0.360 0.922 0.733 0.969 0.840 0.484 0.386 0.964 0.150 0.421 [13] 0.130 0.809 0.483 0.427 0.880 0.221 0.632 0.866 文件操作 ls() #列出当前工作区中的对象 rm() #删除工作区中的对象 dir.create("folder") #创建目录 安装包 install.packages("vcd") > help(package="vcd") > library(vcd) #加载包 保存工作区 save.image("E:\\Desktop\\R Language\\R\\chapter01\\.RData") 第二章 数据结构和数据录入 向量 - 元素的类型必须一致 > a <- c("k","j","h","a","c","m") > a[3] [1] "h" > a[2:6] [1] "j" "h" "a" "c" "m" > a[c(1,2)] [1] "k" "j" 矩阵 - 元素的类型必须一致,只有二维 > y <- matrix(1:20,nrow=5,ncol=4) > y [,1] [,2] [,3] [,4] [1,] 1 6 11 16 [2,] 2 7 12 17 [3,] 3 8 13 18 [4,] 4 9 14 19 [5,] 5 10 15 20 > cells <- c(1,26,24,68) > rnames <- c("R1","R2") > cnames <- c("C1","C2") > mymatrix <- matrix(cells, nrow=2,ncol=2,byrow=TRUE,dimnames=list(rnames,cnames)) #为行列加别名 > mymatrix C1 C2 R1 1 26 R2 24 68 > mymatrix <- matrix(cells, nrow=2,ncol=2,byrow=FALSE,dimnames=list(rnames,cnames)) > mymatrix C1 C2 R1 1 24 R2 26 68 > x <- matrix(1:10,nrow=2) > x [,1] [,2] [,3] [,4] [,5] [1,] 1 3 5 7 9 [2,] 2 4 6 8 10 > > x[2,] 数组 - 元素类型必须一致,有多维 > #Array > dim1 <- c("A1","A2") > dim2 <- c("B1","B2","B3") > dim3 <- c("C1","C2","C3","C4") > z <- array(1:24, c(2,3,4),dimnames=list(dim1,dim2,dim3)) > z , , C1 B1 B2 B3 A1 1 3 5 A2 2 4 6 , , C2 B1 B2 B3 A1 7 9 11 A2 8 10 12 , , C3 B1 B2 B3 A1 13 15 17 A2 14 16 18 , , C4 B1 B2 B3 A1 19 21 23 A2 20 22 24 数据框 - 元素类型可以不同,和表格类似 patientID <- c(1,2,3,4) > age <- c(25,34,28,52) > diabetes <- c("Type1","Type2","Type1","Type1") > status <- c("Poor","Improved","Excellent","Poor") > patientdata <- data.frame(patientID,age,diabetes,status) > patientdata patientID age diabetes status 1 1 25 Type1 Poor 2 2 34 Type2 Improved 3 3 28 Type1 Excellent 4 4 52 Type1 Poor > > patientdata[1:2] patientID age 1 1 25 2 2 34 3 3 28 4 4 52 > patientdata[c("diabetes","status")] diabetes status 1 Type1 Poor 2 Type2 Improved 3 Type1 Excellent 4 Type1 
Poor > patientdata$age [1] 25 34 28 52 > > > table(patientdata$diabetes,patientdata$status) #转为表,和Excel Pivot Table类似 Excellent Improved Poor Type1 1 0 2 Type2 0 1 0 > Attach,With,Detach - 变量作用域,推荐使用with > attach(mtcars) > summary(mpg) Min. 1st Qu. Median Mean 3rd Qu. Max. 10.4 15.4 19.2 20.1 22.8 33.9 > plot(mpg,disp) > plot(mpg,wt) > detach(mtcars) > > > > #with > with(mtcars,{ + nokeepstats <- summary(mpg) + keepstats <<- summary(mpg) # 把with里面声明的变量保存在with的作用域之外 + }) > keepstats Min. 1st Qu. Median Mean 3rd Qu. Max. 10.4 15.4 19.2 20.1 22.8 33.9 > nokeepstats 错误: 找不到对象'nokeepstats' 标识列 > #Identifier > patientdata<-data.frame(patientID,age,diabetes,status,row.names=patientID) > patientdata patientID age diabetes status 1 1 25 Type1 Poor 2 2 34 Type2 Improved 3 3 28 Type1 Excellent 4 4 52 Type1 Poor 因子 Factor - 字符类型,但是有统计意义 #factor > patientID <- c(1,2,3,4) > age <- c(25,34,28,52) > diabetes <- c("Type1","Type2","Type1","Type1") > status <-c("Poor","Improved","Excellent","Poor") > diabetes <- factor(diabetes) > satus <-factor(stats,order=TRUE) Error in factor(stats, order = TRUE) : 找不到对象'stats' > status <-factor(stats,order=TRUE) Error in factor(stats, order = TRUE) : 找不到对象'stats' > status <-factor(status,order=TRUE) > patientdata<-data.frame(patientID,age,diabetes,status) > str(patientdata) 'data.frame': 4 obs. of 4 variables: $ patientID: num 1 2 3 4 $ age : num 25 34 28 52 $ diabetes : Factor w/ 2 levels "Type1","Type2": 1 2 1 1 $ status : Ord.factor w/ 3 levels "Excellent"<"Improved"<..: 3 2 1 3 > summary(patientdata + ) patientID age diabetes status Min. :1.00 Min. :25.0 Type1:3 Excellent:1 1st Qu.:1.75 1st Qu.:27.2 Type2:1 Improved :1 Median :2.50 Median :31.0 Poor :2 Mean :2.50 Mean :34.8 3rd Qu.:3.25 3rd Qu.:38.5 Max. :4.00 Max. 
:52.0 > patientdata patientID age diabetes status 1 1 25 Type1 Poor 2 2 34 Type2 Improved 3 3 28 Type1 Excellent 4 4 52 Type1 Poor 列表 - 元素可以为不同类型,多维 > g<-"My First List" > h<-c(25,26,18,39) > j<-matrix(1:10,nrow=5) > k<-c("one","two","three") > mylist<-list(title=g,ages=h,j,k) > mylist $title [1] "My First List" $ages [1] 25 26 18 39 [[3]] [,1] [,2] [1,] 1 6 [2,] 2 7 [3,] 3 8 [4,] 4 9 [5,] 5 10 [[4]] [1] "one" "two" "three" > mylist[1] $title [1] "My First List" > mylist[2] $ages [1] 25 26 18 39 > mylist["ages"] $ages [1] 25 26 18 39 > mylist[c(1,2)] $title [1] "My First List" $ages [1] 25 26 18 39 > mylist[c(1,2,3)] $title [1] "My First List" $ages [1] 25 26 18 39 [[3]] [,1] [,2] [1,] 1 6 [2,] 2 7 [3,] 3 8 [4,] 4 9 [5,] 5 10 > mylist[["ages"]] [1] 25 26 18 39 数据输入 > #data input > mydata <- data.frame(age=numeric(0),gender=character(0),weight=numeric(0)) > mydata <- edit(mydata) #手工输入 mydata <- read.table("studentgrades.csv",header=TRUE,row.names="StudentID",sep=",") #文本文件 导入excel文件 library(xlsx) workbook <- "test.xlsx" mydataframe <- read.xlsx(workbook, 1) 第三章 图形初阶 散点图和辅助线 pdf("chapter03\\mygraph.pdf") #结果输出到pdf > attach(mtcars) > plot(wt,mpg) > abline(lm(mpg~wt)) #辅助线 > title("Regression of MPG on Weight") #标题 > detach(mtcars) > dev.off() #立即输出 画布切换 dev.new() 新建 dev.set() 设置焦点 dev.next() dev.prev() 画图参数> > #画图参数,改变绘图样式 > opar <- par(no.readonly=TRUE) > par(lty=2,pch=17) > plot(dose,drugA,type="b") > par(opar) > plot(dose,drugA,type="b",lty=2,pch=17) > plot(dose,drugA,type="b",lty=2,pch=18) > plot(dose,drugA,type="b",lty=2,pch=17) > > #pch > #pcex > #cex > #lty > #lwd > plot(dose,drugA,type="b",lty=3,lwd=3,pch=15,cex=2) 颜色 install.packages("RColorBrewer") library(RColorBrewer) > n <-7 > mycolors <- brewer.pal(n,"Set1") > barplot(rep(1,n),col=mycolors) > > > n<-10 > mycolors<-rainbow(n) > pie(rep(1,n),labels=mycolors,col=mycolors) > mygrays<-gray(0:n/n) > pie(rep(1,n),labels=mygrays,col=mygrays) 字体 > par(font.lab=3,cex.lab=1.5, font.main=4, cex.main=2) > 
windowsFonts(A=windowsFont("Arial Black"),B=windowsFont("Bookman Old Style"),C=windowsFont("comic Sans MS")) > par(family="A") > par(family="A") > plot(mpg,wt) 图片尺寸 > opar<- par(no.readonly=TRUE) > par(pin=c(2,3)) > par(cex.axis=.75, font.axis=3) > plot(dose,drugA,type="b",pch=19,lty=2,col="red") > plot(dose,drugA,type="b",pch=23,lty=6,col="blue",bg="green") 添加文本和自定义坐标轴 > plot(dose,drugA,type="b",col="red",lty=2,pch=2,lwd=2,main="Clinical Trials for Drug A",sub="This is hypothetical data",xlab="Dosage",ylab="Drug Response",xlim=c(0,60),ylim=c(0,70)) > > x<-c(1:10) > y<-x > z<-10/x > opar<-par(no.readonly=TRUE) > par(mar=c(5,4,4,8)+0.1) > plot(x,y,type="b",pch=21,col="red",yaxt="n",lty=3,ann=FALSE) > lines(x,z,type="b",pch=22,col="blue",lty=2) > axis(2,at=x,labels=x,col.axis="red",las=2) > axis(4,at=x,labels=round(z,digits=2),col.axis="blue",las=2,cex.axis=0.7,tck=-.01) > axis(4,at=z,labels=round(z,digits=2),col.axis="blue",las=2,cex.axis=0.7,tck=-.01) > mtext("y=1/x",side=4,line=3,cex.lab=1,las=2,col="blue") > mtext("y=1/x",side=3,line=3,cex.lab=1,las=2,col="blue") > mtext("y=1/x",side=3,line=2,cex.lab=1,las=2,col="blue") > mtext("y=1/x",side=3,line=0,cex.lab=1,las=2,col="blue") > title("An example of creative Axes",xlab="X values",ylab="Y=X") > par(opar) > > > dev.new() > abline(h=c(1,5,7)) Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...) : plot.new has not been called yet > dev.set() windows 3 > abline(h=c(1,5,7)) > abline(v=seq(1,10,2), lty=2, col="blue") 标识符,图表legend > opar<-par(no.readonly=TRUE) > par(lwd=2,cex=1.5,font.lab=2) plot(dose,drugA,type="b",pch=15,lty=1,col="red",ylim=c(0,60),main="Drug A vs. 
Drug B",xlab="Drug Dosage",ylab="Drug Response") abline(h=c(30),lwd=1.5,lty=2,col="gray") > lines(dose,drugB,type="b",pch=17,lty=2,col="blue") install.packages("Hmisc") > library(Hmisc) minor.tick(nx=3,ny=3,tick.ratio=0.5) > legend("topleft",inset=.05,title="Drug Type", c("A","B"),lty=c(1,2),pch=c(15,17),col=c("red","blue")) > par(opar) #文本标注 > plot(wt,mpg,main="xXX",xlab="xlab",ylab="ylab",pch=18,col="blue") > text(wt,mpg,row.names(mtcars),cex=0.6,pos=4,col="red") 图形组合 多幅图表在一个画布中显示 > opar<-par(no.readonly=TRUE) > par(mfrow=c(2,2)) > plot(wt,mpg,main="wt vs. mpg") > plot(wt,disp,main="wt vs. disp") > hist(wt,main="hist.") > boxplot(wt,main="boxplot of wt") > > > layout(matrix(c(1,1,2,3),2,2,byrow=TRUE),widths=c(3,1),heights=c(1,2)) > hist(wt) > hist(mpg) > hist(disp) > > > > opar<-par(no.readonly=TRUE) > par(fig=c(0,0.8,0,0.8) + ) > plot(mtcars$wt, mtcars$mpg, xlab="xlab", ylab="ylab") > par(fig=c(0,0.8,0.55,1),new=TRUE) > boxplot(mtcars$wt,horizontal=TRUE,axes=FALSE) > par(fig=c(0.65,1,0,0.8),new=TRUE) > boxplot(mtcars$mpg,axes=FALSE) > mtext("Enhanced Scatterplot",side=3,outer=TRUE,line=-3) 下图是本章最佳图片:D, 虽然R画出来的图形不是很美观,但是它提供了很多灵活的绘图参数,自由组合这些参数应该能画出非常给力的分析报表。