探索性数据分析 面膜数据描述统计#R

# Read the face-mask data set; the first row of the file holds the column names.
mydata <- read.csv(
  "C:\\Users\\yangfan\\Desktop\\statistics\\大数据班\\面膜数据.csv",
  header = TRUE
)

# Keep columns 8 to 12 for every row.
new <- mydata[, 8:12]

# Extract a single variable and count how often each of its values occurs.
a <- mydata$补水保湿
table(a)

# Per-column totals of the selected columns. colSums() replaces the generic
# apply(new, 2, sum), where 2 means "operate over columns". apply() is still
# the tool for a custom per-column function, e.g.
#   my_fun <- function(x) sum(x^2)
#   apply(new, 2, my_fun)
counts <- colSums(new)
barplot(counts)

# Inspect the type of a variable. class() needs an argument; calling it with
# none raises an error.
class(a)

 面膜数据分析的案例

# Case study: face-mask data. The first row of the file holds the column names.
face <- read.csv(
  "C:\\Users\\yangfan\\Desktop\\statistics\\大数据班\\面膜数据2.csv",
  header = TRUE
)

# Columns 8-12 are totalled per column and plotted as "number of products per
# function" -- presumably 0/1 indicator columns, one per function; confirm
# against the data file.
# NOTE(review): the name `get` shadows base::get(); kept unchanged because
# later code may refer to it.
get <- face[, 8:12]
num <- colSums(get)
barplot(
  num,
  xlab = "功能", ylab = "款式数量",
  ylim = c(0, 800), col = "lightblue"
)

# Weight each function column by column 3 (plotted as sales volume --
# presumably the monthly-sales column; confirm), then total per function.
volume <- face[, 3] * face[, 8:12]
sumvolume <- colSums(volume)
barplot(sumvolume, xlab = "功能", ylab = "总销量", col = "lightpink")

# Average sales volume per product for each function.
avgvolume <- sumvolume / num
barplot(avgvolume, xlab = "功能", ylab = "平均销量", col = "lightblue")

# Same computation with column 2 (plotted as price -- presumably the price
# column; confirm): total per function, then average per product.
price <- face[, 2] * face[, 8:12]
sumprice <- colSums(price)
avgprice <- sumprice / num
barplot(avgprice, xlab = "功能", ylab = "平均价格")

# Origin of each product.
place <- face$产地

# Number of products per origin. table() is used rather than summary()
# because summary() on a character column only reports length/class/mode.
table(place, useNA = "ifany")

# Mean price per origin. `%in%` yields FALSE for missing origins, so rows
# with an NA origin are left out of every group.
japanavg <- mean(face$价格[place %in% "日本"])
chinaavg <- mean(face$价格[place %in% "中国"])
koreaavg <- mean(face$价格[place %in% "韩国"])
usaavg <- mean(face$价格[place %in% "美国"])
# "其他/other" and "英国" are pooled into a single "other" group.
etcavg <- mean(face$价格[place %in% c("其他/other", "英国")])

cprice <- c(chinaavg, japanavg, koreaavg, usaavg, etcavg)
names(cprice) <- c("中国", "日本", "韩国", "美国", "其它")
barplot(cprice, xlab = "国家", ylab = "平均价格")

# Distribution of monthly sales over all products.
summary(face$月销量)

# Mean monthly sales per origin, using the same grouping as for price.
japanvolume <- mean(face$月销量[place %in% "日本"])
chinavolume <- mean(face$月销量[place %in% "中国"])
koreavolume <- mean(face$月销量[place %in% "韩国"])
usavolume <- mean(face$月销量[place %in% "美国"])
etcvolume <- mean(face$月销量[place %in% c("其他/other", "英国")])

cvolume <- c(chinavolume, japanvolume, koreavolume, usavolume, etcvolume)
names(cvolume) <- c("中国", "日本", "韩国", "美国", "其它")
barplot(cvolume, xlab = "国家", ylab = "平均销量")

 

posted @ 2017-09-16 22:23  凡柒  阅读(596)  评论(1)  编辑  收藏  举报