利用生鲜数据画图
library("ggplot2") library("RODBC") #加载RODBC包,读取数据库数据 library("sqldf") #sql语句处理数据 library("scales") library("reshape2") library(gcookbook) # For the data set library("tcltk") #### 生成数据框 #### library(RODBC) #library(plyr) channel1 <- odbcConnectExcel2007("C:/Users/Jennifer/Desktop/生鲜/生鲜二级商家.xlsx") jbp01<-sqlFetch(channel1 ,"Sheet4") head(jbp01) #,as.is=F) odbcClose(channel1) f = file("C:/Users/Jennifer/Desktop/Book1.xlsx") readLines(f,10) #head(jbp01) close(myconnect) #关闭数据库连接 ####数据处理#### #jbp01$month <- as.character(jbp01$month) #月份处理成文本 names(jbp01) <- tolower(names(jbp01)) #字段名调整为小写,易于处理 jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型 head(jbp01) ####线图#### #用sql语句处理 jbp02 <- sqldf("select * from jbp01",row.names=T) p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=sum(mrchnt_num),colour=factor(prov_name))) p+ geom_line(size=0.8)+ xlab("日期")+ ylab("销售金额 单位:K")+ labs(title="生鲜商城数据")+#设置图的标题 scale_y_continuous() #coord_flip() #横纵坐标翻转 #多个线图 jbp02 <- sqldf("select prov_name,categ_lvl2_name,sum(mrchnt_num) mrchnt_num from jbp01 group by prov_name,categ_lvl2_name",row.names=T) p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=mrchnt_num),fill= categ_lvl2_name) p+ geom_bar( stat = "identity")+ facet_wrap(~prov_name,scale="free_y")+scale_x_continuous(breaks=1:11)+ xlab("二级类目")+ ylab("商家数")+ labs(title="不同省份二级类目商家数") #设置图的标题 ####饼图#### jbp02 <- sqldf("select categ_lvl2_name,sum(mrchnt_num) mrchnt_num from jbp01 where prov_name ='上海' group by categ_lvl2_name",row.names=T) jbp02 <- transform(jbp02, percent=mrchnt_num/sum(mrchnt_num)) p <- ggplot(jbp02,aes(x="", y=percent, fill=categ_lvl2_name)) p+ geom_bar(width = 1, stat = "identity")+ coord_polar("y") ####中国地图#### #library(ggmap) #library(mapproj) #map <- get_map(location='china',zoom=4) #ggmap(map) #此路不通,GOOGLE地图链接不上 library(maps) library(mapdata) map("china") library(maptools) setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要 x=readShapePoly('bou2_4p.shp') #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shx plot(x) plot(x,col=gray(924:0/924)); #设置分割线 getColor=function(mapdata,provname,provcol,othercol) { f=function(x,y) ifelse(x %in% y,which(y==x),0) colIndex=sapply(mapdata@data$NAME,f,provname) col=c(othercol,provcol)[colIndex+1] return(col) } # provname=c("北京市","天津市","上海市","重庆市") #ctr+shift+C # provcol=c("red","green","yellow","purple") # plot(x,col=getColor(x,provname,provcol,"white")) jbp03 <- sqldf("select case when mrchnt_name='武汉' then '湖北省' when mrchnt_name='济南' then '山东省' when mrchnt_name='广州' then '广东省' when mrchnt_name='福建' then '福建省' when mrchnt_name='成都' then '四川省' when mrchnt_name='北京' then '北京市' when mrchnt_name='上海' then '上海市' end prov_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%'group by case when mrchnt_name='武汉' then '湖北省' when mrchnt_name='济南' then '山东省' when mrchnt_name='广州' then '广东省' when mrchnt_name='福建' then '福建省' when mrchnt_name='成都' then '四川省' when mrchnt_name='北京' then '北京市' when mrchnt_name='上海' then '上海市' end",row.names=T) prov_name <-c(jbp03$prov_name) sale_amt <- c(jbp03$sale_amt) prov_col=rgb(red=1-sale_amt/max(sale_amt)/2,green=1-sale_amt/max(sale_amt)/2,blue=0) plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="") ####条形图#### jbp02 <- sqldf("select * from jbp01",row.names=T) p <- ggplot(jbp02,aes(x=factor(categ_lvl2_name),y=mrchnt_num)) p+ geom_bar(stat="identity",colour='red',fill='blue') #堆积图 myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接 jbp01 <- sqlQuery(myconnect,"select DATEPART(month, date_id) month,t1.* from temp1 t1") #head(jbp01) close(myconnect) #关闭数据库连接 names(jbp01) <- tolower(names(jbp01)) #字段名调整为小写,易于处理 jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型 jbp02 <- sqldf("select categ_lvl2_name,prov_name,count(mrchnt_name) mrchnt_num from jbp01 group by prov_name ",row.names=T) p <- ggplot(jbp02,aes(x=factor(categ_lvl2_name),y=mrchnt_num)) p+ geom_bar(stat="identity",colour='red',fill='blue')+ facet_wrap(~prov_name) #geom_bar(position='dodge',stat="identity",colour='red',fill='blue') #dodge方式是将不同年份的数据并列放置;stack方式是将不同年份数据堆叠放置 ####气泡图#### jbp02 <- sqldf("select prov_name,categ_lvl2_name,count(mrchnt_name) mrchnt from jbp01 group by categ_lvl2_name ",row.names=T) p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=mrchnt,colour=factor(prov_name))) p+ geom_point(aes(size = mrchnt))+ scale_size_continuous(range=c(3,10))+ scale_y_continuous(labels=comma)+ xlab("一级类目")+ ylab("销售金额")+ labs(title="气泡图") #设置图的标题 ####广告PPT地图#### library(maps) library(mapdata) library(maptools) myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from prov_vstrs") close(myconnct) jbp01 <- sqldf("select prov_name,categ_lvl2_name,count(mrchnt_name) mrchnt from jbp01 group by categ_lvl2_name ",row.names=T) names(jbp01) <- tolower(names(jbp01)) jbp01$prov_name <-as.character(jbp01$prov_name) setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要 x=readShapePoly('bou2_4p.shp') #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shx getColor=function(mapdata,provname,provcol,othercol) { f=function(x,y) ifelse(x %in% y,which(y==x),0) colIndex=sapply(mapdata@data$NAME,f,provname) col=c(othercol,provcol)[colIndex+1] return(col) } prov_name <-c(jbp01$prov_name) vistrs <- c(jbp01$vistrs) prov_col=rgb(red=1-vistrs/max(vistrs)/2,green=1-vistrs/max(vistrs)/2,blue=0) plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="") ####广告PPT图2#### myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from ad_chart") close(myconnct) names(jbp01) <- tolower(names(jbp01)) jbp01$date_id<-as.Date(jbp01$date_id) jbp01 <- sqldf("select date_id,channel,sum(vistrs)-sum(invalid_vistrs) valid_vistrs from jbp01 where channel <> 'SEM' group by date_id,channel ",row.names=T) p <- ggplot(jbp01,aes(x=date_id,y=valid_vistrs,group=channel,colour=factor(channel))) p +geom_line(size=0.75)+ facet_wrap(~channel)+ scale_x_date(labels = date_format("%m/%d"), minor_breaks = date_breaks("1 week"))+ scale_y_continuous(labels=comma)+ xlab("日期")+ ylab("有效访客Vstrs")+ labs(title="渠道日有效访客数") #设置图的标题 ####广告PPT 玫瑰图#### myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from ad_pie") close(myconnct) names(jbp01) <- tolower(names(jbp01)) names(jbp01) <- c("channel","无效访客","有效访客") mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type") ggplot(mt)+geom_bar(aes(x=channel,y=visits,fill=type),stat="identity")+ coord_polar()+ ##玫瑰图 xlab("渠道")+ ylab("访客数")+ theme(text=element_text(size=12),axis.text.x=element_text(colour="black",size=12,face="bold"))+ scale_y_continuous(label=comma)+ guides(fill=guide_legend(title=NULL))+ labs(title="渠道访客差异") #ylim(0,500) #设立坐标轴的范围 #guides(fill=F)#剔除标签 ####堆积图 玫瑰图#### myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from ad_pie") close(myconnct) names(jbp01) <- tolower(names(jbp01)) ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity")+ coord_polar() ##玫瑰图 堆积图衍生 # stat= bin 默认 identity 独立变量 ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity") #堆积柱状图 ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity",position="fill") + #堆积柱状图 scale_y_continuous(labels=percent) #position: stack(数值) fill(百分比) identity() dodge(并排) jitter(增加扰动) ####融合#### mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type") names(mt) str(mt) sem_visigt, seo_vist dcast(mt, channel + dd ~ type, sum, mean) plyr splite-c-com dplyr #####ppt cluser#### myconnct <- odbcConnect(dsn="jbp2014",uid="chengyingbao",pwd="Mis,111") chart01 <- sqlQuery(channel=myconnct,query="select page_categ_name,pro_vstrs_per,yhd_vstrs_per from ad_cluster") close(myconnct) names(chart01) <- tolower(names(chart01)) names(chart01) <- c("page_categ_name","可乐活动访客","全站访客") mt <- melt(data=chart01,id.vars="page_categ_name",value.name="percent",variable.name="type") mt <- sqldf("select page_categ_name,type,case when type='可乐活动访客' then percent*-1 else percent end percent from mt") p = ggplot(mt) p+ geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+ coord_flip()+ theme(axis.text.x=element_blank()) #p = ggplot(mt,aes(x=interaction(page_categ_name, type),y=percent)) #p+ # geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+ # coord_flip()+ # geom_text(aes(label=percent, vjust=-0.2)) library("ggplot2") library("RODBC") #加载RODBC包,读取数据库数据 library("sqldf") #sql语句处理数据 library("scales") library("reshape2") library(gcookbook) # For the data set library("tcltk") #### 生成数据框 #### #myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接 #jbp01 <- sqlQuery(myconnect,"select * from temp1") library(RODBC) #library(plyr) channel1 <- odbcConnectExcel2007("C:/Users/Jennifer/Desktop/Book1.xlsx") jbp01<-sqlFetch(channel1 ,"Sheet2") #,as.is=F) odbcClose(channel1) f = file("C:/Users/Jennifer/Desktop/Book1.xlsx") readLines(f,10) #head(jbp01) close(myconnect) #关闭数据库连接 ####数据处理#### #jbp01$month <- as.character(jbp01$month) #月份处理成文本 names(jbp01) <- tolower(names(jbp01)) #字段名调整为小写,易于处理 jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型 head(jbp01) ####线图#### #用sql语句处理 jbp02 <- sqldf("select date_id,categ_lvl1_name,sum(ordr_sale) sale_amt from jbp01 group by date_id,categ_lvl1_name",row.names=T) p <- ggplot(jbp02,aes(x=date_id,y=sale_amt,colour=prov_name)) p+ geom_line(size=0.8)+ xlab("日期")+ ylab("销售金额 单位:K")+ labs(title="生鲜商城数据")+#设置图的标题 scale_y_continuous() #coord_flip() #横纵坐标翻转 #多个线图 jbp02 <- sqldf("select manufacture,date_id,categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 group by manufacture,date_id,categ_lvl1_name",row.names=T) p <- ggplot(jbp02,aes(x=date_id,y=sale_amt/1000,colour=factor(categ_lvl1_name))) p+ geom_line(size=0.8)+ facet_wrap(~manufacture)+ xlab("日期")+ ylab("销售金额 单位:K")+ labs(title="JBP日销售") #设置图的标题 ####饼图#### jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%' group by categ_lvl1_name",row.names=T) jbp02 <- transform(jbp02, percent=sale_amt/sum(sale_amt)) p <- ggplot(jbp02,aes(x="", y=percent, fill=categ_lvl1_name)) p+ geom_bar(width = 1, stat = "identity")+ coord_polar("y") ####中国地图#### #library(ggmap) #library(mapproj) #map <- get_map(location='china',zoom=4) #ggmap(map) #此路不通,GOOGLE地图链接不上 library(maps) library(mapdata) map("china") library(maptools) setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要 x=readShapePoly('bou2_4p.shp') #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shx plot(x) plot(x,col=gray(924:0/924)); #设置分割线 getColor=function(mapdata,provname,provcol,othercol) { f=function(x,y) ifelse(x %in% y,which(y==x),0) colIndex=sapply(mapdata@data$NAME,f,provname) col=c(othercol,provcol)[colIndex+1] return(col) } # provname=c("北京市","天津市","上海市","重庆市") #ctr+shift+C # provcol=c("red","green","yellow","purple") # plot(x,col=getColor(x,provname,provcol,"white")) jbp03 <- sqldf("select case when mrchnt_name='武汉' then '湖北省' when mrchnt_name='济南' then '山东省' when mrchnt_name='广州' then '广东省' when mrchnt_name='福建' then '福建省' when mrchnt_name='成都' then '四川省' when mrchnt_name='北京' then '北京市' when mrchnt_name='上海' then '上海市' end prov_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%'group by case when mrchnt_name='武汉' then '湖北省' when mrchnt_name='济南' then '山东省' when mrchnt_name='广州' then '广东省' when mrchnt_name='福建' then '福建省' when mrchnt_name='成都' then '四川省' when mrchnt_name='北京' then '北京市' when mrchnt_name='上海' then '上海市' end",row.names=T) prov_name <-c(jbp03$prov_name) sale_amt <- c(jbp03$sale_amt) prov_col=rgb(red=1-sale_amt/max(sale_amt)/2,green=1-sale_amt/max(sale_amt)/2,blue=0) plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="") ####条形图#### jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%' group by categ_lvl1_name",row.names=T) p <- ggplot(jbp02,aes(x=factor(categ_lvl1_name),y=sale_amt/1000)) p+ geom_bar(stat="identity",colour='red',fill='blue') #堆积图 myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接 jbp01 <- sqlQuery(myconnect,"select DATEPART(month, date_id) month,t1.* from temp1 t1") #head(jbp01) close(myconnect) #关闭数据库连接 names(jbp01) <- tolower(names(jbp01)) #字段名调整为小写,易于处理 jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型 jbp02 <- sqldf("select month,categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%' group by month,categ_lvl1_name",row.names=T) p <- ggplot(jbp02,aes(x=factor(categ_lvl1_name),y=sale_amt/1000)) p+ geom_bar(stat="identity",colour='red',fill='blue')+ facet_wrap(~month) #geom_bar(position='dodge',stat="identity",colour='red',fill='blue') #dodge方式是将不同年份的数据并列放置;stack方式是将不同年份数据堆叠放置 ####气泡图#### jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt,sum(sale_amt)/sum(sale_num) asp from jbp01 where manufacture like '%宝洁%' group by categ_lvl1_name",row.names=T) p <- ggplot(jbp02,aes(x=categ_lvl1_name,y=sale_amt/1000,colour=factor(categ_lvl1_name))) p+ geom_point(aes(size = asp))+ scale_size_continuous(range=c(3,10))+ scale_y_continuous(labels=comma) xlab("一级类目")+ ylab("销售金额 单位:K")+ labs(title="气泡图") #设置图的标题 ####广告PPT地图#### library(maps) library(mapdata) library(maptools) myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from prov_vstrs") close(myconnct) names(jbp01) <- tolower(names(jbp01)) jbp01$prov_name <-as.character(jbp01$prov_name) x=readShapePoly('bou2_4p.shp') getColor=function(mapdata,provname,provcol,othercol) { f=function(x,y) ifelse(x %in% y,which(y==x),0) colIndex=sapply(mapdata@data$NAME,f,provname) col=c(othercol,provcol)[colIndex+1] return(col) } prov_name <-c(jbp01$prov_name) vistrs <- c(jbp01$vistrs) prov_col=rgb(red=1-vistrs/max(vistrs)/2,green=1-vistrs/max(vistrs)/2,blue=0) plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="") ####广告PPT图2#### myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from ad_chart") close(myconnct) names(jbp01) <- tolower(names(jbp01)) jbp01$date_id<-as.Date(jbp01$date_id) jbp01 <- sqldf("select date_id,channel,sum(vistrs)-sum(invalid_vistrs) valid_vistrs from jbp01 where channel <> 'SEM' group by date_id,channel ",row.names=T) p <- ggplot(jbp01,aes(x=date_id,y=valid_vistrs,group=channel,colour=factor(channel))) p +geom_line(size=0.75)+ facet_wrap(~channel)+ scale_x_date(labels = date_format("%m/%d"), minor_breaks = date_breaks("1 week"))+ scale_y_continuous(labels=comma)+ xlab("日期")+ ylab("有效访客Vstrs")+ labs(title="渠道日有效访客数") #设置图的标题 ####广告PPT 玫瑰图#### myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from ad_pie") close(myconnct) names(jbp01) <- tolower(names(jbp01)) names(jbp01) <- c("channel","无效访客","有效访客") mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type") ggplot(mt)+geom_bar(aes(x=channel,y=visits,fill=type),stat="identity")+ coord_polar()+ ##玫瑰图 xlab("渠道")+ ylab("访客数")+ theme(text=element_text(size=12),axis.text.x=element_text(colour="black",size=12,face="bold"))+ scale_y_continuous(label=comma)+ guides(fill=guide_legend(title=NULL))+ labs(title="渠道访客差异") #ylim(0,500) #设立坐标轴的范围 #guides(fill=F)#剔除标签 ####堆积图 玫瑰图#### myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111") jbp01 <- sqlQuery(myconnct,"select * from ad_pie") close(myconnct) names(jbp01) <- tolower(names(jbp01)) ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity")+ coord_polar() ##玫瑰图 堆积图衍生 # stat= bin 默认 identity 独立变量 ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity") #堆积柱状图 ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity",position="fill") + #堆积柱状图 scale_y_continuous(labels=percent) #position: stack(数值) fill(百分比) identity() dodge(并排) jitter(增加扰动) ####融合#### mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type") names(mt) str(mt) sem_visigt, seo_vist dcast(mt, channel + dd ~ type, sum, mean) plyr splite-c-com dplyr #####ppt cluser#### myconnct <- odbcConnect(dsn="jbp2014",uid="chengyingbao",pwd="Mis,111") chart01 <- sqlQuery(channel=myconnct,query="select page_categ_name,pro_vstrs_per,yhd_vstrs_per from ad_cluster") close(myconnct) names(chart01) <- tolower(names(chart01)) names(chart01) <- c("page_categ_name","可乐活动访客","全站访客") mt <- melt(data=chart01,id.vars="page_categ_name",value.name="percent",variable.name="type") mt <- sqldf("select page_categ_name,type,case when type='可乐活动访客' then percent*-1 else percent end percent from mt") p = ggplot(mt) p+ geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+ coord_flip()+ theme(axis.text.x=element_blank()) #p = ggplot(mt,aes(x=interaction(page_categ_name, type),y=percent)) #p+ # geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+ # coord_flip()+ # geom_text(aes(label=percent, vjust=-0.2))