利用生鲜数据画图

library("ggplot2")

library("RODBC") #加载RODBC包,读取数据库数据

library("sqldf") #sql语句处理数据

library("scales")

library("reshape2")

library(gcookbook) # For the data set
library("tcltk")

#### 生成数据框 ####

library(RODBC)
#library(plyr)


channel1 <- odbcConnectExcel2007("C:/Users/Jennifer/Desktop/生鲜/生鲜二级商家.xlsx")
jbp01<-sqlFetch(channel1 ,"Sheet4")
head(jbp01)
#,as.is=F) 
odbcClose(channel1)


f = file("C:/Users/Jennifer/Desktop/Book1.xlsx")

readLines(f,10)

#head(jbp01)

close(myconnect) #关闭数据库连接


####数据处理####


#jbp01$month <- as.character(jbp01$month) #月份处理成文本

names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理

jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型

head(jbp01)
####线图####

#用sql语句处理
jbp02 <- sqldf("select * from jbp01",row.names=T)

p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=sum(mrchnt_num),colour=factor(prov_name)))

p+
  geom_line(size=0.8)+
  
  xlab("日期")+
  ylab("销售金额 单位:K")+
  labs(title="生鲜商城数据")+#设置图的标题
  scale_y_continuous()

#coord_flip() #横纵坐标翻转

#多个线图
jbp02 <- sqldf("select prov_name,categ_lvl2_name,sum(mrchnt_num) mrchnt_num from jbp01 group by prov_name,categ_lvl2_name",row.names=T)

p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=mrchnt_num),fill= categ_lvl2_name)

p+
  geom_bar( stat = "identity")+
  facet_wrap(~prov_name,scale="free_y")+scale_x_continuous(breaks=1:11)+
  
  xlab("二级类目")+
  ylab("商家数")+
  labs(title="不同省份二级类目商家数") #设置图的标题

####饼图####
jbp02 <- sqldf("select categ_lvl2_name,sum(mrchnt_num) mrchnt_num from jbp01 where prov_name ='上海' group by categ_lvl2_name",row.names=T)

jbp02 <- transform(jbp02, percent=mrchnt_num/sum(mrchnt_num))

p <- ggplot(jbp02,aes(x="", y=percent, fill=categ_lvl2_name)) 

p+  geom_bar(width = 1, stat = "identity")+
  
  coord_polar("y")


####中国地图####
#library(ggmap)
#library(mapproj)
#map <- get_map(location='china',zoom=4)
#ggmap(map) #此路不通,GOOGLE地图链接不上


library(maps)
library(mapdata)
map("china")

library(maptools)
setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要
x=readShapePoly('bou2_4p.shp') #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shx

plot(x)

plot(x,col=gray(924:0/924)); #设置分割线

getColor=function(mapdata,provname,provcol,othercol)
{
  f=function(x,y) ifelse(x %in% y,which(y==x),0)
  colIndex=sapply(mapdata@data$NAME,f,provname)
  col=c(othercol,provcol)[colIndex+1]
  return(col)
}
# provname=c("北京市","天津市","上海市","重庆市") #ctr+shift+C
# provcol=c("red","green","yellow","purple")
# plot(x,col=getColor(x,provname,provcol,"white"))


jbp03 <- sqldf("select case when mrchnt_name='武汉' then '湖北省'
               when mrchnt_name='济南' then '山东省'
               when mrchnt_name='广州' then '广东省'
               when mrchnt_name='福建' then '福建省'
               when mrchnt_name='成都' then '四川省'
               when mrchnt_name='北京' then '北京市'
               when mrchnt_name='上海' then '上海市'
               end prov_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%'group by case when mrchnt_name='武汉' then '湖北省'
               when mrchnt_name='济南' then '山东省'
               when mrchnt_name='广州' then '广东省'
               when mrchnt_name='福建' then '福建省'
               when mrchnt_name='成都' then '四川省'
               when mrchnt_name='北京' then '北京市'
               when mrchnt_name='上海' then '上海市'
               end",row.names=T)
prov_name <-c(jbp03$prov_name)
sale_amt <- c(jbp03$sale_amt)
prov_col=rgb(red=1-sale_amt/max(sale_amt)/2,green=1-sale_amt/max(sale_amt)/2,blue=0)
plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")


####条形图####

jbp02 <- sqldf("select * from jbp01",row.names=T)

p <- ggplot(jbp02,aes(x=factor(categ_lvl2_name),y=mrchnt_num))

p+
  geom_bar(stat="identity",colour='red',fill='blue')

#堆积图
myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接

jbp01 <- sqlQuery(myconnect,"select  DATEPART(month, date_id) month,t1.* from temp1 t1")

#head(jbp01)

close(myconnect) #关闭数据库连接

names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理

jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型


jbp02 <- sqldf("select categ_lvl2_name,prov_name,count(mrchnt_name) mrchnt_num from jbp01 group by prov_name ",row.names=T)

p <- ggplot(jbp02,aes(x=factor(categ_lvl2_name),y=mrchnt_num))

p+
  geom_bar(stat="identity",colour='red',fill='blue')+
  facet_wrap(~prov_name)

#geom_bar(position='dodge',stat="identity",colour='red',fill='blue')

#dodge方式是将不同年份的数据并列放置;stack方式是将不同年份数据堆叠放置



####气泡图####

jbp02 <- sqldf("select prov_name,categ_lvl2_name,count(mrchnt_name) mrchnt from jbp01 group by categ_lvl2_name  ",row.names=T)

p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=mrchnt,colour=factor(prov_name)))

p+
  geom_point(aes(size = mrchnt))+
  scale_size_continuous(range=c(3,10))+
  scale_y_continuous(labels=comma)+
xlab("一级类目")+
  ylab("销售金额")+
  labs(title="气泡图") #设置图的标题



####广告PPT地图####

library(maps)
library(mapdata)


library(maptools)

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from prov_vstrs")

close(myconnct)


jbp01 <- sqldf("select prov_name,categ_lvl2_name,count(mrchnt_name) mrchnt from jbp01 group by categ_lvl2_name  ",row.names=T)
names(jbp01) <- tolower(names(jbp01))

jbp01$prov_name <-as.character(jbp01$prov_name)
setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要
x=readShapePoly('bou2_4p.shp') #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shx

getColor=function(mapdata,provname,provcol,othercol)
{
  f=function(x,y) ifelse(x %in% y,which(y==x),0)
  colIndex=sapply(mapdata@data$NAME,f,provname)
  col=c(othercol,provcol)[colIndex+1]
  return(col)
}


prov_name <-c(jbp01$prov_name)
vistrs <- c(jbp01$vistrs)
prov_col=rgb(red=1-vistrs/max(vistrs)/2,green=1-vistrs/max(vistrs)/2,blue=0)
plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")

####广告PPT图2####

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from ad_chart")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))

jbp01$date_id<-as.Date(jbp01$date_id)

jbp01 <- sqldf("select date_id,channel,sum(vistrs)-sum(invalid_vistrs) valid_vistrs from jbp01  where channel <> 'SEM' group by date_id,channel  ",row.names=T)

p <- ggplot(jbp01,aes(x=date_id,y=valid_vistrs,group=channel,colour=factor(channel)))

p +geom_line(size=0.75)+
  facet_wrap(~channel)+
  scale_x_date(labels = date_format("%m/%d"),
               minor_breaks = date_breaks("1 week"))+
  scale_y_continuous(labels=comma)+
  xlab("日期")+
  ylab("有效访客Vstrs")+
  labs(title="渠道日有效访客数") #设置图的标题

####广告PPT 玫瑰图####

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from ad_pie")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))

names(jbp01) <- c("channel","无效访客","有效访客")

mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")

ggplot(mt)+geom_bar(aes(x=channel,y=visits,fill=type),stat="identity")+
  coord_polar()+  ##玫瑰图
  xlab("渠道")+ ylab("访客数")+
  theme(text=element_text(size=12),axis.text.x=element_text(colour="black",size=12,face="bold"))+
  scale_y_continuous(label=comma)+
  guides(fill=guide_legend(title=NULL))+
  labs(title="渠道访客差异")
#ylim(0,500) #设立坐标轴的范围
#guides(fill=F)#剔除标签


####堆积图 玫瑰图####

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from ad_pie")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))


ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity")+
  coord_polar()  ##玫瑰图 堆积图衍生
# stat= bin 默认 identity 独立变量 

ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity") #堆积柱状图

ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity",position="fill") + #堆积柱状图
  scale_y_continuous(labels=percent)

#position: stack(数值) fill(百分比) identity() dodge(并排)  jitter(增加扰动)



####融合####
mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")
names(mt)

str(mt)
sem_visigt, seo_vist

dcast(mt, channel + dd ~ type, sum, mean)

plyr splite-c-com
dplyr



#####ppt cluser####

myconnct <- odbcConnect(dsn="jbp2014",uid="chengyingbao",pwd="Mis,111")

chart01 <- sqlQuery(channel=myconnct,query="select page_categ_name,pro_vstrs_per,yhd_vstrs_per from ad_cluster")

close(myconnct)

names(chart01) <- tolower(names(chart01))

names(chart01) <- c("page_categ_name","可乐活动访客","全站访客")

mt <- melt(data=chart01,id.vars="page_categ_name",value.name="percent",variable.name="type")

mt <- sqldf("select page_categ_name,type,case when type='可乐活动访客' then percent*-1
            else percent
            end percent from mt")


p = ggplot(mt)

p+ geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+
  coord_flip()+
  theme(axis.text.x=element_blank())


#p = ggplot(mt,aes(x=interaction(page_categ_name, type),y=percent))
#p+ 
#  geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+
#  coord_flip()+
#  geom_text(aes(label=percent, vjust=-0.2))


library("ggplot2")

library("RODBC") #加载RODBC包,读取数据库数据

library("sqldf") #sql语句处理数据

library("scales")

library("reshape2")

library(gcookbook) # For the data set
library("tcltk")

#### 生成数据框 ####

#myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接

#jbp01 <- sqlQuery(myconnect,"select * from temp1")

library(RODBC)
#library(plyr)


channel1 <- odbcConnectExcel2007("C:/Users/Jennifer/Desktop/Book1.xlsx")
jbp01<-sqlFetch(channel1 ,"Sheet2")

#,as.is=F) 
odbcClose(channel1)


f = file("C:/Users/Jennifer/Desktop/Book1.xlsx")

readLines(f,10)

#head(jbp01)

close(myconnect) #关闭数据库连接


####数据处理####


#jbp01$month <- as.character(jbp01$month) #月份处理成文本

names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理

jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型

head(jbp01)
####线图####

#用sql语句处理
jbp02 <- sqldf("select date_id,categ_lvl1_name,sum(ordr_sale) sale_amt from jbp01  group by date_id,categ_lvl1_name",row.names=T)

p <- ggplot(jbp02,aes(x=date_id,y=sale_amt,colour=prov_name))

p+
  geom_line(size=0.8)+
  
  xlab("日期")+
  ylab("销售金额 单位:K")+
  labs(title="生鲜商城数据")+#设置图的标题
  scale_y_continuous()

#coord_flip() #横纵坐标翻转

#多个线图
jbp02 <- sqldf("select manufacture,date_id,categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 group by manufacture,date_id,categ_lvl1_name",row.names=T)

p <- ggplot(jbp02,aes(x=date_id,y=sale_amt/1000,colour=factor(categ_lvl1_name)))

p+
  geom_line(size=0.8)+
  facet_wrap(~manufacture)+
  
  xlab("日期")+
  ylab("销售金额 单位:K")+
  labs(title="JBP日销售") #设置图的标题

####饼图####
jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%' group by categ_lvl1_name",row.names=T)

jbp02 <- transform(jbp02, percent=sale_amt/sum(sale_amt))

p <- ggplot(jbp02,aes(x="", y=percent, fill=categ_lvl1_name)) 

p+  geom_bar(width = 1, stat = "identity")+
  
  coord_polar("y")


####中国地图####
#library(ggmap)
#library(mapproj)
#map <- get_map(location='china',zoom=4)
#ggmap(map) #此路不通,GOOGLE地图链接不上


library(maps)
library(mapdata)
map("china")

library(maptools)
setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要
x=readShapePoly('bou2_4p.shp') #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shx

plot(x)

plot(x,col=gray(924:0/924)); #设置分割线

getColor=function(mapdata,provname,provcol,othercol)
{
  f=function(x,y) ifelse(x %in% y,which(y==x),0)
  colIndex=sapply(mapdata@data$NAME,f,provname)
  col=c(othercol,provcol)[colIndex+1]
  return(col)
}
# provname=c("北京市","天津市","上海市","重庆市") #ctr+shift+C
# provcol=c("red","green","yellow","purple")
# plot(x,col=getColor(x,provname,provcol,"white"))


jbp03 <- sqldf("select case when mrchnt_name='武汉' then '湖北省'
               when mrchnt_name='济南' then '山东省'
               when mrchnt_name='广州' then '广东省'
               when mrchnt_name='福建' then '福建省'
               when mrchnt_name='成都' then '四川省'
               when mrchnt_name='北京' then '北京市'
               when mrchnt_name='上海' then '上海市'
               end prov_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%'group by case when mrchnt_name='武汉' then '湖北省'
               when mrchnt_name='济南' then '山东省'
               when mrchnt_name='广州' then '广东省'
               when mrchnt_name='福建' then '福建省'
               when mrchnt_name='成都' then '四川省'
               when mrchnt_name='北京' then '北京市'
               when mrchnt_name='上海' then '上海市'
               end",row.names=T)
prov_name <-c(jbp03$prov_name)
sale_amt <- c(jbp03$sale_amt)
prov_col=rgb(red=1-sale_amt/max(sale_amt)/2,green=1-sale_amt/max(sale_amt)/2,blue=0)
plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")


####条形图####

jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%' group by categ_lvl1_name",row.names=T)

p <- ggplot(jbp02,aes(x=factor(categ_lvl1_name),y=sale_amt/1000))

p+
  geom_bar(stat="identity",colour='red',fill='blue')

#堆积图
myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接

jbp01 <- sqlQuery(myconnect,"select  DATEPART(month, date_id) month,t1.* from temp1 t1")

#head(jbp01)

close(myconnect) #关闭数据库连接

names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理

jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型


jbp02 <- sqldf("select month,categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like '%宝洁%' group by month,categ_lvl1_name",row.names=T)

p <- ggplot(jbp02,aes(x=factor(categ_lvl1_name),y=sale_amt/1000))

p+
  geom_bar(stat="identity",colour='red',fill='blue')+
  facet_wrap(~month)

#geom_bar(position='dodge',stat="identity",colour='red',fill='blue')

#dodge方式是将不同年份的数据并列放置;stack方式是将不同年份数据堆叠放置



####气泡图####

jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt,sum(sale_amt)/sum(sale_num) asp from jbp01 where manufacture like '%宝洁%' group by categ_lvl1_name",row.names=T)

p <- ggplot(jbp02,aes(x=categ_lvl1_name,y=sale_amt/1000,colour=factor(categ_lvl1_name)))

p+
  geom_point(aes(size = asp))+
  scale_size_continuous(range=c(3,10))+
  scale_y_continuous(labels=comma)
xlab("一级类目")+
  ylab("销售金额 单位:K")+
  labs(title="气泡图") #设置图的标题



####广告PPT地图####

library(maps)
library(mapdata)


library(maptools)

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from prov_vstrs")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))

jbp01$prov_name <-as.character(jbp01$prov_name)

x=readShapePoly('bou2_4p.shp') 
getColor=function(mapdata,provname,provcol,othercol)
{
  f=function(x,y) ifelse(x %in% y,which(y==x),0)
  colIndex=sapply(mapdata@data$NAME,f,provname)
  col=c(othercol,provcol)[colIndex+1]
  return(col)
}


prov_name <-c(jbp01$prov_name)
vistrs <- c(jbp01$vistrs)
prov_col=rgb(red=1-vistrs/max(vistrs)/2,green=1-vistrs/max(vistrs)/2,blue=0)
plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")

####广告PPT图2####

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from ad_chart")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))

jbp01$date_id<-as.Date(jbp01$date_id)

jbp01 <- sqldf("select date_id,channel,sum(vistrs)-sum(invalid_vistrs) valid_vistrs from jbp01  where channel <> 'SEM' group by date_id,channel  ",row.names=T)

p <- ggplot(jbp01,aes(x=date_id,y=valid_vistrs,group=channel,colour=factor(channel)))

p +geom_line(size=0.75)+
  facet_wrap(~channel)+
  scale_x_date(labels = date_format("%m/%d"),
               minor_breaks = date_breaks("1 week"))+
  scale_y_continuous(labels=comma)+
  xlab("日期")+
  ylab("有效访客Vstrs")+
  labs(title="渠道日有效访客数") #设置图的标题

####广告PPT 玫瑰图####

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from ad_pie")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))

names(jbp01) <- c("channel","无效访客","有效访客")

mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")

ggplot(mt)+geom_bar(aes(x=channel,y=visits,fill=type),stat="identity")+
  coord_polar()+  ##玫瑰图
  xlab("渠道")+ ylab("访客数")+
  theme(text=element_text(size=12),axis.text.x=element_text(colour="black",size=12,face="bold"))+
  scale_y_continuous(label=comma)+
  guides(fill=guide_legend(title=NULL))+
  labs(title="渠道访客差异")
#ylim(0,500) #设立坐标轴的范围
#guides(fill=F)#剔除标签


####堆积图 玫瑰图####

myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")

jbp01 <- sqlQuery(myconnct,"select * from ad_pie")

close(myconnct)

names(jbp01) <- tolower(names(jbp01))


ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity")+
  coord_polar()  ##玫瑰图 堆积图衍生
# stat= bin 默认 identity 独立变量 

ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity") #堆积柱状图

ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity",position="fill") + #堆积柱状图
  scale_y_continuous(labels=percent)

#position: stack(数值) fill(百分比) identity() dodge(并排)  jitter(增加扰动)



####融合####
mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")
names(mt)

str(mt)
sem_visigt, seo_vist

dcast(mt, channel + dd ~ type, sum, mean)

plyr splite-c-com
dplyr



#####ppt cluser####

myconnct <- odbcConnect(dsn="jbp2014",uid="chengyingbao",pwd="Mis,111")

chart01 <- sqlQuery(channel=myconnct,query="select page_categ_name,pro_vstrs_per,yhd_vstrs_per from ad_cluster")

close(myconnct)

names(chart01) <- tolower(names(chart01))

names(chart01) <- c("page_categ_name","可乐活动访客","全站访客")

mt <- melt(data=chart01,id.vars="page_categ_name",value.name="percent",variable.name="type")

mt <- sqldf("select page_categ_name,type,case when type='可乐活动访客' then percent*-1
            else percent
            end percent from mt")


p = ggplot(mt)

p+ geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+
  coord_flip()+
  theme(axis.text.x=element_blank())


#p = ggplot(mt,aes(x=interaction(page_categ_name, type),y=percent))
#p+ 
#  geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+
#  coord_flip()+
#  geom_text(aes(label=percent, vjust=-0.2))

 

posted @ 2014-11-21 13:53  ilxx1988  阅读(445)  评论(0编辑  收藏  举报