电商数据处理项目
一、案例综述
1.数据文件说明
客户相关文件:
1. UserInfo.csv:用户主表
2. RegionInfo.csv:区域表
3. UserAddress.csv:用户地址表
商品相关文件:
1. GoodsInfo.csv:商品主表
2. GoodsBrand.csv:商品品牌表
3. GoodsColor.csv:商品颜色表
4. GoodsSize.csv:商品尺码表
订单相关文件:
1. OrderInfo.csv:订单主表
2. OrderDetail.csv:订单详情表
表结构信息.xlsx: 建表参考信息
2.目标
1.求出购买产品金额最多的前十名顾客
2.求出购买产品金额最多的前十名顾客的最后登录时间
3.求出购买产品金额最多的前十名顾客的所在城市
4.求出购买力最强的前十个城市
5.求出购买力最强的前十个城市以及他们所在的省份
6.求出最畅销的十个品牌
7.求出最畅销的十种颜色、以及最不畅销的十种颜色
8.求出最畅销的十个商品所属品牌中所有商品的销售额
9.买不同商品种类最多的前十名用户所使用的收货城市都有哪些 (非重复计数count(distinct 字段名))
3.使用工具:MySQL
4.Error1157解决方法:
当用MySQL Workbench进行数据库的批量更新时,执行一个语句会碰到以下错误提示:Error Code: 1175
You are using safe...without a WHERE that uses a KEY column
执行语句:SET SQL_SAFE_UPDATES = 0;即可解决
二、了解表结构数据信息,建表并导入数据
建表举例:
create table goodsinfo(
goodsid varchar(6) not null default '-',
typeid varchar(3) not null default '-',
markid varchar(4) not null default '-',
goodstag varchar(100) not null default '-',
brandtag varchar(100) not null default '-',
customtag varchar(100) not null default '-',
goodsname varchar(100) not null default '-',
clickcount int not null default 0,
clickcr int not null default 0,
goodsnumber int not null default 0,
goodsweight int not null default 0,
marketprice double not null default 0,
shopprice double not null default 0,
addtime varchar(20) not null default 0,
isonsale int not null default 0,
sales int not null default 0,
realsales int not null default 0,
extraprice double not null default 0,
goodsno varchar(9) not null default 0,
pt varchar(9) not null default 0
);
#导入数据
load data local infile 'E:/rawdata/GoodsInfo.csv' into table goodsinfo
fields terminated by ','
ignore 1 lines;
select * from goodsinfo;
共建立9张表
三、统计查询
-- 1求出购买产品金额最多的前十名顾客
select userid, sum(orderamount) as 金额
from orderinfo
group by userid
order by 金额 desc
limit 10;
-- 2求出购买产品金额最多的前十名顾客的最后登录时间
select oi.userid, sum(orderamount) as 金额, lastlogin_ as 最后登录时间
from orderinfo as oi left join userinfo as ui on oi.userid = ui.userid
group by oi.userid
order by 金额 desc
limit 10;
-- 3求出购买产品金额最多的前十名顾客的所在城市
select userid, sum(orderamount) as 金额, regionname as 城市
from orderinfo as oi left join regioninfo as ri on city = regionid
group by userid
order by 金额 desc
limit 10;
-- 4求出购买力最强的前十个城市
select regionname as 城市, sum(orderamount) as 金额
from orderinfo as oi left join regioninfo as ri on city = regionid
group by city
order by 金额 desc
limit 10;
-- 5求出购买力最强的前十个城市以及他们所在的省份
create table a( select regionname as 城市, sum(orderamount) as 金额, province as 省份id
from orderinfo as oi left join regioninfo as ri on city = regionid
group by city order by 金额 desc
limit 10 );
select * from a;
select regionname as 省份, 城市, 金额
from a left join regioninfo on 省份id = regionid
order by 金额 desc;
或者
select a.regionname as 省份, b.regionname as 城市, sum(orderamount) as 金额
from orderinfo as oi left join regioninfo as b on city = b.regionid left join regioninfo as a on province = a.regionid
group by city
order by 金额 desc limit 10;
-- 6求出最畅销的十个品牌
select brandtype as 品牌, sum(amount) as 销量
from orderdetail as od left join goodsinfo as gi on od.goodsid = gi.goodsid left join goodsbrand as gb on typeid = supplierid
group by typeid
order by 销量 desc
limit 10;
-- 7求出最畅销的十种颜色、以及最不畅销的十种颜色
(select colornote as 颜色, sum(amount) as 销量 from orderdetail as od
left join goodscolor as gc on od.colorid = gc.colorid
group by od.colorid order by 销量 desc limit 10)
union
(select colornote as 颜色, sum(amount) as 销量 from orderdetail as od
left join goodscolor as gc on od.colorid = gc.colorid
group by od.colorid order by 销量 asc limit 10);
-- 8求出最畅销的十个商品所属品牌中所有商品的销售额
#求出最畅销的十个商品所属品牌
create table aa (select typeid as 品牌id from orderdetail as od
left join goodsinfo as gs on od.goodsid = gs.goodsid
group by od.goodsid
order by sum(amount) desc
limit 10);
# 品牌中所有商品
create table bb (select goodsid as 商品id from aa
left join goodsinfo on 品牌id = typeid
group by 商品id);
#商品的销售额
select 商品id, sum(amount * goodsprice) as 销售额
from bb left join orderdetail on 商品id = goodsid
group by 商品id
order by 销售额 desc;
-- 9买不同商品种类最多的前十名用户所使用的收货城市都有哪些 -- (非重复计数count(distinct 字段名))
select od.userid as 用户, count(distinct goodsid) as 商品种类, group_concat(distinct regionname) as 城市名
from orderdetail as od
left join orderinfo as oi on od.orderid = oi.orderid
left join regioninfo on city = regionid
group by od.userid
order by 商品种类 desc
limit 10;