1. 案例中首先要厘清表间关系及其表关键字段等信息
2. 厘清第1项内容后,进入MySQL软件进行对应表数据建立
①首先建立数据库并应用
create database Kdd99;
use Kdd99;
②创建各表数据并将对应数据进行上传
创建表
# drop table sale; # 删除表 create table sale( year integer, market varchar(6), sale long, profit long ); select * from sale; # 单表数据查询
上传数据
show variables like "%secure%"; # 查询MySQl数据需要存放的路径 load data infile'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\sale.csv' # 即需要将待分析数据放入MySQL规定的路径下进行上传 into table sale character set gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from sale;
其他表建立以及数据上传如下:
# drop table kdd99_trans; create table kdd99_trans( trans_id integer, account_id integer, date date, type varchar(2), operation varchar(20), amount DECIMAL, balance DECIMAL, k_symbol varchar(20), bank varchar(4), account long ); select * from kdd99_trans; load data infile'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_trans.csv' into table kdd99_trans character set gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_trans; # drop table kdd99_accounts; create table kdd99_accounts ( account_id integer, district_id integer, frequency varchar(20), date DATE ); select * from kdd99_accounts; load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\kdd99_accounts.csv' into table kdd99_accounts character set gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_accounts; # drop table kdd99_card; create table Kdd99_card( card_id integer, disp_id integer, issued DATE, type varchar(10) ); select * from kdd99_card; load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_card.csv' into table Kdd99_card CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select card_id from kdd99_card; # drop table kdd99_clients; create table Kdd99_clients( client_id integer, sex varchar(2), birth_date DATE, district_id integer ); select * from kdd99_clients; load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_clients.csv' into table Kdd99_clients CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_clients; # drop table kdd99_disp; create table Kdd99_disp( disp_id integer, client_id integer, account_id integer, type varchar(6) ); load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_disp.csv' into table Kdd99_disp CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_disp; # drop table kdd99_district; create table Kdd99_district( A1 integer, GDP long, A4 DOUBLE, A10 DOUBLE, A11 DOUBLE, A12 DOUBLE, A13 DOUBLE, A14 DOUBLE, A15 DOUBLE, A16 DOUBLE ); load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_district.csv' into table Kdd99_district CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_district; # drop table kdd99_loans; create table Kdd99_loans( loan_id integer, account_id integer, date date, amount DECIMAL, duration integer, payments DECIMAL, status varchar(2) ); load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_loans.csv' into table Kdd99_loans CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_loans; # drop table kdd99_order; create table Kdd99_order( order_id integer, account_id integer, bank_to varchar(2), account_to integer, amount DECIMAL, k_symbol varchar(20) ); load data infile 'C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Kdd99_order.csv' into table Kdd99_order CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from kdd99_order; # drop table One; create table One ( X integer, A VARCHAR(4) ); load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\One.csv" into table One CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from One; # drop table Two; create table Two ( X integer, A VARCHAR(4) ); load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Two.csv" into table Two CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from two; # drop table table1; create table Table1 ( id integer, A VARCHAR(4) ); load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Table1.csv" into table Table1 CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from table1; # drop table table2; create table Table2 ( id integer, B VARCHAR(4) ); load data infile "C:\\ProgramData\\MySQL\\MySQL Server 8.0\\Uploads\\Table2.csv" into table Table2 CHARACTER SET gb2312 fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n' ignore 1 lines; select * from table2 limit 5; show tables; select year as '年份',market,sale,profit as money from kdd99.sale; select * from sale;
3. 根据案例需求进行对应数据分析
数据的基本查询方式 ### 单表查询 #语法:select * from 表名 select * from kdd99.sale; #### 1)查询指定列 #语法:select 字段1[,字段2,…] from 表名; select year,market,sale,profit from kdd99.sale; #### 2)指定列命名 as #语法:select 字段名 [as] 列别名 from 原表名 [[as] 表别名]; select year as `年份`,market,sale,profit from kdd99.sale; select year as `年份`,market,sale,profit as money from kdd99.sale; #### 3)表命名 #语法:select 字段名 [as] 列别名 from 原表名 [[as] 表别名]; SELECT t.year,t.market,t.sale,t.profit from kdd99.sale t; #### 4)删除重复 DISTINCT #语法:select distinct 字段名[,字段名2,…] from 表名; select DISTINCT year from kdd99.sale; #### 5)排序 order by #语法:select 字段1[,字段2,…] from 表名 order by 字段1[ 排序方向,字段2 排序方向,…]; select year,market,sale,profit from kdd99.sale order by year asc,sale; #### 6)限制结果 limit #语法:select 字段1[,字段2,…] from 表名 limit [偏移量,] 行数; select * from kdd99.sale limit 10; #### 7)条件过滤 where # 算数运算 select * FROM KDD99.KDD99_TRANS where amount * 12 < 8000; select * FROM KDD99.KDD99_TRANS where amount / 12 * 1.10 >= 8500; select * FROM KDD99.KDD99_TRANS where (amount / 12 ) * 1.10 >= 8500; select * FROM KDD99.KDD99_TRANS where amount + 1000 <= 10000; #比较运算 select * FROM KDD99.KDD99_TRANS Where k_symbol = '房屋贷款'; select * FROM KDD99.KDD99_TRANS where date >= '1998-12-01'; #逻辑运算 select * FROM KDD99.KDD99_TRANS where date <= 1998-12-31 AND k_symbol = '房屋贷款'; #### 8)空值查询 null #语法:select 字段1[,字段2,…] from 表名 where 空值字段 is [not] null; #### 9)模糊查询 like #语法:select 字段1[,字段2,…] from 表名 where 字段 [not] like 通配符; select * FROM KDD99.KDD99_TRANS where date <= 1998-12-31 AND k_symbol like '%贷款'; select * FROM KDD99.KDD99_TRANS where date <= 1998-12-31 AND k_symbol = '房屋%'; select * FROM KDD99.KDD99_TRANS where date <= 1998-12-31 AND k_symbol like '%贷%'; #### 10)创建新列 select a.*,profit/sale as rate from kdd99.sale a; select a.*,profit/sale as rate from kdd99.sale where profit/sale <0.07;
案例情景分析 ## 商业情景应用-1 # Bank公司的领导希望得到1998年12月发生的“房屋贷款”交易, # 包括帐户号、交易日期、对方银行和金额, # 按照交易日期和交易序号进行升序排序。 SELECT t1.account_id,t1.date,t1.bank,t1.amount FROM KDD99.KDD99_TRANS t1 WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' AND t1.k_symbol = '房屋贷款' ORDER BY t1.date,t1.account_id; #### 11)聚合 group by #语法:select 字段1[,字段2,…] from 表名[ where 查询条件] group by 分组字段1[,分组字段2,…]; select count(*) from KDD99.kdd99_tRANS; select date,count(*) from KDD99.kdd99_tRANS group by date; ## 商业情景应用-2 # 汇总每个交易日的交易次数、日均交易额、日总交易额。 SELECT t1.date, (COUNT(t1.account_id)) AS COUNT_of_account_id, (AVG(t1.amount)) AS avg_of_amount, (SUM(t1.amount)) AS SUM_of_amount FROM KDD99.KDD99_TRANS t1 GROUP BY t1.date; #### 12) 分组筛选 having #语法:select 字段1[,字段2,…] from 表名[ where 筛选条件] group by 分组字段1[,分组字段2,…] having 筛选条件; ## 商业情景应用-3 #在刚才的需求基础上,筛选出交易频次大于300的交易日记录。 SELECT t1.date, COUNT(t1.account_id) AS COUNT_of_account_id, AVG(t1.amount) AS avg_of_amount, SUM(t1.amount) AS SUM_of_amount FROM KDD99.KDD99_TRANS t1 GROUP BY t1.date HAVING COUNT_of_account_id>300; #### 13)日期时间函数 select now(); select CURRENT_DATE(); select CURRENT_TIME(); select YEAR(CURRENT_DATE()); select WEEKDAY(CURRENT_DATE()); ## 商业情景应用-4 #在商业情景应用-1的基础之上,需要新生成一个代表交易日星期的变量。 SELECT t1.account_id,t1.date,t1.bank,t1.amount,weekday(t1.date) as weekday FROM KDD99.KDD99_TRANS t1 WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' AND t1.k_symbol = '房屋贷款' ORDER BY t1.date,t1.account_id; #### 14)条件判断 case when then/if #语法:IF(condition, value_if_true, value_if_false) #语法:CASE expressing WHEN condition THEN return ## 商业情景应用-5 #根据交易日星期的信息生成是否为工作日的新变量。周一至周五为工作日,周六、周日为非工作日 # if SELECT t1.account_id,t1.date,t1.bank,t1.amount,weekday(t1.date) as weekday, IF(weekday(t1.date)>=5,'周末','工作日') AS Weekday_2 FROM KDD99.KDD99_TRANS t1 WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' AND t1.k_symbol = '房屋贷款' ORDER BY t1.date,t1.account_id; #case when SELECT t1.account_id,t1.date,t1.bank,t1.amount,weekday(t1.date) as weekday, (CASE WHEN weekday(t1.date)>=5 THEN '周末' WHEN weekday(t1.date) <5 THEN '工作日' END) AS Weekday_2 FROM KDD99.KDD99_TRANS t1 WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' AND t1.k_symbol = '房屋贷款' ORDER BY t1.date,t1.account_id; #### 15)条件求和/条件计数 sumif /countif #语法:sumif(range,criteria,sum_range) Sumif(条件区域,求和条件,实际求和区域) #语法:countif(range,criteria) COUNTIF(订数区域,计数条件) SELECT t1.date, SUM(t1.amount) AS SUM_of_amount FROM KDD99.KDD99_TRANS t1 WHERE t1.date BETWEEN '1998-12-01' AND '1998-12-31' group by t1.date; SELECT t1.date, SUM(if(t1.date BETWEEN '1998-12-01' AND '1998-12-31',t1.amount,0)) AS SUM_of_amount FROM KDD99.KDD99_TRANS t1 group by t1.date; #### 16)时间间隔 select CURRENT_DATE()-DATE('2022-04-01'); select datediff(CURRENT_DATE(),'2022-04-01'); select DATE_ADD(CURRENT_DATE(),INTERVAL +1 day); ## 商业情景应用-6 # 假如现在是1997年5月16日,交易数据截止1997年5月15日(T-1), # 汇总每个账户号借贷类型为“贷”的贷款金额的年度同比,季度同/环比情况。 SET @昨日 = date ('1997-05-15'); SET @本季初 = date ('1997-04-01'); SET @环季初 = date ('1997-01-01'); SET @同季初 = date ('1996-04-01'); SET @本年初 = date ('1997-01-01'); SET @同年初 = date ('1996-01-01'); SET @季度天数 = DATEDIFF(@昨日,@本季初)+1; SET @年度天数 = DATEDIFF(@昨日,@本年初)+1; SET @环季末 = DATE_ADD(@环季初,INTERVAL @季度天数-1 DAY); SET @同季末 = DATE_ADD(@同季初,INTERVAL @季度天数-1 DAY); SET @同年末 = DATE_ADD(@同年初,INTERVAL @年度天数-1 DAY); select @昨日,@本季初,@环季初,@同季初,@本年初,@同年初,@季度天数,@环季末,@同季末,@同年末; select t1.account_id, sum(if(t1.date between @本年初 and @昨日,t1.amount,0)) as `本年`, sum(if(t1.date between @同年初 and @同年末,t1.amount,0)) as `同年`, sum(if(t1.date between @本年初 and @昨日,t1.amount,0)) /sum(if(t1.date between @同年初 and @同年末,t1.amount,0)) -1 as `年同比`, sum(if(t1.date between @本季初 and @昨日,t1.amount,0)) as `本季`, sum(if(t1.date between @同季初 and @同季末,t1.amount,0)) as `同季`, sum(if(t1.date between @环季初 and @环季末,t1.amount,0)) as `环季`, sum(if(t1.date between @本季初 and @昨日,t1.amount,0)) /sum(if(t1.date between @同季初 and @同季末,t1.amount,0))-1 as `季同比`, sum(if(t1.date between @本季初 and @昨日,t1.amount,0)) /sum(if(t1.date between @环季初 and @环季末,t1.amount,0))-1 as `季环比` FROM KDD99.KDD99_TRANS t1 where t1.type='贷' GROUP BY t1.account_id; ### 多表查询 #### 1)union 去重 #语法:select 字段1[,字段2,…] from 表名 union select 字段1[,字段2,…] from 表名; select * from one union select * from two; #### 2)union all 不去重 #语法:select 字段1[,字段2,…] from 表名 union all select 字段1[,字段2,…] from 表名; select * from one union all select * from two; #### 3)笛卡尔积 cross join #语法:select 字段1[,…] from 表1, 表2 ; select * from table1, table2; #### 4)内连接 inner join #语法:select 字段1[,…] from 表1[ inner] join 表2 on 连接条件; select * from table1, table2 Where table1.id= table2.id; select * from table1 inner join table2 on table1.id= table2.id; #### 5)左连接 left join #语法:select 字段1[,…] from 表1 left join 表2 on 连接条件; select * from Table1 left join Table2 on Table1.id = Table2.id; #### 6)右链接 right join #语法:select 字段1[,…] from 表1 right join 表2 on 连接条件; select * from Table1 right join Table2 on Table1.id = Table2.id; ## 商业情景应用-7 #希望得到信用卡持卡人的客户信息 SELECT t1.card_id,t1.disp_id,t1.issued,t1.type,t3.sex,t3.birth_date,t3.district_id from kdd99.kdd99_card t1 left join kdd99.kdd99_disp t2 on t1.disp_id=t2.disp_id left join kdd99.kdd99_clients t3 on t2.client_id =t3.client_id; ## 商业情景应用-8 # Bank公司的贷款风险分析人员希望得到贷款客户在发放贷款前一年的交易数据。 SELECT t1.loan_id,t1.account_id,t1.date,t1.amount,t1.duration, t1.payments, t1.status,t2.trans_id, t2.date AS date1, t2.type, t2.operation,t2.amount AS amount1,t2.balance, t2.k_symbol, t2.bank, t2.account FROM KDD99.KDD99_LOANS t1 LEFT JOIN KDD99.KDD99_TRANS t2 ON t1.account_id = t2.account_id AND t1.date>t2.date AND t1.date <= t2.date+365 ORDER BY t1.loan_id,t1.account_id,t1.date,t2.date; #### 7)子查询 ## 商业情景应用-9 # 获取合同终止,但贷款没有支付的订单明细 select * from kdd99.kdd99_order where account_id in (select account_id from kdd99.kdd99_loans where status="B") order by account_id; select a.* from kdd99.kdd99_order a left join kdd99.kdd99_loans b on a.account_id=b.account_id where b.status="B" ORDER BY account_id; ### 查询创建表/视图 # 创建表 #语法:CREATE TABLE <table-name> AS (select...) # 创建视图 #语法:CREATE VIEW <view-name> AS (select...) ## 商业情景应用-10 # 将上述应用9中的数据存入表名为“loan_tran”中。 CREATE TABLE loan_tran AS SELECT t1.loan_id, t1.account_id, t1.date,t1.amount, t1.duration, t1.payments, t1.status, t2.trans_id, t2.date AS date1, t2.type, t2.operation,t2.amount AS amount1,t2.balance, t2.k_symbol, t2.bank,t2.account FROM KDD99.KDD99_LOANS t1 LEFT JOIN KDD99.KDD99_TRANS t2 ON t1.account_id = t2.account_id AND t1.date>t2.date AND t1.date <= t2.date+365 ORDER BY t1.loan_id,t1.account_id,t1.date,t2.date; #将上述应用9中的查询创建名为“loan_tran_view”的视图。 CREATE view loan_tran_view AS SELECT t1.loan_id, t1.account_id, t1.date,t1.amount, t1.duration, t1.payments, t1.status, t2.trans_id, t2.date AS date1, t2.type, t2.operation,t2.amount AS amount1,t2.balance, t2.k_symbol, t2.bank,t2.account FROM KDD99.KDD99_LOANS t1 LEFT JOIN KDD99.KDD99_TRANS t2 ON t1.account_id = t2.account_id AND t1.date>t2.date AND t1.date <= t2.date+365 ORDER BY t1.loan_id,t1.account_id,t1.date,t2.date; ### 窗口函数 top N 问题 #rank(),dense_rank() 和 row_number() #语法:<窗口函数> over (partition by <用于分组的列名> order by <用于排序的列名>) ## 商业情景应用 # 获取信贷类型为"贷"且交易类型为"信贷资金"的每一天交易金额最高的一笔交易记录 SELECT * FROM (select t1.trans_id,t1.account_id,T1.date,T1.amount, rank() over (PARTITION by date ORDER BY amount desc) as rank1, dense_rank() over (PARTITION by date ORDER BY amount desc) as rank2, row_number() over (PARTITION by date ORDER BY amount desc) as rank3 from KDD99.kdd99_tRANS t1 WHERE t1.type='贷' and t1.operation='信贷资金' #and t1.date='1993-02-08' ) t WHERE rank3=1;
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?