大数据第23天-数据处理之查询-杨大伟
1 cd /d D:\MyWork\MySQL\mysql-5.7.28-winx64\bin rem 目的是为了执行mysql.exe 2 mysql -h127.0.0.1 -P3306 -uroot -p123456 rem 作用是为了让客户端能够正确的连接服务器 3 mysql --host=127.0.0.1 --port=3306 --user=root --password=123456 world 4 5 -- 查看服务器中有哪些数据库 6 show databases; 7 8 -- 创建新的数据库 9 create database company; 10 11 -- 切换工作数据库 12 use company; 13 14 -- 查看当前数据库中的表 15 show tables; 16 17 -- 导入.sql文件中的数据 18 source d:/company.sql; 19 20 -- 查看表中的数据 21 select * from employees; 22 23 -- 练习 : 创建world数据库, 并切换, 再导入world.sql文件中的数据到world库. 24 create database world; 25 26 use world; 27 28 source d:/world.sql; 29 30 -- 丢弃数据库 31 drop database company; 32 33 drop database world; 34 35 -- 查看当前工作数据库 36 select database(); 37 38 -- 查看当前工作数据库的版本 39 select version(); 40 41 -- 查看服务器所有参数 42 show variables; 43 44 -- 和字符集设置相关的参数 : 45 character_set_client | gbk 46 character_set_connection | gbk 47 character_set_database | utf8 48 character_set_filesystem | binary 49 character_set_results | gbk 50 character_set_server | utf8 51 character_set_system | utf8 52 53 -- 如果client,connection和result的设置不是gbk, 必须执行以下语句 54 55 -- 修改客户端编码 56 set names gbk; 57 58 数据库结构 59 mysqld 服务器 60 数据库1(目录形式) 61 表1 62 数据(记录)1 63 数据(记录)2 64 数据(记录)3 65 .... 66 表2 67 表3 68 .... 69 70 数据库2 71 ...... 72 73 -- 查看表中数据 74 select * from 表名; 75 76 -- 查看表结构 77 describe employees; 78 desc employees; 79 80 +----------------+--------------+------+-----+---------+----------------+ 81 | Field | Type | Null | Key | Default | Extra | 82 +----------------+--------------+------+-----+---------+----------------+ 83 | employee_id | int(6) | NO | PRI | NULL | auto_increment | 84 | first_name | varchar(20) | YES | | NULL | | 85 | last_name | varchar(25) | YES | | NULL | | 86 | email | varchar(25) | YES | | NULL | | 87 | phone_number | varchar(20) | YES | | NULL | | 88 | job_id | varchar(10) | YES | MUL | NULL | | 89 | salary | double(10,2) | YES | | NULL | | 90 | commission_pct | double(4,2) | YES | | NULL | | 91 | manager_id | int(6) | YES | | NULL | | 92 | department_id | int(4) | YES | MUL | NULL | | 93 +----------------+--------------+------+-----+---------+----------------+ 94 95 create table customer( 96 id int, 97 name varchar(20), 98 age int, 99 phone varchar(15), 100 birthday date 101 ); 102 103 insert into customer ( 104 id, 105 name, 106 age, 107 phone, 108 birthday 109 ) values ( 110 1, 111 '张三', 112 30, 113 '134234234', 114 '1992-5-8' 115 ); 116 117 insert into customer ( 118 id, 119 name, 120 age, 121 phone, 122 birthday 123 ) values ( 124 2, 125 '李四', 126 40, 127 '1599234234', 128 '1991-5-8' 129 ); 130 131 insert into customer ( 132 id, 133 name, 134 age, 135 phone, 136 birthday 137 ) values ( 138 3, 139 '王五', 140 50, 141 '135234234', 142 '1995-11-8' 143 ); 144 145 -- 更新记录(修改数据) 146 -- 如果更新时不加where过滤, 会导致所有数据被修改 147 update customer set 148 name = '某人', 149 age = 10; 150 151 update customer set 152 name = '张三', 153 age = 30 154 where 155 id = 1; 156 157 -- 删除记录 158 -- 如果没有where过滤, 会全部删除. 159 delete from customer; 160 161 delete from customer 162 where id = 2; 163 164 165 DML 数据操纵语言. 166 insert C 167 select R 168 update U 169 delete D 170 171 +----------------+---------------------------------------------------------------------------------------+------+-----+---------+-------+ 172 | Field | Type | Null | Key | Default | Extra | 173 +----------------+---------------------------------------------------------------------------------------+------+-----+---------+-------+ 174 | Code | char(3) 国家代码 | NO | PRI | | | 175 | Name | char(52) 国家名称 | NO | | | | 176 | Continent | enum('Asia','Europe','North America','Africa','Oceania','Antarctica','South America') | NO | | Asia | | 177 | Region | char(26) 地区 | NO | | | | 178 | SurfaceArea | float(10,2) 国土面积m | NO | | 0.00 | | 179 | IndepYear | smallint(6) 独立年 | YES | | NULL | | 180 | Population | int(11) 国家人口 | NO | | 0 | | 181 | LifeExpectancy | float(3,1) 平均寿命 | YES | | NULL | | 182 | GNP | float(10,2) 国民生产总值 | YES | | NULL | | 183 | GNPOld | float(10,2) | YES | | NULL | | 184 | LocalName | char(45) | NO | | | | 185 | GovernmentForm | char(45) 政府组织 | NO | | | | 186 | HeadOfState | char(60) 领导人 | YES | | NULL | | 187 | Capital | int(11) 首都 | YES | | NULL | | 188 | Code2 | char(2) | NO | | | | 189 +----------------+---------------------------------------------------------------------------------------+------+-----+---------+-------+ 190 city 191 +-------------+----------+------+-----+---------+----------------+ 192 | Field | Type | Null | Key | Default | Extra | 193 +-------------+----------+------+-----+---------+----------------+ 194 | ID | int(11) | NO | PRI | NULL | auto_increment | 195 | Name | char(35) | NO | | | | 196 | CountryCode | char(3) | NO | MUL | | | 197 | District | char(20) | NO | | | | 198 | Population | int(11) | NO | | 0 | | 199 +-------------+----------+------+-----+---------+----------------+ 200 SELECT * 201 FROM departments; 202 203 SELECT department_id, location_id 204 FROM departments; 205 206 select 207 code, 208 name, 209 continent 210 from 211 country; 212 213 select 214 continent, 215 name, 216 code 217 from 218 country; 219 220 SQL注意点 : 221 SQL 语言大小写不敏感。 222 SQL 可以写在一行或者多行 223 关键字不能被缩写也不能分行 224 各子句一般要分行写。 225 使用缩进提高语句的可读性。 226 227 -- 查询国家的人口,名称和代码及首都. 228 select 229 population, 230 name, 231 code, 232 capital 233 from 234 country; 235 236 -- AS用于给列起别名, AS关键字可以省略 237 SELECT last_name AS name, commission_pct comm 238 FROM employees; 239 240 -- 别名可以使用""包围, 目的是让它能原样显示或包含特殊的符号 241 select 242 code 国家代码, 243 name, 244 continent as "国家 大洲" 245 from 246 country; 247 248 -- 过滤行 where 条件布尔 : 工作流程就是从基本表中测试每行数据, 都经过条件布尔测试一下, 如果结果为真留下, 为假则丢弃. 249 SELECT employee_id, last_name, job_id, department_id 250 FROM employees 251 WHERE department_id = 90 ; 252 253 -- 查询所有亚洲国家 254 select 255 code, 256 population, 257 name 258 from 259 country 260 where 261 continent = 'asia'; 262 263 -- 查询所有亚洲国家 , 下面的SQL是错误的, where中不可以使用列的别名, 因为where先执行. 264 select 265 code, 266 population, 267 name, 268 continent cont 269 from 270 country 271 where 272 cont = 'asia'; 273 274 执行顺序 : from => where => select 275 276 --查询所有中国城市的人口和名称和id号和国家代码, 给国家代码起别名, 尝试在where中使用别名. 277 select 278 population, 279 name, 280 id, 281 countrycode code 282 from 283 city 284 where 285 countrycode = 'chn'; 286 287 -- between a and b --等效于salary >= a && salary <= 3500 288 SELECT last_name, salary 289 FROM employees 290 WHERE salary BETWEEN 2500 AND 3500; 291 292 -- id in(a, b, c) -- 等效于id = a || id = b || id = c 293 SELECT employee_id, last_name, salary, manager_id 294 FROM employees 295 WHERE manager_id IN (100, 101, 201); 296 297 select 298 code, 299 continent, 300 name, 301 population 302 from 303 country 304 where 305 name like 'china'; -- name = 'china' 如果模糊查询中的字符串没有通配符, 和=效果一样. 306 307 SELECT first_name 308 FROM employees 309 WHERE first_name LIKE 'S%'; 310 311 % 代表任意个任意字符 312 _ 代表一个任意字符 313 -- 查询国家名称中只要包含ch的都行 314 315 select 316 code, 317 population, 318 name, 319 continent 320 from 321 country 322 where 323 name like '%ch%' 324 325 -- 名字中第3个和第4个是in的国家 326 select 327 code, 328 population, 329 name, 330 continent 331 from 332 country 333 where 334 name like '__in%' 335 336 -- 查询城市表中, 名称的第2个和3个字母是or的城市. 337 select 338 id, 339 name, 340 countrycode 341 from 342 city 343 where 344 name like '_or%'; 345 346 -- 查看哪些国家没有首都 347 -- null和任意的比较运算, 结果一定是false, 处理null, 必须使用特殊的is来判断 348 -- null值在进行统计时会被自动忽略. 349 select 350 code, 351 name, 352 continent, 353 capital 354 from 355 country 356 where 357 capital = null; 358 359 select 360 code, 361 name, 362 continent, 363 capital 364 from 365 country 366 where 367 capital is null; 368 369 select 370 code, 371 name, 372 continent, 373 capital 374 from 375 country 376 where 377 capital is not null; 378 379 -- 哪些国家尚未独立. 380 select 381 name, 382 code, 383 indepYear 384 from 385 country 386 where 387 indepYear is null; 388 389 390 SELECT 391 employee_id, last_name, job_id, salary 392 FROM employees 393 WHERE 394 salary >=10000 395 AND 396 job_id LIKE '%MAN%'; 397 398 399 SELECT 400 employee_id, last_name, job_id, salary 401 FROM employees 402 WHERE 403 salary >= 10000 404 OR 405 job_id LIKE '%MAN%'; 406 407 -- 查询亚洲国家中人口大于5000万的国家. 408 select 409 code, 410 name, 411 continent, 412 population 413 from 414 country 415 where 416 continent = 'asia' 417 and 418 population > 50000000; 419 420 -- 查询中国的城市人口小于20万的城市. 421 select 422 * 423 from 424 city 425 where 426 countrycode = 'chn' 427 and 428 population < 200000; 429 430 -- 去重 distinct, 后面的列最好是有重复数据的 431 select 432 distinct 433 continent, 434 region 435 from 436 country; 437 438 -- 查看中国各有哪些不同的省. 439 select 440 distinct district 441 from 442 city 443 where 444 countrycode = 'chn'; 445 446 SELECT last_name, job_id, department_id, salary 447 FROM employees 448 ORDER BY salary asc; 449 450 SELECT last_name, job_id, department_id, salary 451 FROM employees 452 ORDER BY salary desc; 453 454 select 455 code, 456 name, 457 population pop 458 from 459 country 460 where 461 continent = 'asia' 462 order by 463 pop; 464 465 order by中可以使用列的别名, 原因是它最后执行 466 467 顺序 : from => where => select => order by 468 469 470 SELECT last_name, department_id, salary 471 FROM employees 472 ORDER BY department_id asc, salary DESC; 473 474 -- 查询亚洲人口最少的国家 475 select 476 code, 477 name, 478 continent, 479 population pop 480 from 481 country 482 where 483 continent = 'asia' 484 order by 485 pop desc; 486 487 -- 查询山东省人口最多的城市 488 select 489 id, 490 name, 491 district, 492 population 493 from 494 city 495 where 496 countrycode = 'chn' 497 and 498 district = 'shandong' 499 order by 500 population 501 502 503 504 -- 多表联接 : 会产生笛卡尔集 505 -- 为了学习, 创建简单表 506 create table city2 507 select * from city where name='london'; 508 +------+--------+-------------+----------+------------+ 509 | ID | Name | CountryCode | District | Population | 510 +------+--------+-------------+----------+------------+ 511 | 456 | London | GBR | England | 7285000 | 512 | 1820 | London | CAN | Ontario | 339917 | 513 +------+--------+-------------+----------+------------+ 514 515 create table country2 516 select * from country where code in('gbr', 'can'); 517 +------+----------------+---------------+-----------------+-------------+-----------+------------+----------------+------------+------------+------------- 518 | Code | Name | Continent | Region | SurfaceArea | IndepYear | Population | LifeExpectancy | GNP | GNPOld | LocalName 519 +------+----------------+---------------+-----------------+-------------+-----------+------------+----------------+------------+------------+------------- 520 | CAN | Canada | North America | North America | 9970610.00 | 1867 | 31147000 | 79.4 | 598862.00 | 625626.00 | Canada 521 | GBR | United Kingdom | Europe | British Islands | 242900.00 | 1066 | 59623400 | 77.7 | 1378330.00 | 1296830.00 | United Kingd 522 +------+----------------+---------------+-----------------+-------------+-----------+------------+----------------+------------+------------+------------- 523 524 select * from city2, country2; 525 结果集中的绝大多数都是垃圾. 必须要过滤掉 526 +------+--------+-------------+----------+------------+------+----------------+---------------+-----------------+-------------+-----------+------------+-- 527 | ID | Name | CountryCode | District | Population | Code | Name | Continent | Region | SurfaceArea | IndepYear | Population | L 528 +------+--------+-------------+----------+------------+------+----------------+---------------+-----------------+-------------+-----------+------------+-- 529 | 456 | London | GBR | England | 7285000 | CAN | Canada | North America | North America | 9970610.00 | 1867 | 31147000 | 530 | 1820 | London | CAN | Ontario | 339917 | CAN | Canada | North America | North America | 9970610.00 | 1867 | 31147000 | 531 | 456 | London | GBR | England | 7285000 | GBR | United Kingdom | Europe | British Islands | 242900.00 | 1066 | 59623400 | 532 | 1820 | London | CAN | Ontario | 339917 | GBR | United Kingdom | Europe | British Islands | 242900.00 | 1066 | 59623400 | 533 +------+--------+-------------+----------+------------+------+----------------+---------------+-----------------+-------------+-----------+------------+-- 534 535 select 536 * 537 from 538 city2, 539 country2 540 where 541 countrycode = code; 542 543 +------+--------+-------------+----------+------------+------+----------------+---------------+-----------------+-------------+-----------+------------+- 544 | ID | Name | CountryCode | District | Population | Code | Name | Continent | Region | SurfaceArea | IndepYear | Population | 545 +------+--------+-------------+----------+------------+------+----------------+---------------+-----------------+-------------+-----------+------------+- 546 | 1820 | London | CAN | Ontario | 339917 | CAN | Canada | North America | North America | 9970610.00 | 1867 | 31147000 | 547 | 456 | London | GBR | England | 7285000 | GBR | United Kingdom | Europe | British Islands | 242900.00 | 1066 | 59623400 | 548 +------+--------+-------------+----------+------------+------+----------------+---------------+-----------------+-------------+-----------+------------+- 549 550 -- 下面的SQL是错误的, 因为列名模糊 551 select 552 name, 553 population, 554 name, 555 continent, 556 population 557 from 558 city2, 559 country2 560 where 561 countrycode = code; 562 563 select 564 city2.name as cityName, 565 city2.population cityPop, 566 country2.name countryName, 567 country2.continent, 568 country2.population countryPop 569 from 570 city2, 571 country2 572 where 573 city2.countrycode = country2.code; 574 575 -- 表名的使用也麻烦了. 给表起别名 576 select 577 ci.name as cityName, 578 ci.population cityPop, 579 co.name countryName, 580 co.continent, 581 co.population countryPop 582 from 583 city2 as ci , 584 country2 co 585 where 586 ci.countrycode = co.code; 587 588 -- 表名一旦指定了别名, 原名就不可以使用了. 589 select 590 city2.name as cityName, 591 city2.population cityPop, 592 country2.name countryName, 593 country2.continent, 594 country2.population countryPop 595 from 596 city2 as ci, 597 country2 as co 598 where 599 city2.countrycode = country2.code; 600 601 -- 查询所有国家的名称和国家的首都的名称. 602 select 603 co.name country, 604 ci.name captial, 605 co.population countryPop, 606 ci.population cityPop, 607 co.continent 608 from 609 country co, 610 city ci 611 where 612 co.capital = ci.id 613 614 -- where中有多个条件. 615 select 616 ci.name as cityName, 617 ci.population cityPop, 618 co.name countryName, 619 co.continent, 620 co.population countryPop 621 from 622 city2 as ci , 623 country2 co 624 where 625 ci.countrycode = co.code -- 联接条件, 比普通过滤更重要. 626 and 627 ci.population > 1000000 628 629 +----------+---------+----------------+-----------+------------+ 630 | cityName | cityPop | countryName | continent | countryPop | 631 +----------+---------+----------------+-----------+------------+ 632 | London | 7285000 | United Kingdom | Europe | 59623400 | 633 +----------+---------+----------------+-----------+------------+ 634 635 SQL99标准中 内联接不要用,号来写, 而是使用专门的关键字join.... on 联接条件 636 select 637 ci.name as cityName, 638 ci.population cityPop, 639 co.name countryName, 640 co.continent, 641 co.population countryPop 642 from 643 city2 as ci 644 inner join 645 country2 co 646 on 647 ci.countrycode = co.code 648 where 649 ci.population > 1000000; 650 651 -- on 和 where 效果相同, 但是下面的写法不推荐 652 select 653 ci.name as cityName, 654 ci.population cityPop, 655 co.name countryName, 656 co.continent, 657 co.population countryPop 658 from 659 city2 as ci 660 inner join 661 country2 co 662 where 663 ci.countrycode = co.code 664 and 665 ci.population > 1000000; 666 667 -- on 和 where 效果相同, 但是下面的写法不推荐 668 select 669 ci.name as cityName, 670 ci.population cityPop, 671 co.name countryName, 672 co.continent, 673 co.population countryPop 674 from 675 city2 as ci 676 inner join 677 country2 co 678 on 679 ci.countrycode = co.code 680 and 681 ci.population > 1000000; 682 683 -- 最好的写法是下面的. inner关键字可以省略 684 select 685 ci.name as cityName, 686 ci.population cityPop, 687 co.name countryName, 688 co.continent, 689 co.population countryPop 690 from 691 city2 as ci 692 join 693 country2 co 694 on -- on后面只跟 联接条件 695 ci.countrycode = co.code 696 where -- where 后面只跟 普通行过滤 697 ci.population > 1000000; 698 699 -- 查询所有亚洲和欧洲国家的首都, 使用SQL99 700 select 701 co.name country, 702 co.continent, 703 ci.name capital 704 from 705 country co 706 join 707 city ci 708 on 709 co.capital = ci.id 710 where 711 co.continent in('asia', 'europe'); 712 713 -- 查看所有国家名称和首都及官方语言 714 select 715 co.name country, 716 co.continent, 717 ci.name capital, 718 cl.language 719 from 720 country co 721 join 722 city ci 723 on 724 co.capital = ci.id 725 join 726 countrylanguage cl 727 on 728 cl.countrycode = co.code 729 where 730 cl.isofficial = 't'; 731 732 -- 查询简单表的国家及首都 733 -- 内联接的结果总是所有联接条件为真的记录. 为假的记录全部滤掉. 734 select 735 co.name country, 736 ci.name capital, 737 ci.district 738 from 739 country2 co 740 join 741 city2 ci 742 on 743 co.capital = ci.id; 744 745 746 -- 外联接可以保证某张表的数据完整, 即使联接条件为假, 也要保留数据 747 select 748 co.name country, 749 ci.name capital, 750 ci.district 751 from 752 country2 co 753 left outer join 754 city2 ci 755 on 756 co.capital = ci.id; 757 758 select 759 co.name country, 760 ci.name capital, 761 ci.district 762 from 763 country2 co 764 right outer join 765 city2 ci 766 on 767 co.capital = ci.id; 768 769 -- 查看所有国家名称和首都, 即使没有首都也要显示. 770 select 771 co.name country, 772 co.continent, 773 ci.name capital 774 from 775 country co 776 left join 777 city ci 778 on 779 co.capital = ci.id; 780 781 -- 查询哪些国家没有首都 782 select 783 co.name country, 784 co.continent, 785 ci.name capital 786 from 787 country co 788 left join 789 city ci 790 on 791 co.capital = ci.id 792 where 793 ci.name is null; 794 795 // 伪代码 796 // 内联接 797 Set leftTable; 798 Set rightTable; 799 Set resultSet = new Set(); 800 for (int i = 0; i < leftTable.length; i++) { 801 Row leftRow = leftTable[i]; 802 for (int j = 0; j < rightTable.length; j++) { 803 Row rightRow = rightTable[j]; 804 if (联接条件(leftRow, rightRow)) { 805 Row newRow = leftRow + rightRow; 806 resultSet.add(newRow); 807 } 808 } 809 } 810 811 // 外联接 812 Set leftTable; 813 Set rightTable; 814 Set resultSet = new Set(); 815 for (int i = 0; i < leftTable.length; i++) { 816 Row leftRow = leftTable[i]; 817 boolean flag = false; 818 for (int j = 0; j < rightTable.length; j++) { 819 Row rightRow = rightTable[j]; 820 if (联接条件(leftRow, rightRow)) { 821 Row newRow = leftRow + rightRow; 822 resultSet.add(newRow); 823 flag = true; // 表明联接条件为真的记录保存起来 824 } 825 } 826 827 if (!flag) { 828 Row newRow = leftRow + 空行(右表); 829 resultSet.add(newRow); 830 } 831 } 832 833 -- 查询哪些国家没有官方语言 834 select 835 co.name country, 836 cl.language, 837 cl.isofficial 838 from 839 country co 840 left join 841 countrylanguage cl 842 on 843 co.code = cl.countrycode 844 and 845 cl.isofficial = 'T' 846 where 847 cl.isofficial is null 848 849 -- 查询所有国家的首都和官方语言, 没有首都和官方语言的也要全部显示. 850 851 -- 单行函数, 查询结果中对每一行都单独执行的函数 852 select 853 upper(name), 854 lower(continent) 855 from 856 county; 857 858 select 859 concat(code, name), 860 substr(name, 3, 2) 861 from 862 country; 863 864 -- 把国家的代码和名称及大洲用@符号连接. 如 : CHN@China@asia 865 select 866 concat(concat(concat(concat(code, '@'), name), '@'), continent) 867 from 868 country; 869 870 select 871 concat(code, '@', name, '@', continent) 872 from 873 country; 874 875 876 -- 分组函数 : 作用于一组数据. 默认情况下虚表中的所有记录被当作一个组. 877 分组函数都是统计运算, 特点就是一组数据只能有一个结果. 结果也必须是所有数据处理完后才有的. 878 879 max() 最大值 880 min() 最小值 881 avg() 平均值 882 sum() 求和 883 count() 计数 884 885 select 886 max(population), 887 min(surfacearea) 888 from 889 country ; 890 891 892 select 893 -- name, 不可以再把普通的列放在这里 894 max(name), 895 max(population), 896 min(surfacearea) 897 from 898 country ; 899 900 -- 查询亚洲国家的平均人口 901 902 -- 查询全球平均寿命最高和最低