截止今天学习大数据技术的笔记

Hadoop

启动zookeeper

bin/zkServer.sh start

bin/zkServer.sh stop

启动Hadoop

第一步:hadoop102

sbin/start-dfs.sh

第二步:hadoop103

sbin/start-yarn.sh

 

 

第一步:hadoop103

sbin/stop-yarn.sh

第二步:hadoop102

sbin/stop-dfs.sh

 

启动hbase

bin/hbase-daemon.sh start master

bin/hbase-daemon.sh start regionserver

 

bin/hbase-daemon.sh stop master

bin/hbase-daemon.sh stop regionserver

 

bin/start-hbase.sh

bin/stop-hbase.sh

 

软连接

ln -s /opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml /opt/module/hbase131/conf/core-site.xml

 

ln -s /opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml /opt/module/hbase131/conf/hdfs-site.xml

 

Web端查看HDFS的NameNode,HDFS上存储的数据信息

http://hadoop102:9870

Web端查看YARN的ResourceManager

浏览器中查看YARN上运行的Job信息

http://hadoop103:8088

查看JobHistory

http://hadoop102:19888/jobhistory

 

 

 

 

 

 

 

 

 

myhadoop.sh start

myhadoop.sh stop

jpsall

Group name: ddl

  Commands: alter, alter_async, alter_status, create, describe, disable, disable_all, drop, drop_all, enable, enable_all, exists, get_table, is_disabled, is_enabled, list, locate_region, show_filters

Group name: dml

  Commands: append, count, delete, deleteall, get, get_counter, get_splits, incr, put, scan, truncate, truncate_preserve

  Group name: namespace

  Commands: alter_namespace, create_namespace, describe_namespace, drop_namespace, list_namespace, list_namespace_tables

 

 

 

l  进入hbase shell空间

bin/hbase shell

l  创建 create 'student','info'

l  插入 put 'student','1001','info:sex','male'

l  查看scan 'student'  describe ‘student’

l  put ‘stu’,’1001’,’info1:name’,’zhangsan’

l       表            rowkey    列族:列名      value

 

解决日志 Jar 包冲突

 

 

[atguigu@hadoop102 software]$ mv $HIVE_HOME/lib/log4j-slf4j-impl-2.10.0.jar $HIVE_HOME/lib/log4j-slf4j-impl-2.10.0.bak

sqoop路径:/opt/module/sqoop

把指定文件放到hadoop指定路径:hadoop fs -put stu1.txt /user/hive/warehouse/stu

启动metastore      hive --service metastore

启动hiveserver2   bin/hive --service hiveserver2

hive启动(/opt/module/hive):bin/hive

 

测试流程:

①hive路径下建表:test1

create table test1

(InvoiceNo String, StockCode String, Description String, Quantity String, InvoiceDate String, UnitPrice String, CustomerID String, Country String)

ROW format delimited fields terminated by ',' STORED AS TEXTFILE;

②导入数据:

load data local inpath '/opt/module/data/test.csv' into table test1;

select * from test1;

③进入mysql:mysql -uroot -p000000

(创建数据库命令:create database company;)

(进入对应数据库命令:use company;)

④将汇总结果导出到MySQL:

1.建表(可视化建表):

2.sqoop路径下:

bin/sqoop export \

--connect jdbc:mysql://hadoop102:3306/mysql \

--username root \

--password 000429 \

--table test1 \

--num-mappers 1 \

--export-dir /user/hive/warehouse/test1 \

--input-fields-terminated-by ","

 

数据没有插入完全(报错):

Container [pid=3962,containerID=container_1632883011739_0002_01_000002] is running 270113280B beyond the 'VIRTUAL' memory limit. Current usage: 91.9 MB of 1 GB physical memory used; 2.4 GB of 2.1 GB virtual memory used. Killing container.

 

 

 

 

Sqoop

导出

bin/sqoop import \

--connect jdbc:mysql://hadoop102:3306/company \

--username root \

--password 000429 \

--table staff \

--target-dir /user/company \

--delete-target-dir \

--num-mappers 1 \

--fields-terminated-by  "\t"

 

课堂测试:

一、hive建表

 

create table sale

(day_id String, sale_nbr String, buy_nbr String, cnt String, round String)

ROW format delimited fields terminated by ',' STORED AS TEXTFILE;

 

二、导入hive表

load data local inpath '/opt/module/data/sales.csv' into table sale;

 

 

 

 

 

 

 

Sqoop:

 查询导入
 bin/sqoop import
 --connect     jdbc:mysql://hadoop102:3306/company
 --username root 
--password 123456 
--target-dir /user/company 
--delete-target-dir 
--num-mappers 1 
--fields-terminated-by "\t"
 --query &apos;select name,sex from staff where id<=1 and $CONDITIONS;&apos;

全部导入
 bin/sqoop import 
--connect jdbc:mysql://hadoop102:3306/company
 --username root 
--password 123456 
--table staff 
--target-dir /user/company 
--delete-target-dir
 --num-mappers 1 
--fields-terminated-by "\t"
指定列:--columns id,sex
条件:where ""
导入到HIVE:
bin/sqoop import \
> --connect jdbc:mysql://hadoop102:3306/company \
> --username root \
> --password 123456 \
> --table staff \
> --num-mappers 1 \
> --hive-impo
> --fields-terminated-by "\t" \
> --hive-overwrite \
> --hive-table  数据库名.staff_hive
导出:
bin/sqoop export 
--connect  jdbc:mysql://hadoop102:3306/company 
--username root 
--password 000429
--table sale1 
--num-mappers 1 
--export-dir /user/hive/warehouse/sale 
--input-fields-terminated-by ","

 

导出整个表到mysql:

bin/sqoop export \

--connect jdbc:mysql://hadoop102:3306/company \

--username root \

--password 000429 \

--table sale \

--num-mappers 1 \

--export-dir /user/hive/warehouse/sale \

--input-fields-terminated-by ","

Mysql建表语句:

 

USE `company`;

CREATE TABLE `sale1` (

  `day_id` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL,

  `sale_nbr` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL,

  `cnt` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL,

  `round` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL

) ENGINE=INNODB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;

 

CREATE TABLE `sale2` (

  `day_id` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `sale_nbr` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `cnt` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `round` varchar(50) COLLATE utf8_bin DEFAULT NULL

) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;

 

USE `company`;

CREATE TABLE `sale3` (

  `day_id` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL,

  `sale_nbr` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL,

  `sale_number` VARCHAR(50) COLLATE utf8_bin DEFAULT NULL

) ENGINE=INNODB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;

 

CREATE TABLE `sale4` (

  `day_id` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `nbr` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `buy_cnt` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `buy_round` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `sale_cnt` varchar(50) COLLATE utf8_bin DEFAULT NULL,

  `sale_round` varchar(50) COLLATE utf8_bin DEFAULT NULL

) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;

查询1:

INSERT INTO company.`sale1`(

    day_id,

    sale_nbr,

    cnt,

    ROUND

)SELECT

  day_id,

  sale_nbr,

  SUM(cnt),

  SUM(ROUND)

FROM

  company.sale

WHERE sale_nbr LIKE 'C%'

GROUP BY day_id,sale_nbr;

 

 

 

 

 

 

查询2

INSERT INTO company.`sale2`(

    day_id,

    sale_nbr,

    cnt,

    ROUND

)SELECT

  day_id,

  sale_nbr,

  SUM(cnt),

  SUM(ROUND)

FROM

  company.sale

WHERE sale_nbr LIKE 'O%'

GROUP BY day_id,sale_nbr;

 

 

 

 

 

 

查询3:

INSERT INTO company.sale3(

    day_id,

    sale_nbr,

    sale_number

)SELECT

  day_id,

  sale_nbr,

  COUNT(sale_nbr)

FROM

  company.sale

WHERE sale_nbr LIKE "O%"

GROUP BY sale_nbr,

  day_id ;

 

 

 

查询4

买:

SELECT day_id, buy_nbr,SUM(cnt) AS buy_cnt ,SUM(ROUND) AS buy_round FROM company.sale WHERE (buy_nbr!='PAX') AND (buy_nbr!='') GROUP BY  buy_nbr,day_id;

卖:

SELECT day_id, sale_nbr,SUM(cnt) AS sale_cnt,SUM(ROUND) AS sale_round FROM company.sale WHERE sale_nbr LIKE "O%" GROUP BY  sale_nbr,day_id;

 

插入:

INSERT INTO company.`sale4_b`(

    day_id,

    buy_nbr,

    buy_cnt,

    buy_round

)SELECT day_id, buy_nbr,SUM(cnt) AS buy_cnt ,SUM(ROUND) AS buy_round FROM company.sale WHERE (buy_nbr!='PAX') AND (buy_nbr!='') GROUP BY buy_nbr,day_id;

 

 

插入sale4:

INSERT INTO company.sale4(

    day_id,

    nbr,

    buy_cnt,

    buy_round,

    sale_cnt,

    sale_round,

    w

)SELECT

  sale4_b.day_id,

  buy_nbr,

  buy_cnt,

  buy_round,

  sale_cnt,

  sale_round,

  (sale_round-buy_round)

FROM

  sale4_b

  JOIN sale4_s

WHERE sale4_b.day_id = sale4_s.day_id

  AND sale4_b.buy_nbr = sale4_s.sale_nbr ;

 

posted @ 2021-10-01 21:57  靠谱杨  阅读(44)  评论(0编辑  收藏  举报