10.19
Today I worked on the big data assignment; the steps were as follows:
1. Create an input directory in HDFS to hold the files
hadoop fs -mkdir /input
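Plain mkdir errors out if the directory already exists; the -p flag (a variant I did not need this time) creates it only when missing:
hadoop fs -mkdir -p /input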
2. Upload the files into input
[hadoop@node1 hadoop]$ hdfs dfs -put /data/file1.txt /input
[hadoop@node1 hadoop]$ hdfs dfs -put /data/file2.txt /input
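A quick listing (an extra check, not part of the original run) confirms both files are in place:
[hadoop@node1 hadoop]$ hdfs dfs -ls /input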
3. First, change into the directory holding the example jar
cd /export/server/hadoop/share/hadoop/mapreduce
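The jar's file name tracks the Hadoop version, so if a different release is installed, listing the directory shows the exact name to use in the next step:
ls hadoop-mapreduce-examples-*.jar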
4. Run the following command (the files can be browsed in the web UI at node1:9870)
hadoop jar hadoop-mapreduce-examples-3.3.4.jar wordcount /input /output
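The job refuses to start if /output already exists, so remove it first when rerunning (hdfs dfs -rm -r /output). Once it finishes, the counts can be read straight from HDFS; the reducer writes them to part-r-00000:
hdfs dfs -ls /output
hdfs dfs -cat /output/part-r-00000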
5. Enter Hive
bin/hive
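bin/hive assumes the shell is currently in the Hive installation directory; once the hive> prompt appears, any simple statement confirms the CLI is working:
show databases;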
6. Create a table
create table docs(line string);
7. Load the data into the table
load data inpath '/input' overwrite into table docs;
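Worth noting: load data inpath (without LOCAL) moves the files out of /input into Hive's warehouse directory rather than copying them, so /input is empty afterwards. A quick probe shows the lines arrived:
select * from docs limit 2;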
8. Count the words into a new table
create table word_count as
> select word, count(1) as count from
> (select explode(split(line, ' ')) as word from docs) w
> group by word
> order by word;
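The per-word counts can then be inspected directly (the limit is just to keep the output short):
select * from word_count limit 10;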