Hadoop cluster setup and a WordCount demo
1. Upload and extract the Hadoop package, upload and extract the JDK package, then rename the extracted directories to hadoop and java
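A minimal sketch of this step, assuming the tarballs were uploaded to /usr/local/software (file names are placeholders and depend on the versions used):
cd /usr/local/software
tar -zxvf hadoop-x.y.z.tar.gz && mv hadoop-x.y.z hadoop
tar -zxvf jdk-x.y.z-linux-x64.tar.gz && mv jdk-x.y.z java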
2. Configure environment variables
export JAVA_HOME=/usr/local/software/java
export HADOOP_HOME=/usr/local/software/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
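Assuming the exports above go into /etc/profile (~/.bashrc works as well), reload the environment and check that both tools are on the PATH:
source /etc/profile
java -version
hadoop version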
3. Edit the Hadoop configuration files as follows (they must end up identical on every node; see the copy commands after the slaves file)
core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/software/hadoop/tmp</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop1/</value>
    </property>
</configuration>
hdfs-site.xml
<configuration>
    <property>
        <name>dfs.name.dir</name>
        <value>/usr/local/software/hadoop/data/namenode</value>
    </property>
    <property>
        <name>dfs.data.dir</name>
        <value>/usr/local/software/hadoop/data/datanode</value>
    </property>
    <property>
        <name>dfs.tmp.dir</name>
        <value>/usr/local/software/hadoop/data/tmp</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
</configuration>
mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop1</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
slaves
hadoop2
hadoop3
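Once the hostnames from step 6 resolve, the hadoop and java directories (including this configuration) are copied to the other nodes; a sketch assuming the same /usr/local/software layout on every machine:
scp -r /usr/local/software/java hadoop2:/usr/local/software/
scp -r /usr/local/software/java hadoop3:/usr/local/software/
scp -r /usr/local/software/hadoop hadoop2:/usr/local/software/
scp -r /usr/local/software/hadoop hadoop3:/usr/local/software/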
4. Disable the firewall
systemctl disable firewalld.service
systemctl stop firewalld
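The firewall has to be off on hadoop2 and hadoop3 as well, so repeat the two commands there; the current state can be confirmed with:
systemctl status firewalld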
5. Set up passwordless SSH login
ssh-keygen -t rsa
ssh-copy-id -i hadoop2
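The key has to reach every node that hadoop1 starts daemons on, so the copy is repeated for the remaining hosts (including hadoop1 itself, since start-dfs.sh also logs in locally); a quick test confirms it:
ssh-copy-id -i hadoop3
ssh-copy-id -i hadoop1
ssh hadoop2    # should log in without a password prompt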
6. Configure the hosts file
vi /etc/hosts
192.168.0.106 hadoop1
192.168.0.107 hadoop2
192.168.0.108 hadoop3
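The same three entries go into /etc/hosts on all three machines; name resolution can then be checked (an optional sanity check):
ping -c 1 hadoop2
ping -c 1 hadoop3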
7. Configure a static IP and set the VM's network adapter to bridged mode
vim /etc/sysconfig/network-scripts/ifcfg-enp0s3
BOOTPROTO=static
IPADDR=192.168.0.106 (must be on the same subnet as the host machine)
GATEWAY=192.168.0.1
NETMASK=255.255.255.0
ONBOOT=yes
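Restart the network service so the static address takes effect (assuming CentOS 7, consistent with the systemctl/firewalld commands above):
systemctl restart network
ip addr show enp0s3    # confirm the new address is applied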
8. Set the nameserver
Edit /etc/resolv.conf and add nameserver xxx.xxx.xxx.xxx, using the host machine's DNS server.
9. Change the hostname
hostnamectl set-hostname hadoop1
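The other two machines get matching names (run on each node respectively):
hostnamectl set-hostname hadoop2    # on 192.168.0.107
hostnamectl set-hostname hadoop3    # on 192.168.0.108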
10. Start Hadoop
hdfs namenode -format
start-dfs.sh
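Because mapreduce.framework.name is set to yarn, the YARN daemons must also be running before the job in step 11 can be submitted; jps then gives a quick sanity check (expected process names follow from the configuration above):
start-yarn.sh
jps    # hadoop1: NameNode, SecondaryNameNode, ResourceManager; hadoop2/3: DataNode, NodeManager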
11. Write the code as follows
Driver: the class that configures, organizes, and submits the job
package com.xiaofeiyang;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @author: yangchun
 * @description:
 * @date: Created in 2020-07-10 9:51
 */
public class WordcountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fall back to default HDFS paths when no arguments are supplied
        if (args == null || args.length == 0) {
            args = new String[2];
            args[0] = "hdfs://hadoop1:8020/input/file*";
            args[1] = "hdfs://hadoop1:8020/input/output";
        }
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(WordcountDriver.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Final (reduce) output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
Mapper: splits the input into words for grouping
package com.xiaofeiyang;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author: yangchun
 * @description:
 * @date: Created in 2020-07-09 20:44
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split each input line on spaces and emit <word, 1> for every word
        String line = value.toString();
        String[] words = line.split(" ");
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
Reducer: aggregates and counts the results for each word
package com.xiaofeiyang;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @author: yangchun
 * @description:
 * @date: Created in 2020-07-10 9:30
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the 1s emitted by the mapper for this word
        int count = 0;
        for (IntWritable value : values) {
            count += value.get();
        }
        context.write(key, new IntWritable(count));
    }
}
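The three classes are packaged into the jar submitted below (referred to as hadoop.jar). Assuming a Maven project with the hadoop-client dependency on the classpath, a sketch of the build and upload:
mvn clean package
scp target/wordcount.jar hadoop1:/home/hadoop/hadoop.jar    # artifact name is an assumption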
Run the commands below; first create file1.txt and file2.txt with some sample text under /home/hadoop/input/
hadoop fs -put /home/hadoop/input/fi* /wc_input
hadoop jar hadoop.jar com.xiaofeiyang.WordcountDriver /wc_input /output1
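If /wc_input does not exist yet, create it first with hadoop fs -mkdir -p /wc_input before the put. Once the job finishes, the word counts can be read back from the output directory:
hadoop fs -cat /output1/part-r-*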