hadoop虚拟机安装步骤

以下为本人计算机具体操作内容及截屏。

　　刚开始在本机Windows上安装hadoop，虽然网址localhost:8088和localhost:50070可以访问，但在eclipse中运行hadoop程序时报错，所以改为在linux虚拟机上配置hadoop。由于之前已配置过JDK，且JDK配置比较简单，所以下面不再展示JDK的配置过程。

　　1.添加用户并添加用户权限：

　　　　添加用户不作演示。

　　　　添加用户权限：

　　　　　　下面“# User privilege specification”下的“hadoop  ALL=(ALL:ALL)  ALL”一行（原文标红部分）即为添加的用户权限，个人新建用户名为hadoop，请根据自己设置的用户名进行修改。

#
# This file MUST be edited with the 'visudo' command as root.
#
# Please consider adding local content in /etc/sudoers.d/ instead of
# directly modifying this file.
#
# See the man page for details on how to write a sudoers file.
#
Defaults        env_reset
Defaults        mail_badpass
Defaults        secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin"

# Host alias specification

# User alias specification

# Cmnd alias specification

# User privilege specification
root    ALL=(ALL:ALL) ALL
hadoop  ALL=(ALL:ALL)  ALL  

# Members of the admin group may gain root privileges
%admin ALL=(ALL) ALL

# Allow members of group sudo to execute any command
%sudo   ALL=(ALL:ALL) ALL

# See sudoers(5) for more information on "#include" directives:

#includedir /etc/sudoers.d

　　　　　　2.全局变量设置：

　　　　　　　　根据个人具体目录及文件名进行修改

# /etc/profile: system-wide .profile file for the Bourne shell (sh(1))
# and Bourne compatible shells (bash(1), ksh(1), ash(1), ...).

# Prompt setup: runs only when PS1 is non-empty.
if [ "$PS1" ]; then
  if [ "$BASH" ] && [ "$BASH" != "/bin/sh" ]; then
    # The file bash.bashrc already sets the default PS1.
    # PS1='\h:\w\$ '
    if [ -f /etc/bash.bashrc ]; then
      . /etc/bash.bashrc
    fi
  else
    # Not bash (or bash invoked as /bin/sh): use '# ' for uid 0, '$ ' otherwise.
    if [ "`id -u`" -eq 0 ]; then
      PS1='# '
    else
      PS1='$ '
    fi
  fi
fi

# Source every readable *.sh drop-in under /etc/profile.d, then clean up the
# loop variable so it does not leak into the session.
if [ -d /etc/profile.d ]; then
  for i in /etc/profile.d/*.sh; do
    if [ -r $i ]; then
      . $i
    fi
  done
  unset i
fi
# --- Local additions for the JDK and Hadoop; adjust the /opt paths to match
# --- the actual install directories on your machine.
# JDK install root and its bundled JRE.
export JAVA_HOME=/opt/jdk
export JRE_HOME=${JAVA_HOME}/jre
# Class path: current directory plus the JDK and JRE lib directories.
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
# Prepend the JDK tools so they take precedence over anything already on PATH.
export PATH=${JAVA_HOME}/bin:$PATH
# Hadoop install root.
export HADOOP_HOME=/opt/hadoop
# Append the JRE and Hadoop bin/sbin directories to PATH.
# NOTE(review): $JAVA_HOME/bin is already prepended two lines above, so the
# copy appended here is redundant.
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

　　　　注意：配置完要进行配置生效

　　　　　　source /etc/profile（source是shell内置命令，不能通过sudo执行，直接在当前终端运行即可）或者进行重启虚拟机操作

　　　　此时输入java 或 hadoop 具体显示页面如下：

　　若全局变量无效，则在当前用户权限文件进行添加：

　　参考博客：　　https://www.cnblogs.com/haore147/p/3633116.html

　　　　　到这里hadoop的本地配置可以运行。

　---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

　　　　HADOOP与本机eclipse结合：

　　　　1.hdfs-site.xml

<!-- Put site-specific property overrides in this file. -->
<configuration>

    <!-- 配置HDFS的冗余度 -->
        <property>
                <name>dfs.replication</name>
                <value>1</value>
        </property>

        <!-- 配置是否检查权限 -->
        <property>
                <name>dfs.permissions.enabled</name>
                <value>false</value>
        </property>
</configuration>

　　　　　　2.　　core-site.xml

<configuration>

    <!--配置HDFS的NameNode -->
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://ubuntu:9000</value>
        </property>

        <!--配置DataNode保存数据的位置 -->
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/opt/hadoop_tmp</value>
        </property>

</configuration>

　　　　3.在/opt下面创建hadoop_tmp目录（与core-site.xml中hadoop.tmp.dir的值/opt/hadoop_tmp保持一致）

　　　　4.mapred-site.xml

<configuration>

    <!-- 配置Mapreduce运行框架 -->
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
</configuration>

　　　　5.　yarn-site.xml

<configuration>

        <!-- 配置ResourceManager的地址 -->
        <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>ubuntu</value>
        </property>

        <!-- 配置NodeManager执行任务的方式 -->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>

</configuration>

　　　　6.　修改：　C:\Windows\System32\drivers\etc\hosts

# Copyright (c) 1993-2009 Microsoft Corp.
#
# This is a sample HOSTS file used by Microsoft TCP/IP for Windows.
#
# This file contains the mappings of IP addresses to host names. Each
# entry should be kept on an individual line. The IP address should
# be placed in the first column followed by the corresponding host name.
# The IP address and the host name should be separated by at least one
# space.
#
# Additionally, comments (such as these) may be inserted on individual
# lines or following the machine name denoted by a '#' symbol.
#
# For example:
#
#      102.54.94.97     rhino.acme.com          # source server
#       38.25.63.10     x.acme.com              # x client host

# localhost name resolution is handled within DNS itself.
#    127.0.0.1       localhost
#    ::1             localhost
127.0.0.1    localhost
192.168.2.131 ubuntu

　　　　7.　注意要将本机上的hadoop/bin下的内容加上

　　　　8.配置好后 8088 和 50070均可访问

　　　　　wordcount代码：

package worcount;


import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * 
 * 描述：WordCount explains by Felix
 * 
 * @author Hadoop Dev Group
 */
public class Wordcount {
    /**
     * MapReduceBase类:实现了Mapper和Reducer接口的基类（其中的方法只是实现接口，而未作任何事情） Mapper接口：
     * WritableComparable接口：实现WritableComparable的类可以相互比较。所有被用作key的类应该实现此接口。
     * Reporter 则可用于报告整个应用的运行进度，本例中未使用。
     * 
     */
    public static class Map extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, IntWritable> {
        /**
         * LongWritable, IntWritable, Text 均是 Hadoop 中实现的用于封装 Java
         * 数据类型的类，这些类实现了WritableComparable接口，
         * 都能够被串行化从而便于在分布式环境中进行数据交换，你可以将它们分别视为long,int,String 的替代品。
         */
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * Mapper接口中的map方法： void map(K1 key, V1 value, OutputCollector<K2,V2>
         * output, Reporter reporter) 映射一个单个的输入k/v对到一个中间的k/v对
         * 输出对不需要和输入对是相同的类型，输入对可以映射到0个或多个输出对。
         * OutputCollector接口：收集Mapper和Reducer输出的<k,v>对。
         * OutputCollector接口的collect(k, v)方法:增加一个(k,v)对到output
         */
        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    public static class Reduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        
        // Hadoop的安装目录
        System.setProperty("hadoop.home.dir", "D:\\hadoop");
        
        /**
         * JobConf：map/reduce的job配置类，向hadoop框架描述map-reduce执行的工作
         * 构造方法：JobConf()、JobConf(Class exampleClass)、JobConf(Configuration
         * conf)等
         */
        JobConf conf = new JobConf(Wordcount.class);
        conf.setJobName("wordcount"); // 设置一个用户定义的job名称
        conf.setOutputKeyClass(Text.class); // 为job的输出数据设置Key类
        conf.setOutputValueClass(IntWritable.class); // 为job输出设置value类
        conf.setMapperClass(Map.class); // 为job设置Mapper类
        conf.setCombinerClass(Reduce.class); // 为job设置Combiner类
        conf.setReducerClass(Reduce.class); // 为job设置Reduce类
        conf.setInputFormat(TextInputFormat.class); // 为map-reduce任务设置InputFormat实现类
        conf.setOutputFormat(TextOutputFormat.class); // 为map-reduce任务设置OutputFormat实现类
        /**
         * InputFormat描述map-reduce中对job的输入定义 setInputPaths():为map-reduce
         * job设置路径数组作为输入列表 setInputPath()：为map-reduce job设置路径数组作为输出列表
         */
        FileInputFormat.setInputPaths(conf, new Path("hdfs://ubuntu:9000/input"));
        FileOutputFormat.setOutputPath(conf, new Path("hdfs://ubuntu:9000/output"));
//        FileInputFormat.setInputPaths(conf, new Path(args[0]));
//        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf); // 运行一个job
//        System.out.println("eee");
    }
}

　　log4j.properties代码

log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

posted on 2017-12-09 10:50 NEU-2015 阅读(1418) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

hadoop虚拟机安装步骤

导航

公告