Hadoop: Uploading Files to HDFS in Code

 

This project demonstrates how to upload files from a Windows machine to HDFS through the Hadoop Java API.

It also counts the number of occurrences of each word in the uploaded text file.
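Because the HDFS client code runs on Windows, Hadoop needs to find a local winutils.exe before any FileSystem call will work. The sketch below shows this bootstrap step; the class name WindowsSetup and the install path E:\Hadoop are assumptions, not part of the original project, so adjust them to your own winutils location.

package com.hadoop.worldcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class WindowsSetup {
    public static void main(String[] args) throws Exception {
        // Assumption: winutils.exe lives in E:\Hadoop\bin on this machine.
        // Hadoop's shell utilities locate it via HADOOP_HOME or this system property.
        System.setProperty("hadoop.home.dir", "E:\\Hadoop");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // same NameNode address used below
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println("Connected to: " + fs.getUri());
        }
    }
}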

1. Project structure

The project consists of three classes in the package com.hadoop.worldcount: CopyFromLocalFile (uploads or appends a local file to HDFS), File (streams a local file to HDFS with a progress callback), and MyWordCount (a MapReduce job that counts the words in the uploaded file).

package com.hadoop.worldcount;

import java.io.FileInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {

    /**
     * Check whether a path exists on HDFS.
     */
    public static boolean test(Configuration conf, String path) {
        try (FileSystem fs = FileSystem.get(conf)) {
            return fs.exists(new Path(path));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Copy a local file to the given HDFS path, overwriting it if it already exists.
     */
    public static void copyFromLocalFile(Configuration conf,
            String localFilePath, String remoteFilePath) {
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        try (FileSystem fs = FileSystem.get(conf)) {
            /* fs.copyFromLocalFile: the first argument controls whether the source
               file is deleted, the second whether an existing target is overwritten */
            fs.copyFromLocalFile(false, true, localPath, remotePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Append the contents of a local file to an existing HDFS file.
     * (The cluster must support append operations.)
     */
    public static void appendToFile(Configuration conf, String localFilePath,
            String remoteFilePath) {
        Path remotePath = new Path(remoteFilePath);
        try (FileSystem fs = FileSystem.get(conf);
                FileInputStream in = new FileInputStream(localFilePath);
                FSDataOutputStream out = fs.append(remotePath)) {
            byte[] data = new byte[1024];
            int read;
            while ((read = in.read(data)) > 0) {
                out.write(data, 0, read);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Main method: upload the file if it does not exist yet; otherwise either
     * overwrite it or append to it, depending on the value of "choice".
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        String localFilePath = "/usr/hadoop/test/test.txt";   // local path
        String remoteFilePath = "/user/hadoop/test/test.txt"; // HDFS path
        String choice = "append";       // if the file already exists, append to it
        // String choice = "overwrite"; // if the file already exists, overwrite it
        try {
            /* Check whether the remote file already exists */
            boolean fileExists = false;
            if (CopyFromLocalFile.test(conf, remoteFilePath)) {
                fileExists = true;
                System.out.println(remoteFilePath + " already exists.");
            } else {
                System.out.println(remoteFilePath + " does not exist.");
            }
            /* Perform the upload */
            if (!fileExists) { // file does not exist: upload it
                CopyFromLocalFile.copyFromLocalFile(conf, localFilePath,
                        remoteFilePath);
                System.out.println(localFilePath + " has been uploaded to " + remoteFilePath);
            } else if (choice.equals("overwrite")) { // overwrite the existing file
                CopyFromLocalFile.copyFromLocalFile(conf, localFilePath,
                        remoteFilePath);
                System.out.println(localFilePath + " has overwritten " + remoteFilePath);
            } else if (choice.equals("append")) { // append to the existing file
                CopyFromLocalFile.appendToFile(conf, localFilePath,
                        remoteFilePath);
                System.out.println(localFilePath + " has been appended to " + remoteFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
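HDFS append is picky on a small test cluster: with only one DataNode, fs.append() can fail while trying to replace a DataNode in the write pipeline. The sketch below shows one commonly used workaround, relaxing the client's replacement policy before calling appendToFile. The class name AppendExample is illustrative, and the settings assume a pseudo-distributed setup with fewer than three DataNodes.

package com.hadoop.worldcount;

import org.apache.hadoop.conf.Configuration;

public class AppendExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        // Assumption: fewer than three DataNodes are running, so do not try to
        // replace a failed DataNode in the write pipeline during append.
        conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        // Reuses the paths hard-coded in CopyFromLocalFile.main() above.
        CopyFromLocalFile.appendToFile(conf, "/usr/hadoop/test/test.txt",
                "/user/hadoop/test/test.txt");
    }
}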

  

package com.hadoop.worldcount;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

public class File {

/**
  * @param args
  * @throws IOException 
  */

public static void main(String[] args) throws Exception {
        String localSrc = "E:\\Hadoop\\work\\bashrc.txt"; // local file
        String dst = "hdfs://localhost:9000/user/hadoop/test/bashrc.txt"; // destination on HDFS
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        OutputStream out = fs.create(new Path(dst), new Progressable() { // progress callback
            public void progress() {
                System.out.print("."); // print a dot as the upload makes progress
            }
        });
        IOUtils.copyBytes(in, out, 4096, true); // copy and close the streams
    }
}
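To verify that the streamed upload worked, the file can be read back from HDFS and dumped to standard output. This is a minimal sketch that reuses the same dst URI as File.main(); the class name ReadBack is illustrative.

package com.hadoop.worldcount;

import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadBack {
    public static void main(String[] args) throws Exception {
        // Same HDFS URI that File.main() wrote to.
        String dst = "hdfs://localhost:9000/user/hadoop/test/bashrc.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        try (InputStream in = fs.open(new Path(dst))) {
            // Copy the HDFS stream to stdout; 'false' keeps System.out open.
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
    }
}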

  

package com.hadoop.worldcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyWordCount {

    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        /**
         * Mapper's map method:
         *   void map(K1 key, V1 value, Context context)
         * maps a single input key/value pair to intermediate key/value pairs.
         * The output pairs need not have the same types as the input pair, and one
         * input pair may map to zero or more output pairs.
         * Context collects the <k, v> pairs emitted by the Mapper; its write(k, v)
         * method adds a pair to the context.
         * Here the map function splits each line with StringTokenizer and, for every
         * token, writes the pair (word, 1) to the context.
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                context.write(new Text(itr.nextToken()), new IntWritable(1));
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        /**
         * Reducer's reduce method:
         *   void reduce(Text key, Iterable<IntWritable> values, Context context)
         * The key/value pairs come from the map function (possibly after further
         * processing by a combiner); the result is again written through the context.
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        /* Configuration describes the MapReduce job to the Hadoop framework */
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "myWordCount"); // user-defined job name
        job.setJarByClass(MyWordCount.class);
        job.setMapperClass(TokenizerMapper.class);      // Mapper class for the job
        job.setCombinerClass(IntSumReducer.class);      // Combiner class for the job
        job.setReducerClass(IntSumReducer.class);       // Reducer class for the job
        job.setOutputKeyClass(Text.class);              // key class of the job output
        job.setOutputValueClass(IntWritable.class);     // value class of the job output

        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/user/root/input/bashrc.txt"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/user/root/output"));

        System.exit(job.waitForCompletion(true) ? 0 : 1); // run the job
    }
}
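Note that the output directory must not exist before the job is submitted, or FileOutputFormat will refuse to start. Once the job has finished, the word counts can be read straight from HDFS; the sketch below assumes the default single reducer (so the results are in part-r-00000) and the same paths as above. The class name PrintWordCountOutput is illustrative.

package com.hadoop.worldcount;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrintWordCountOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        // Assumption: one reducer, so all results land in part-r-00000.
        Path result = new Path("/user/root/output/part-r-00000");
        try (FileSystem fs = FileSystem.get(conf);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(fs.open(result), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line); // each line is: word <TAB> count
            }
        }
    }
}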

  

 
