hadoop_worddistinct_1030
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.siit</groupId>
    <artifactId>countdelete</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.7.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>2.7.4</version>
        </dependency>
    </dependencies>
</project>
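All four dependencies are pinned to Hadoop 2.7.4; the version should match the Hadoop release running on the cluster. Packaging the project with `mvn clean package` produces `target/countdelete-1.0-SNAPSHOT.jar` (Maven's default naming from the artifactId and version above), which can then be submitted with `hadoop jar target/countdelete-1.0-SNAPSHOT.jar s26.MyJob`.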
MyMap
package s26;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMap extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Emit the whole input line as the output key; the value carries no information.
        context.write(value, NullWritable.get());
    }
}
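The mapper does the first half of the de-duplication: it ignores the byte-offset key it receives and emits the entire input line as the output key, with NullWritable as a placeholder value. Because the shuffle phase groups map output by key, every copy of the same line lands in a single group; for example, three identical lines "hello" become one reduce call with key "hello" and three null values.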
MyRed
package s26;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyRed extends Reducer<Text, NullWritable, Text, NullWritable> {

    @Override
    public void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Each distinct key reaches reduce exactly once, so writing it once yields the distinct set.
        context.write(key, NullWritable.get());
    }
}
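The reducer finishes the job: it is called once per distinct line and simply writes the key back out, discarding the value list, so each line appears exactly once in the output. Because its input and output types are identical, the same class can also be registered as a combiner to pre-deduplicate on the map side (see the optional tweak after MyJob below).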
MyJob
package s26;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyJob {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // 1. Configuration: point the client at the HDFS NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");

        // 2. Job setup
        Job job = Job.getInstance(conf);
        // jar to ship to the cluster
        job.setJarByClass(MyJob.class);
        // mapper
        job.setMapperClass(MyMap.class);
        // reducer
        job.setReducerClass(MyRed.class);
        // output key/value types (Text key, NullWritable value)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // 3. I/O paths
        Path pin = new Path("/sjw");
        Path pout = new Path("/out");
        // delete the output directory if it already exists, otherwise the job fails
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(pout)) {
            fs.delete(pout, true);
        }
        FileInputFormat.setInputPaths(job, pin);
        FileOutputFormat.setOutputPath(job, pout);

        // 4. run and wait for completion
        job.waitForCompletion(true);
    }
}
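Two small optional tweaks to main(), shown as a sketch rather than part of the original notes: registering MyRed as a combiner cuts shuffle traffic, and turning waitForCompletion's boolean result into a process exit code makes scripted runs easier to check. These lines would replace the existing waitForCompletion call:

// Optional additions to MyJob.main() (a sketch, not in the original code):
job.setCombinerClass(MyRed.class);           // de-duplicate on the map side before the shuffle
boolean ok = job.waitForCompletion(true);    // 'true' prints job progress to the console
System.exit(ok ? 0 : 1);                     // report success/failure to the calling shell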
小石小石摩西摩西's study notes. Questions and corrections are welcome!