Common relational algebra operations — selection, projection, union, intersection, difference, and natural join — can all be parallelized quite easily with the MapReduce framework. The relation R used in the first examples is:
NAME | SEX | AGE
小明 | 男 | 25
小红 | 女 | 18
小张 | 男 | 22
小米 | 女 | 23
小丽 | 女 | 21
小王 | 男 | 19
小美 | 女 | 25
小朱 | 女 | 26
Selection
Store the data of relation R in a file named relationR and upload it to the /data directory in HDFS, as shown in Code 1-1.
Code 1-1
root@lejian:/data# cat relationR
小明 男 25
小红 女 18
小张 男 22
小米 女 23
小丽 女 21
小王 男 19
小美 女 25
小朱 女 26
root@lejian:/data# hadoop fs -put relationR /data
root@lejian:/data# hadoop fs -ls -R /data
-rw-r--r--   1 root supergroup        112 2017-01-07 15:03 /data/relationR
To apply a selection condition C to relation R — here, selecting the records whose sex is 女 — the Map phase only has to test each input record and emit the ones that satisfy the condition as (record, null) key-value pairs. The Reduce phase needs no extra work (the driver does not even set a reducer class, so the default identity reducer is used). The selection condition is supplied through the configuration file conf.xml shown in Code 1-2, which the driver adds to the job configuration.
Code 1-2
<?xml version="1.0"?>
<configuration>
    <property>
        <name>sex</name>
        <value>女</value>
    </property>
</configuration>
Code 1-3 (the Person helper class used to parse each record)
package com.hadoop.mapreduce;

// Plain value class that parses one line of relation R in the form "name sex age".
public class Person {

    private String name;
    private String sex;
    private int age;

    public Person(String line) {
        String[] lines = line.split(" ");
        this.name = lines[0];
        this.sex = lines[1];
        this.age = Integer.parseInt(lines[2]);
    }

    public String getName() {
        return name;
    }

    public String getSex() {
        return sex;
    }

    public int getAge() {
        return age;
    }

    public String getVal(String col) {
        if ("name".equals(col)) {
            return name;
        }
        if ("sex".equals(col)) {
            return sex;
        }
        return age + "";
    }

    @Override
    public String toString() {
        return name + " " + sex + " " + age;
    }

}
Code 1-4 (SelectionMap)
package com.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SelectionMap extends Mapper<LongWritable, Text, Text, NullWritable> {

    private String sex = "";
    private Text val = new Text();

    @Override
    protected void setup(Context context) throws java.io.IOException, InterruptedException {
        // Read the selection condition from the job configuration (see conf.xml in Code 1-2).
        Configuration conf = context.getConfiguration();
        sex = conf.get("sex");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
        Person person = new Person(value.toString());
        // Emit only the records that satisfy the condition; no value is needed.
        if (sex.equals(person.getVal("sex"))) {
            val.set(person.toString());
            context.write(val, NullWritable.get());
        }
    }

}
Code 1-5 (Selection driver)
package com.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Selection {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 2) {
            throw new RuntimeException("请输入输入路径、输出路径");
        }
        Configuration conf = new Configuration();
        // conf.xml (Code 1-2) supplies the selection condition, e.g. sex = 女.
        conf.addResource("conf.xml");
        Job job = Job.getInstance(conf);
        job.setJobName("Selection");
        // Ship the jar containing the job classes to the cluster.
        job.setJarByClass(Selection.class);
        job.setMapperClass(SelectionMap.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
Run Code 1-5; the result is shown in Code 1-6.
Code 1-6
root@lejian:/data# hadoop jar selection.jar com.hadoop.mapreduce.Selection /data /output
…………
root@lejian:/data# hadoop fs -ls -R /output
-rw-r--r--   1 root supergroup          0 2017-01-07 15:05 /output/_SUCCESS
-rw-r--r--   1 root supergroup         70 2017-01-07 15:05 /output/part-r-00000
root@lejian:/data# hadoop fs -cat /output/part-r-00000
小丽 女 21
小朱 女 26
小米 女 23
小红 女 18
小美 女 25
Projection
For example, to apply a projection on relation R that extracts all values of the AGE attribute, the Map phase emits each record's AGE together with a NullWritable value, and the Reduce phase simply writes out each key. Note that this projection also removes duplicates: AGE 25 appears twice in R (小明 and 小美) but only once in the output.
Code 1-7 (ProjectionMap)
package com.hadoop.mapreduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ProjectionMap extends Mapper<LongWritable, Text, IntWritable, NullWritable> {

    private IntWritable age = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
        // Project each record onto the AGE attribute; duplicates are merged in the reduce phase.
        Person person = new Person(value.toString());
        age.set(person.getAge());
        context.write(age, NullWritable.get());
    }

}
Code 1-8 (ProjectionReduce)
package com.hadoop.mapreduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class ProjectionReduce extends Reducer<IntWritable, NullWritable, IntWritable, NullWritable> {

    @Override
    protected void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws java.io.IOException, InterruptedException {
        // Each distinct AGE value arrives as one key, so writing the key once deduplicates the projection.
        context.write(key, NullWritable.get());
    }

}
Code 1-9 (Projection driver)
package com.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Projection {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 2) {
            throw new RuntimeException("请输入输入路径、输出路径");
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("Projection");
        job.setJarByClass(Projection.class);
        job.setMapperClass(ProjectionMap.class);
        job.setReducerClass(ProjectionReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
Run Code 1-9; the result is shown in Code 1-10.
Code 1-10
root@lejian:/data# hadoop jar projection.jar com.hadoop.mapreduce.Projection /data /output
…………
root@lejian:/data# hadoop fs -ls -R /output
-rw-r--r--   1 root supergroup          0 2017-01-07 15:52 /output/_SUCCESS
-rw-r--r--   1 root supergroup         21 2017-01-07 15:52 /output/part-r-00000
root@lejian:/data# hadoop fs -cat /output/part-r-00000
18
19
21
22
23
25
26
Intersection
If relations A and B have the same schema and we want their intersection, the Map phase emits (r, 1) for every record r in A and in B, and the Reduce phase sums the counts for each record and outputs it only when the count is 2. We again use the Person class, this time as the key. For identical Person records from A and B to reach the same reduce node and be grouped together, the Person class from Code 1-3 has to be modified into a WritableComparable, as shown in Code 1-11: the default HashPartitioner uses the key's hashCode() to decide which reducer a record is sent to, and within a reducer keys are sorted and grouped by compareTo(), so two Person objects that hash and compare as equal are treated as the same key.
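For reference, Hadoop's default partitioner (HashPartitioner) chooses the target reduce partition essentially as shown below. The class here is a simplified restatement for illustration only (the name HashLikePartitioner is made up); it is not code from this article, but it shows why Person overrides hashCode() in Code 1-11.

package com.hadoop.mapreduce;

import org.apache.hadoop.mapreduce.Partitioner;

// Simplified view of the default HashPartitioner: keys with equal hashCode()
// always land in the same reduce partition, so identical Person records from
// relation A and relation B meet at the same reducer.
public class HashLikePartitioner<K, V> extends Partitioner<K, V> {

    @Override
    public int getPartition(K key, V value, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }

}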
Modify the Person class from Code 1-3 into Code 1-11. Note that because compareTo() delegates to hashCode(), two different records whose hash codes happen to collide would be grouped as the same key; for this small example that is acceptable.
Code 1-11
package com.hadoop.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

// Person is now a WritableComparable so that it can be used as a MapReduce key.
public class Person implements WritableComparable<Person> {

    private String name;
    private String sex;
    private int age;

    // A no-argument constructor is required so that Hadoop can create the key by reflection.
    public Person() {
    }

    public Person(String line) {
        String[] lines = line.split(" ");
        this.name = lines[0];
        this.sex = lines[1];
        this.age = Integer.parseInt(lines[2]);
    }

    public String getName() {
        return name;
    }

    public String getSex() {
        return sex;
    }

    public int getAge() {
        return age;
    }

    public String getVal(String col) {
        if ("name".equals(col)) {
            return name;
        }
        if ("sex".equals(col)) {
            return sex;
        }
        return age + "";
    }

    @Override
    public String toString() {
        return name + " " + sex + " " + age;
    }

    @Override
    public int hashCode() {
        int res = 20;
        res = name.hashCode() + 10 * res;
        res = sex.hashCode() + 10 * res;
        res = age + 10 * res;
        return res;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeUTF(sex);
        out.writeInt(age);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        sex = in.readUTF();
        age = in.readInt();
    }

    @Override
    public int compareTo(Person o) {
        // Records are ordered (and therefore grouped) by their hash codes.
        if (hashCode() > o.hashCode()) {
            return 1;
        }
        if (hashCode() < o.hashCode()) {
            return -1;
        }
        return 0;
    }

    public static void main(String[] args) {
        System.out.println(new Person("Lily female 22").hashCode());
    }

}
Upload relation A and relation B to the /data directory in HDFS, as shown in Code 1-12.
Code 1-12
root@lejian:/data# cat relationA
Tom male 21
Amy female 19
Daivd male 16
Lily female 22
Lucy female 20
John male 19
Rose female 19
Jojo female 26
root@lejian:/data# cat relationB
Daivd male 16
Jack male 15
Lily female 22
Lucy female 20
Tom male 25
root@lejian:/data# hadoop fs -put relation* /data
root@lejian:/data# hadoop fs -ls -R /data
-rw-r--r--   1 root supergroup        113 2017-01-07 20:48 /data/relationA
-rw-r--r--   1 root supergroup         69 2017-01-07 20:48 /data/relationB
Code 1-13 (IntersectionMap)
package com.hadoop.mapreduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class IntersectionMap extends Mapper<LongWritable, Text, Person, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
        // Emit (record, 1) for every record of relation A and relation B.
        Person person = new Person(value.toString());
        context.write(person, ONE);
    }

}
Code 1-14 (IntersectionReduce)
package com.hadoop.mapreduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class IntersectionReduce extends Reducer<Person, IntWritable, Person, NullWritable> {

    @Override
    protected void reduce(Person key, Iterable<IntWritable> values, Context context) throws java.io.IOException, InterruptedException {
        // A record belongs to the intersection only if it appears in both A and B,
        // i.e. its counts sum to 2.
        int count = 0;
        for (IntWritable val : values) {
            count += val.get();
        }
        if (count == 2) {
            context.write(key, NullWritable.get());
        }
    }

}
Code 1-15 (Intersection driver)
package com.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Intersection {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 2) {
            throw new RuntimeException("请输入输入路径、输出路径");
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("Intersection");
        job.setJarByClass(Intersection.class);
        job.setMapperClass(IntersectionMap.class);
        job.setMapOutputKeyClass(Person.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(IntersectionReduce.class);
        job.setOutputKeyClass(Person.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
Run Code 1-15; the result is shown in Code 1-16.
Code 1-16
root@lejian:/data# hadoop jar intersection.jar com.hadoop.mapreduce.Intersection /data /output
…………
root@lejian:/data# hadoop fs -ls -R /output
-rw-r--r--   1 root supergroup          0 2017-01-07 20:30 /output/_SUCCESS
-rw-r--r--   1 root supergroup         44 2017-01-07 20:30 /output/part-r-00000
root@lejian:/data# hadoop fs -cat /output/part-r-00000
Daivd male 16
Lily female 22
Lucy female 20
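The opening paragraph also lists union among the operations, although this article does not implement it. Under the same scheme it is straightforward: reuse IntersectionMap (any mapper that emits each Person record as the key will do) and have the reducer write every distinct key exactly once, which yields a duplicate-free union of A and B. The UnionReduce class below is a hypothetical sketch, not part of the original code.

package com.hadoop.mapreduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical reducer for the union A ∪ B: every distinct Person key is
// written exactly once, no matter how many relations it appears in.
public class UnionReduce extends Reducer<Person, IntWritable, Person, NullWritable> {

    @Override
    protected void reduce(Person key, Iterable<IntWritable> values, Context context)
            throws java.io.IOException, InterruptedException {
        context.write(key, NullWritable.get());
    }

}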
Difference
To compute the difference A − B, i.e. the records that exist in relation A but not in relation B, the Map phase emits (r, A) for every record r in A and (r, B) for every record r in B, using the source file name as the value. The Reduce phase inspects the relation names attached to each record and outputs the record only if the name of the subtracted relation never appears. For example, Tom male 21 occurs only in relationA and is kept, while Lily female 22 occurs in both relations and is dropped.
First list the contents of relationA and relationB under the /data directory in HDFS, as shown in Code 1-17.
Code 1-17
root@lejian:/data# hadoop fs -ls -R /data
-rw-r--r--   1 root supergroup        113 2017-01-07 20:48 /data/relationA
-rw-r--r--   1 root supergroup         69 2017-01-07 20:48 /data/relationB
root@lejian:/data# hadoop fs -cat /data/relationA
Tom male 21
Amy female 19
Daivd male 16
Lily female 22
Lucy female 20
John male 19
Rose female 19
Jojo female 26
root@lejian:/data# hadoop fs -cat /data/relationB
Daivd male 16
Jack male 15
Lily female 22
Lucy female 20
Tom male 25
Code 1-18 (DifferenceMap)
package com.hadoop.mapreduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class DifferenceMap extends Mapper<LongWritable, Text, Person, Text> {

    private Text relationName = new Text();

    @Override
    protected void setup(Context context) throws java.io.IOException, InterruptedException {
        // Tag every record with the name of the file (relation) it came from.
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        relationName.set(fileSplit.getPath().getName());
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
        Person person = new Person(value.toString());
        context.write(person, relationName);
    }

}
Code 1-19 (DifferenceReduce)
package com.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class DifferenceReduce extends Reducer<Person, Text, Person, NullWritable> {

    private String remove = "";

    @Override
    protected void setup(Context context) throws java.io.IOException, InterruptedException {
        // The name of the relation to subtract (e.g. "relationB") is passed in the configuration.
        Configuration conf = context.getConfiguration();
        remove = conf.get("remove");
    }

    @Override
    protected void reduce(Person key, Iterable<Text> values, Context context) throws java.io.IOException, InterruptedException {
        // Keep the record only if it never occurs in the subtracted relation.
        for (Text val : values) {
            if (remove.equals(val.toString())) {
                return;
            }
        }
        context.write(key, NullWritable.get());
    }

}
Code 1-20 (Difference driver; the third argument names the relation to subtract)
package com.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Difference {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 3) {
            throw new RuntimeException("请输入输入路径、输出路径和被减集合");
        }
        Configuration conf = new Configuration();
        conf.set("remove", args[2]);
        Job job = Job.getInstance(conf);
        job.setJobName("Difference");
        job.setJarByClass(Difference.class);
        job.setMapperClass(DifferenceMap.class);
        job.setMapOutputKeyClass(Person.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(DifferenceReduce.class);
        job.setOutputKeyClass(Person.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
Run Code 1-20; the result is shown in Code 1-21.
Code 1-21
root@lejian:/data# hadoop jar difference.jar com.hadoop.mapreduce.Difference /data /output relationB
…………
root@lejian:/data# hadoop fs -ls -R /output
-rw-r--r--   1 root supergroup          0 2017-01-08 08:59 /output/_SUCCESS
-rw-r--r--   1 root supergroup         69 2017-01-08 08:59 /output/part-r-00000
root@lejian:/data# hadoop fs -cat /output/part-r-00000
Tom male 21
Amy female 19
John male 19
Jojo female 26
Rose female 19
Natural Join
As shown in Code 1-22, the first column of the student set is the id, followed by the name, sex, and age; the first column of the grade set is the id, followed by the subject and the score. We want the natural join of the student and grade sets on id. In the Map phase, every record r from student or grade is emitted as the value, tagged with its source file name, with the record's id as the key. In the Reduce phase, the records collected for each key are separated by source (student or grade), their Cartesian product is formed, and the joined rows are written out. For example, for id 5 the reducer receives the student record Lily female 21 and the grade records Math 91 and English 88, and produces two joined rows.
In Code 1-22, the student and grade sets are stored in the /data directory in HDFS.
Code 1-22
root@lejian:/data# cat student
1 Amy female 18
2 Tom male 19
3 Sam male 21
4 John male 19
5 Lily female 21
6 Rose female 20
root@lejian:/data# cat grade
1 Math 89
2 Math 75
4 English 85
3 English 95
5 Math 91
5 English 88
6 Math 78
6 English 99
2 English 80
root@lejian:/data# hadoop fs -put student /data
root@lejian:/data# hadoop fs -put grade /data
root@lejian:/data# hadoop fs -ls -R /data
-rw-r--r--   1 root supergroup        105 2017-01-08 09:59 /data/grade
-rw-r--r--   1 root supergroup         93 2017-01-08 09:59 /data/student
Code 1-23 (NaturalJoinMap)
package com.hadoop.mapreduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class NaturalJoinMap extends Mapper<LongWritable, Text, IntWritable, Text> {

    private String fileName = "";
    private Text val = new Text();
    private IntWritable stuKey = new IntWritable();

    @Override
    protected void setup(Context context) throws java.io.IOException, InterruptedException {
        // Remember which input file (student or grade) this split belongs to.
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        fileName = fileSplit.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
        // Use the id column as the join key and prefix the record with its source file name.
        String[] arr = value.toString().split(" ");
        stuKey.set(Integer.parseInt(arr[0]));
        val.set(fileName + " " + value.toString());
        context.write(stuKey, val);
    }

}
Code 1-24 (NaturalJoinReduce)
package com.hadoop.mapreduce;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class NaturalJoinReduce extends Reducer<IntWritable, Text, Text, NullWritable> {

    private Text student = new Text();
    private Text value = new Text();

    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws java.io.IOException, InterruptedException {
        // Separate this id's records by source; copy them to String because Hadoop
        // reuses the same Text instance while iterating over the values.
        List<String> grades = new ArrayList<String>();
        for (Text val : values) {
            if (val.toString().contains("student")) {
                student.set(studentStr(val.toString()));
            } else {
                grades.add(gradeStr(val.toString()));
            }
        }
        // Cartesian product of the student record with each of its grade records.
        for (String grade : grades) {
            value.set(student.toString() + grade);
            context.write(value, NullWritable.get());
        }
    }

    private String studentStr(String line) {
        // Drop the leading file name; keep id, name, sex, and age.
        String[] arr = line.split(" ");
        StringBuilder str = new StringBuilder();
        for (int i = 1; i < arr.length; i++) {
            str.append(arr[i] + " ");
        }
        return str.toString();
    }

    private String gradeStr(String line) {
        // Drop the file name and the id; keep the subject and the score.
        String[] arr = line.split(" ");
        StringBuilder str = new StringBuilder();
        for (int i = 2; i < arr.length; i++) {
            str.append(arr[i] + " ");
        }
        return str.toString();
    }

}
Code 1-25 (NaturalJoin driver)
package com.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NaturalJoin {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 2) {
            throw new RuntimeException("请输入输入路径、输出路径");
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("NaturalJoin");
        job.setJarByClass(NaturalJoin.class);
        job.setMapperClass(NaturalJoinMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(NaturalJoinReduce.class);
        // The reducer emits Text keys, so the job output key class must be Text.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
Run Code 1-25; the result is shown in Code 1-26.
Code 1-26
root@lejian:/data# hadoop jar naturalJoin.jar com.hadoop.mapreduce.NaturalJoin /data /output
…………
root@lejian:/data# hadoop fs -ls -R /output
-rw-r--r--   1 root supergroup          0 2017-01-08 11:19 /output/_SUCCESS
-rw-r--r--   1 root supergroup        237 2017-01-08 11:19 /output/part-r-00000
root@lejian:/data# hadoop fs -cat /output/part-r-00000
1 Amy female 18 Math 89
2 Tom male 19 English 80
2 Tom male 19 Math 75
3 Sam male 21 English 95
4 John male 19 English 85
5 Lily female 21 English 88
5 Lily female 21 Math 91
6 Rose female 20 English 99
6 Rose female 20 Math 78