MapReduce案例-自定义数据类型
统计第一季度每个用户的手机上网流量、通话时间和短信数量。
数据格式:
电话 流量 时间 短信数(每行一条记录,各字段之间以制表符 \t 分隔)
这里自定义了一个实现 Writable 接口的类 PhoneNum,用来封装每条记录中的流量、通话时间和短信数,并作为 Map 和 Reduce 阶段的 value 类型进行序列化传输。
package demo;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that totals, per phone number, the data traffic, call time
 * and SMS count found in the input records.
 *
 * <p>Each input line is expected to contain at least four tab-separated
 * fields: phone number, traffic, call time, SMS count. The output is one
 * line per phone number followed by the three totals.
 *
 * @author youxiangyang
 */
public class PhoneCount {

    /** Minimum number of tab-separated fields a valid record must have. */
    private static final int MIN_FIELDS = 4;

    /** Counter group used to report records the mapper had to skip. */
    private static final String COUNTER_GROUP = "PhoneCount";

    /** Counter name for skipped (blank, short or non-numeric) records. */
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws ClassNotFoundException propagated from job submission
     * @throws IOException            propagated from job setup or submission
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws ClassNotFoundException, IOException, InterruptedException {
        if (args.length != 2) {
            System.err.println("user path err!");
            System.exit(-1);
        }

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(new Configuration(), "jobName");
        job.setJarByClass(PhoneCount.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(phmap.class);
        // Summation is associative and the reducer's input and output types
        // are the same, so it can also pre-aggregate on the map side.
        job.setCombinerClass(phreduce.class);
        job.setReducerClass(phreduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(PhoneNum.class);

        // Surface job failure to the caller/shell instead of always exiting 0.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

    /**
     * Parses one input line into a (phone number, {@link PhoneNum}) pair.
     *
     * <p>Lines with fewer than four fields, or whose numeric fields cannot be
     * parsed as {@code int}, are skipped and counted in the
     * {@code PhoneCount/MALFORMED_RECORDS} counter rather than failing the task.
     */
    public static class phmap extends Mapper<Object, Text, Text, PhoneNum> {

        /** Reused output key; its content is serialized by each write call. */
        private final Text phoneKey = new Text();

        @Override
        protected void map(Object key, Text value,
                Mapper<Object, Text, Text, PhoneNum>.Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length < MIN_FIELDS) {
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }

            final int gprs;
            final int phtime;
            final int phmail;
            try {
                // Trim every numeric field so stray spaces do not break parsing.
                gprs = Integer.parseInt(fields[1].trim());
                phtime = Integer.parseInt(fields[2].trim());
                phmail = Integer.parseInt(fields[3].trim());
            } catch (NumberFormatException e) {
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }

            phoneKey.set(fields[0]);
            context.write(phoneKey, new PhoneNum(gprs, phtime, phmail));
        }
    }

    /**
     * Sums the three counters of every {@link PhoneNum} received for one phone
     * number and emits a single total record.
     *
     * <p>Sums use {@link Math#addExact(int, int)}, so a total that does not
     * fit in an {@code int} raises {@link ArithmeticException} instead of
     * silently wrapping to a wrong value.
     */
    public static class phreduce extends Reducer<Text, PhoneNum, Text, PhoneNum> {

        @Override
        protected void reduce(Text key, Iterable<PhoneNum> value,
                Reducer<Text, PhoneNum, Text, PhoneNum>.Context context)
                throws IOException, InterruptedException {
            int gsum = 0;
            int pt = 0;
            int pm = 0;
            for (PhoneNum phoneNum : value) {
                gsum = Math.addExact(gsum, phoneNum.getInternate());
                pt = Math.addExact(pt, phoneNum.getCalltime());
                pm = Math.addExact(pm, phoneNum.getMailcount());
            }
            context.write(key, new PhoneNum(gsum, pt, pm));
        }
    }
}
package demo;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * Custom value type implementing {@link Writable}. It carries three
 * {@code int} counters: data traffic, call time and SMS count.
 *
 * <p>The fields are serialized in the order traffic, call time, SMS count;
 * {@link #write(DataOutput)} and {@link #readFields(DataInput)} must stay
 * in that same order.
 *
 * @author youxiangyang
 */
public class PhoneNum implements Writable {

    /** Data traffic counter. */
    private int internate;

    /** Call time counter. */
    private int calltime;

    /** SMS count. */
    private int mailcount;

    /**
     * No-argument constructor; leaves all counters at zero.
     * Presumably needed so the framework can create an instance before
     * calling {@link #readFields(DataInput)}.
     */
    public PhoneNum() {
    }

    /**
     * Creates a value with all three counters set.
     *
     * @param internate data traffic
     * @param calltime  call time
     * @param mailcount SMS count
     */
    public PhoneNum(int internate, int calltime, int mailcount) {
        this.internate = internate;
        this.calltime = calltime;
        this.mailcount = mailcount;
    }

    /** @return the data traffic counter */
    public int getInternate() {
        return this.internate;
    }

    /** @param internate new data traffic value */
    public void setInternate(int internate) {
        this.internate = internate;
    }

    /** @return the call time counter */
    public int getCalltime() {
        return this.calltime;
    }

    /** @param calltime new call time value */
    public void setCalltime(int calltime) {
        this.calltime = calltime;
    }

    /** @return the SMS count */
    public int getMailcount() {
        return this.mailcount;
    }

    /** @param mailcount new SMS count value */
    public void setMailcount(int mailcount) {
        this.mailcount = mailcount;
    }

    /**
     * Writes the three counters as consecutive ints.
     *
     * @param output sink to serialize into
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeInt(this.internate);
        output.writeInt(this.calltime);
        output.writeInt(this.mailcount);
    }

    /**
     * Reads the three counters in the same order {@link #write(DataOutput)}
     * emits them.
     *
     * @param input source to deserialize from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.internate = input.readInt();
        this.calltime = input.readInt();
        this.mailcount = input.readInt();
    }

    /**
     * @return the three counters separated by tab characters, in the order
     *         traffic, call time, SMS count
     */
    @Override
    public String toString() {
        return new StringBuilder()
                .append(this.internate)
                .append('\t')
                .append(this.calltime)
                .append('\t')
                .append(this.mailcount)
                .toString();
    }
}