import; import; import; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; public class MapBus extends MapReduceBase implements Mapper<LongWritable, Text, Text, LongWritable> { @Override public void map(LongWritable key, Text date, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException { //2013-01-11,-200 String line = date.toString(); if(line.contains(",")){ String[] tmp = line.split(","); String month = tmp[0].substring(5, 7); int money = Integer.valueOf(tmp[1]).intValue(); output.collect(new Text(month), new LongWritable(money)); } } }
import; import java.util.Iterator; import; import; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; public class ReduceBus extends MapReduceBase implements Reducer<Text, LongWritable, Text, LongWritable> { @Override public void reduce(Text month, Iterator<LongWritable> money, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException { int total_money = 0; while(money.hasNext()){ total_money +=; } output.collect(month, new LongWritable(total_money)); } }
import org.apache.hadoop.fs.Path; import; import; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; public class Wallet { public static void main(String[] args){ if(args.length != 2){ System.err.println("param error!"); System.exit(-1); } JobConf jobConf = new JobConf(Wallet.class); jobConf.setJobName("My Wallet"); FileInputFormat.addInputPath(jobConf, new Path(args[0])); FileOutputFormat.setOutputPath(jobConf, new Path(args[1])); jobConf.setMapperClass(MapBus.class); jobConf.setReducerClass(ReduceBus.class); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(LongWritable.class); try{ JobClient.runJob(jobConf); }catch(Exception e){ e.printStackTrace(); } } }
2013-01-01,100 2013-01-02,-100 2013-01-07,100 2013-01-10,-100 2013-01-11,100 2013-01-21,-100 2013-01-22,100 2013-01-25,-100 2013-01-27,100 2013-01-18,-100 2013-01-09,500
设置好运行参数后，就可以通过 Run As -> Java Application 运行 MapReduce 程序了。
在 Windows 上运行时可能会遇到如下错误：
Failed to set permissions of path: \tmp\hadoop-linkage\mapred\staging\linkage1150562408\.staging to 0700
14/02/11 10:54:16 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 14/02/11 10:54:16 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same. 14/02/11 10:54:16 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String). 14/02/11 10:54:16 INFO mapred.FileInputFormat: Total input paths to process : 2 14/02/11 10:54:17 INFO mapred.JobClient: Running job: job_local_0001 14/02/11 10:54:17 INFO mapred.FileInputFormat: Total input paths to process : 2 14/02/11 10:54:17 INFO mapred.MapTask: numReduceTasks: 1 14/02/11 10:54:17 INFO mapred.MapTask: io.sort.mb = 100 14/02/11 10:54:17 INFO mapred.MapTask: data buffer = 79691776/99614720 14/02/11 10:54:17 INFO mapred.MapTask: record buffer = 262144/327680 14/02/11 10:54:17 INFO mapred.MapTask: Starting flush of map output 14/02/11 10:54:18 INFO mapred.MapTask: Finished spill 0 14/02/11 10:54:18 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting 14/02/11 10:54:18 INFO mapred.LocalJobRunner: file:/E:/cygwin_root/home/input/2013-01.txt:0+179 14/02/11 10:54:18 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000000_0' done. 14/02/11 10:54:18 INFO mapred.MapTask: numReduceTasks: 1 14/02/11 10:54:18 INFO mapred.MapTask: io.sort.mb = 100 14/02/11 10:54:18 INFO mapred.MapTask: data buffer = 79691776/99614720 14/02/11 10:54:18 INFO mapred.MapTask: record buffer = 262144/327680 14/02/11 10:54:18 INFO mapred.MapTask: Starting flush of map output 14/02/11 10:54:18 INFO mapred.MapTask: Finished spill 0 14/02/11 10:54:18 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000001_0 is done. And is in the process of commiting 14/02/11 10:54:18 INFO mapred.LocalJobRunner: file:/E:/cygwin_root/home/input/2013-02.txt:0+16 14/02/11 10:54:18 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000001_0' done. 
14/02/11 10:54:18 INFO mapred.LocalJobRunner: 14/02/11 10:54:18 INFO mapred.Merger: Merging 2 sorted segments 14/02/11 10:54:18 INFO mapred.Merger: Down to the last merge-pass, with 2 segments left of total size: 160 bytes 14/02/11 10:54:18 INFO mapred.LocalJobRunner: 14/02/11 10:54:18 INFO mapred.TaskRunner: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting 14/02/11 10:54:18 INFO mapred.LocalJobRunner: 14/02/11 10:54:18 INFO mapred.TaskRunner: Task attempt_local_0001_r_000000_0 is allowed to commit now 14/02/11 10:54:18 INFO mapred.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to file:/E:/cygwin_root/home/output 14/02/11 10:54:18 INFO mapred.LocalJobRunner: reduce > reduce 14/02/11 10:54:18 INFO mapred.TaskRunner: Task 'attempt_local_0001_r_000000_0' done. 14/02/11 10:54:18 INFO mapred.JobClient: map 100% reduce 100% 14/02/11 10:54:18 INFO mapred.JobClient: Job complete: job_local_0001 14/02/11 10:54:18 INFO mapred.JobClient: Counters: 13 14/02/11 10:54:18 INFO mapred.JobClient: FileSystemCounters 14/02/11 10:54:18 INFO mapred.JobClient: FILE_BYTES_READ=39797 14/02/11 10:54:18 INFO mapred.JobClient: FILE_BYTES_WRITTEN=80473 14/02/11 10:54:18 INFO mapred.JobClient: Map-Reduce Framework 14/02/11 10:54:18 INFO mapred.JobClient: Reduce input groups=2 14/02/11 10:54:18 INFO mapred.JobClient: Combine output records=0 14/02/11 10:54:18 INFO mapred.JobClient: Map input records=12 14/02/11 10:54:18 INFO mapred.JobClient: Reduce shuffle bytes=0 14/02/11 10:54:18 INFO mapred.JobClient: Reduce output records=2 14/02/11 10:54:18 INFO mapred.JobClient: Spilled Records=24 14/02/11 10:54:18 INFO mapred.JobClient: Map output bytes=132 14/02/11 10:54:18 INFO mapred.JobClient: Map input bytes=195 14/02/11 10:54:18 INFO mapred.JobClient: Combine input records=0 14/02/11 10:54:18 INFO mapred.JobClient: Map output records=12 14/02/11 10:54:18 INFO mapred.JobClient: Reduce input records=12
01 500 02 100
org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory file:/E:/cygwin_root/home/output already exists