scala JobConf实现

 

package first_mapreduce_scala

import org.apache.hadoop.mapreduce.Job
import java.util._
import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf._
import org.apache.hadoop.io._
import org.apache.hadoop.mapred._
import org.apache.hadoop.util._
import flowsum.FlowBean
import org.apache.hadoop.io.NullWritable

class Map extends MapReduceBase with Mapper[LongWritable, Text, Text, FlowBean] {
  private val one = new IntWritable(1)
  private val word = new Text()
  override def map(key:LongWritable, value:Text, output:OutputCollector[Text, FlowBean], reporter:Reporter) {
     val line = value.toString()
     if (line.isEmpty) return
     val tokens: Array[String] = line.split(" ")
     var username : String = tokens(1)
     output.collect(new Text(username), new FlowBean(username, tokens(2).toLong, 1))
  }
}

class Reduce extends MapReduceBase with Reducer[Text, FlowBean, Text, FlowBean] {
   override def reduce(key: Text, values: Iterator[FlowBean], output: OutputCollector[Text, FlowBean],  reporter: Reporter) {
     var numberReadCount:Long = 0;
     var numberArticleCount:Long = 0;
     while(values.hasNext){
             var bean = values.next;
          numberReadCount += bean.getNumberRead;
          numberArticleCount += bean.getNumberArticle;
     }
      output.collect(key, new FlowBean(key.toString(), numberReadCount, numberArticleCount));
   }
}

class RemovallMap extends MapReduceBase with Mapper[Object, Text, Text, NullWritable] {
  private val one = new IntWritable(1)
  private val word = new Text()
  override def map(key:Object, value:Text, output:OutputCollector[Text, NullWritable], reporter:Reporter) {
    output.collect(value, NullWritable.get);
  }
}

class RemovallReduce extends MapReduceBase with Reducer[Text, NullWritable, Text, NullWritable] {
   override def reduce(key: Text, values: Iterator[NullWritable], output: OutputCollector[Text, NullWritable],  reporter: Reporter) {
     output.collect(key, NullWritable.get);
   }
}

object WordCount {
  def main(args: Array[String]) {
    var conf = new JobConf(this.getClass)
    conf.setJobName("yiyi")
    conf.setOutputKeyClass(classOf[Text])
    conf.setOutputValueClass(classOf[FlowBean])
    conf.setMapperClass(classOf[Map])
    conf.setCombinerClass(classOf[Reduce])
    conf.setReducerClass(classOf[Reduce])
    conf.setInputFormat(classOf[TextInputFormat])
    conf.setOutputFormat(classOf[TextOutputFormat[Text, FlowBean]])
    FileInputFormat.setInputPaths(conf, new Path("/Users/lihu/Desktop/crawle/tap.txt"))
    FileOutputFormat.setOutputPath(conf, new Path("/Users/lihu/Desktop/crawle/logsfajflkawa"))
    JobClient.runJob(conf)

  }
}

 

posted on 2017-02-04 15:03  sunyaxue  阅读(384)  评论(0编辑  收藏  举报

导航