02_Hadoop序列化_2.2 自定义Bean对象 实现序列化接口(Writable)

 

 代码示例

复制代码
package GroupByPoneNumPk {

  import java.io.{DataInput, DataOutput}
  import java.lang

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.fs.Path
  import org.apache.hadoop.io.{LongWritable, Text, Writable}
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
  import org.apache.hadoop.mapreduce.{Job, Mapper, Reducer}

  // Mapper: one instance processes a single input split.
  // Input  (byte offset, line of text)  ->  Output (phone number, FlowBean).
  class GroupByPoneNumMapper extends Mapper[LongWritable, Text, Text, FlowBean] {
    // Reused output key — avoids allocating a new Text per record.
    val text = new Text

    // Called once per input line. Sample record (whitespace separated):
    // 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
    override def map(key: LongWritable, value: Text, context: Mapper[LongWritable, Text, Text, FlowBean]#Context): Unit = {
      //1. Split the record on runs of spaces.
      //   NOTE(review): if the real data is tab-separated this split will
      //   misparse — confirm against the input file.
      val fields: Array[String] = value.toString.split(" +")
      println("第一行 : " + fields.mkString("-"))
      val phone = fields(1)
      //   Up/down flow are the 3rd- and 2nd-from-last columns; index from the
      //   end directly instead of reversing the whole array twice per record.
      val upflow = fields(fields.length - 3)
      val downflow = fields(fields.length - 2)

      //2. Build the value object (total is computed later, in the reducer).
      val flowBean = new FlowBean(upflow.toInt, downflow.toInt, 0)

      //3. Emit to the shuffle buffer.
      text.set(phone)
      context.write(text, flowBean)
      println(flowBean)
    }
  }


  // Reducer: runs only after every Mapper instance has finished.
  // The Mapper's output types must match the Reducer's input types.
  class GroupByPoneNumReducer extends Reducer[Text, FlowBean, Text, FlowBean] {

    // Invoked once per distinct key, with all values grouped under that key.
    override def reduce(key: Text, values: lang.Iterable[FlowBean], context: Reducer[Text, FlowBean, Text, FlowBean]#Context) = {
      println("reduce into ....")
      //1. Accumulate upstream/downstream traffic over every bean for this phone.
      var totalUp = 0
      var totalDown = 0
      val it = values.iterator
      while (it.hasNext) {
        val bean = it.next()
        totalUp += bean.upflow
        totalDown += bean.downflow
      }

      //2. Total traffic = upstream + downstream.
      val aggregated = new FlowBean(totalUp, totalDown, totalUp + totalDown)

      //3. Emit the aggregated record.
      context.write(key, aggregated)
      println("第二行 :" + aggregated)

    }
  }

  // Driver: configures and submits the MapReduce job.
  object Driver {
    def main(args: Array[String]): Unit = {
      //1. Build the job from a fresh Hadoop configuration.
      val configuration = new Configuration
      val job: Job = Job.getInstance(configuration)

      //2. Register the jar containing this driver class.
      job.setJarByClass(this.getClass)

      job.setJobName("scala mr")

      //3. Register the Mapper and Reducer implementations.
      job.setMapperClass(classOf[GroupByPoneNumMapper])
      job.setReducerClass(classOf[GroupByPoneNumReducer])

      //4. Map-output key/value types.
      job.setMapOutputKeyClass(classOf[Text])
      job.setMapOutputValueClass(classOf[FlowBean])

      //5. Final (reducer) output key/value types.
      job.setOutputKeyClass(classOf[Text])
      job.setOutputValueClass(classOf[FlowBean])

      //6. Input/output paths (the output directory must not already exist).
      FileInputFormat.setInputPaths(job, new Path("src/main/data/input/phone_data.txt"))
      FileOutputFormat.setOutputPath(job, new Path("src/main/data/output"))

      //7. Submit, block until completion, and exit 0 on success / 1 on failure.
      //   (Replaces the original string-parsing detour: matching the Boolean
      //   and calling "0".toInt / "1".toInt to obtain constants.)
      val succeeded: Boolean = job.waitForCompletion(true)
      System.exit(if (succeeded) 0 else 1)

    }

  }


  // Record layout of one input line:
  //   1363157985066          record id
  //   13726230503            phone number
  //   00-FD-07-A4-72-B8:CMCC MAC address
  //   120.196.100.82         network IP
  //   i02.c.aliimg.com       domain
  //   24 / 27                (unused columns)
  //   2481                   upstream traffic
  //   24681                  downstream traffic
  //   200                    HTTP status code
  //
  // Custom value bean implementing Hadoop's Writable so it can be serialized
  // across the shuffle. The no-arg primary constructor is required by Hadoop
  // to instantiate the bean reflectively before calling readFields.
  class FlowBean() extends Writable {
    var upflow = 0
    var downflow = 0
    var sumflow = 0

    // Convenience constructor used by the mapper and reducer.
    def this(upflow: Int, downflow: Int, sumflow: Int) = {
      this()
      this.upflow = upflow
      this.downflow = downflow
      this.sumflow = sumflow
    }

    // Serialize the three counters; order must match readFields exactly.
    override def write(out: DataOutput): Unit =
      Seq(upflow, downflow, sumflow).foreach(out.writeInt)

    // Deserialize in the same field order as write.
    override def readFields(in: DataInput): Unit = {
      upflow = in.readInt()
      downflow = in.readInt()
      sumflow = in.readInt()
    }

    // Text form used as the job's output value: "up \t down \t sum".
    override def toString: String =
      Array(upflow, downflow, sumflow).mkString(" \t ")
  }


}
复制代码

 

posted @   学而不思则罔!  阅读(143)  评论(0编辑  收藏  举报
编辑推荐:
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界
点击右上角即可分享
微信分享提示