spark mr 文件统计

lyzx1,19
lyzx2,20
lyzx3,21
lyzx4,22
lyzx5,23
lyzx6,24
lyzx7,25
托塔天王
lyzx7,25,哈哈
package com.zxwa.live.process.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Small Spark demo: reads a comma-separated text file and prints a labelled
 * representation of every line, chosen by how many fields the line has.
 */
object ScalaTs {

  /**
   * Builds the printable value for one already-split line.
   *
   *  - exactly one field  -> the string `"one:<field>"`
   *  - exactly two fields -> the pair `("name:<f0>", "age:<f1>")`
   *  - anything else      -> the fixed placeholder triple `("_name", "_age", "_")`
   *
   * Matching on the array shape (instead of indexing after a length check)
   * means an empty array is handled by the fallback case rather than
   * throwing on `fields(0)`. `String.split` yields an empty array for a line
   * that consists only of separators, e.g. `","`.
   *
   * @param fields the fields of one input line, as produced by `split(",")`
   * @return a `String` or a tuple of strings; typed `Any` because the shape
   *         differs per case and the value is only ever printed
   */
  private def describe(fields: Array[String]): Any = fields match {
    case Array(one)       => s"one:$one"
    case Array(name, age) => (s"name:$name", s"age:$age")
    case _                => ("_name", "_age", "_")
  }

  /**
   * Entry point: runs the job on a local Spark master and prints one value
   * per input line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext(
      new SparkConf().setAppName("ProductSalesStat").setMaster("local[*]"))

    try {
      sparkContext
        .textFile("E:\\Data\\LIVE-DATA-SPARK\\src\\main\\resources\\people.txt")
        .map(line => describe(line.split(",")))
        // With a local master the tasks run in this JVM, so the output shows
        // up on this process's stdout.
        .foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sparkContext.stop()
    }
  }
}

 

posted @ 2022-05-06 21:22  Bonnie_ξ  阅读(22)  评论(0)  编辑  收藏  举报