Computing Metrics with SparkSQL and SparkCore

Scala: 2.10.6

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>job2</groupId>
    <artifactId>JobNew</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!--<dependency>-->
        <!--<groupId>org.apache</groupId>-->
        <!--<artifactId>spark-assembly-1.6.0-hadoop-2.6.0</artifactId>-->
        <!--<version>1.6.0</version>-->
        <!--</dependency>-->

        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.31</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.10</artifactId>
            <version>1.6.0</version>
        </dependency>

        <dependency>
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.3.1</version>
        </dependency>

        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.4</version>
        </dependency>

        <dependency>
            <groupId>ch.hsr</groupId>
            <artifactId>geohash</artifactId>
            <version>1.3.0</version>
        </dependency>

        <dependency>
            <groupId>org.scalikejdbc</groupId>
            <artifactId>scalikejdbc-core_2.10</artifactId>
            <version>2.5.0</version>
        </dependency>

        <dependency>
            <groupId>org.scalikejdbc</groupId>
            <artifactId>scalikejdbc-config_2.10</artifactId>
            <version>2.5.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>1.6.0</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.36</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <plugins>
            <plugin>
                <!-- see http://davidb.github.com/scala-maven-plugin -->
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.3</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-make:transitive</arg>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.13</version>
                <configuration>
                    <useFile>false</useFile>
                    <disableXmlReport>true</disableXmlReport>
                    <!-- If you have classpath issue like NoDefClassError,... -->
                    <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
                    <includes>
                        <include>**/*Test.*</include>
                        <include>**/*Suite.*</include>
                    </includes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

SparkSQL approach

package com.devicetype

import java.util.Properties

import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SaveMode}

object Devicetype {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName(this.getClass.getName)
      .setMaster("local[*]")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)
    val sQLContext = new SQLContext(sc)

    val file = sQLContext.read.parquet("D:\\Job1\\parquet5\\part-r-00000-dcccc879-86a0-47d5-91e1-83636cd561d0.gz.parquet")
    // Compress any Parquet output with snappy (only affects writes)
    sQLContext.setConf("spark.sql.parquet.compression.codec", "snappy")

    file.registerTempTable("devicetype")
    // Aggregate the metrics per device type (labels: 手机 = phone, 平板 = tablet, 未知 = unknown)
    val df = sQLContext.sql(
      """
        |select case when devicetype = 1 then "手机" when devicetype = 2 then "平板" else "未知" end devicetype,
        |sum(case when requestmode = 1 and processnode >= 1 then 1 else 0 end) ysrequest,
        |sum(case when requestmode = 1 and processnode >= 2 then 1 else 0 end) yxrequest,
        |sum(case when requestmode = 1 and processnode = 3 then 1 else 0 end) adrequest,
        |sum(case when iseffective = 1 and isbilling = 1 and isbid = 1 then 1 else 0 end) cybid,
        |sum(case when iseffective = 1 and isbilling = 1 and iswin = 1 and adorderid != 0 then 1 else 0 end) cybidsuccees,
        |sum(case when requestmode = 2 and iseffective = 1 then 1 else 0 end) shows,
        |sum(case when requestmode = 3 and iseffective = 1 then 1 else 0 end) clicks,
        |sum(case when iseffective = 1 and isbilling = 1 and iswin = 1 then winprice/1000 else 0 end) dspcost,
        |sum(case when iseffective = 1 and isbilling = 1 and iswin = 1 then adpayment/1000 else 0 end) dsppay
        |from devicetype group by devicetype
      """.stripMargin)

    // Load the JDBC settings from the Typesafe Config file (application.conf)
    val load = ConfigFactory.load()
    val properties = new Properties()
    properties.setProperty("user", load.getString("jdbc.user"))
    properties.setProperty("password", load.getString("jdbc.password"))
    // Write the result into the MySQL table devicetype1
    df.write.mode(SaveMode.Append).jdbc(load.getString("jdbc.url"), "devicetype1", properties)
  }
}
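
The JDBC settings above are read with Typesafe Config, so they have to be on the classpath, typically in src/main/resources/application.conf. A minimal sketch of that file — the URL, database name and credentials below are placeholders, not the original values; point them at your own MySQL instance:

jdbc.url = "jdbc:mysql://localhost:3306/job?useUnicode=true&characterEncoding=utf8"
jdbc.user = "root"
jdbc.password = "123456"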






/*************************************************/
SparkCore approach
package com.driver

import com.utils.RptUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SQLContext}

object DriverData_SparkCore {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName(this.getClass.getName)
      .setMaster("local[*]")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)
    val sQLContext = new SQLContext(sc)
    // Compress any Parquet output with snappy (only affects writes)
    sQLContext.setConf("spark.sql.parquet.compression.codec", "snappy")
    val file: DataFrame = sQLContext.read.parquet("D:\\Job1\\parquet5\\part-r-00000-dcccc879-86a0-47d5-91e1-83636cd561d0.gz.parquet")

    file.map(row => {
      // Fields for original / valid / ad requests
      val requestmode = row.getAs[Int]("requestmode")
      val processnode = row.getAs[Int]("processnode")
      // Fields for bids, winning bids, shows and clicks
      val iseffective = row.getAs[Int]("iseffective")
      val isbilling = row.getAs[Int]("isbilling")
      val isbid = row.getAs[Int]("isbid")
      val iswin = row.getAs[Int]("iswin")
      val adorderid = row.getAs[Int]("adorderid")
      val winprice = row.getAs[Double]("winprice")
      val ad = row.getAs[Double]("adpayment")
      // Business helpers: original requests, valid requests, ad requests
      val reqlist = RptUtils.req(requestmode, processnode)
      // Bids, winning bids, DSP cost, DSP payment
      val adlist = RptUtils.addap(iseffective, isbilling, isbid, iswin, adorderid, winprice, ad)
      // Shows and clicks
      val adCountlist = RptUtils.Counts(requestmode, iseffective)
      // Key by the ISP name dimension, value is the concatenated metric list
      (row.getAs[String]("ispname"),
        reqlist ++ adlist ++ adCountlist)
    }).reduceByKey((list1, list2) => {
      // Element-wise sum, e.g. list1(0,1,1,0) zip list2(1,1,1,1) => ((0,1),(1,1),(1,1),(0,1))
      list1.zip(list2).map(t => t._1 + t._2)
    })
      // Format each record as "ispname , metric1,metric2,..."
      .map(t => t._1 + " , " + t._2.mkString(","))
      // Save the result as text (HDFS in production, a local path here)
      .saveAsTextFile("D:\\Job1\\SparkCore")
  }
}
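
Each output line is the ISP name followed by the nine accumulated metrics, in order: original requests, valid requests, ad requests, bids, winning bids, DSP cost, DSP payment, shows, clicks. A quick way to inspect the result afterwards, e.g. from spark-shell where sc is predefined (the local path is the one used above):

// Re-read the saved text output and print a few lines for a sanity check
sc.textFile("D:\\Job1\\SparkCore").take(10).foreach(println)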

package com.utils

object RptUtils {
  // Original requests, valid requests, ad requests
  def req(reqMode: Int, proMode: Int): List[Double] = {
    if (reqMode == 1 && proMode == 1) {
      // element 1: original requests
      // element 2: valid requests
      // element 3: ad requests
      List[Double](1, 0, 0)
    } else if (reqMode == 1 && proMode == 2) {
      List[Double](1, 1, 0)
    } else if (reqMode == 1 && proMode == 3) {
      List[Double](1, 1, 1)
    } else {
      List[Double](0, 0, 0)
    }
  }

  // Bids, winning bids, DSP cost, DSP payment
  def addap(iseffective: Int, isbilling: Int,
            isbid: Int, iswin: Int, adorderid: Int, winprice: Double, ad: Double): List[Double] = {
    if (iseffective == 1 && isbilling == 1 && isbid == 1) {
      if (iseffective == 1 && isbilling == 1 && iswin == 1 && adorderid != 0) {
        List[Double](1, 1, winprice / 1000.0, ad / 1000.0)
      } else {
        List[Double](1, 0, 0, 0)
      }
    } else {
      List[Double](0, 0, 0, 0)
    }
  }

  // Shows and clicks
  def Counts(requestmode: Int, iseffective: Int): List[Double] = {
    if (requestmode == 2 && iseffective == 1) {
      List[Double](1, 0)
    } else if (requestmode == 3 && iseffective == 1) {
      List[Double](0, 1)
    } else {
      List[Double](0, 0)
    }
  }
}
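
The three helper methods always return fixed-length lists (3 + 4 + 2 = 9 metrics), which is what lets the element-wise zip-and-sum in reduceByKey line up. A small standalone check of that arithmetic — RptUtilsDemo and the two sample rows below are made up for illustration, not part of the original job:

import com.utils.RptUtils

object RptUtilsDemo {
  def main(args: Array[String]): Unit = {
    // Row A: ad request (requestmode = 1, processnode = 3) that bid and won order 100
    val rowA = RptUtils.req(1, 3) ++ RptUtils.addap(1, 1, 1, 1, 100, 220.0, 180.0) ++ RptUtils.Counts(1, 1)
    // Row B: show record (requestmode = 2) for the same key
    val rowB = RptUtils.req(2, 3) ++ RptUtils.addap(1, 0, 0, 0, 0, 0.0, 0.0) ++ RptUtils.Counts(2, 1)
    // Same element-wise sum as the reduceByKey in DriverData_SparkCore
    val merged = rowA.zip(rowB).map { case (a, b) => a + b }
    println(merged.mkString(","))   // 1.0,1.0,1.0,1.0,1.0,0.22,0.18,1.0,0.0
  }
}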