Computing Metrics with SparkSQL and SparkCore

Scala: 2.10.6

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>job2</groupId>
    <artifactId>JobNew</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!--<dependency>-->
        <!--<groupId>org.apache</groupId>-->
        <!--<artifactId>spark-assembly-1.6.0-hadoop-2.6.0</artifactId>-->
        <!--<version>1.6.0</version>-->
        <!--</dependency>-->

        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.31</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.10</artifactId>
            <version>1.6.0</version>
        </dependency>

        <dependency>
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.3.1</version>
        </dependency>

        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.1.1</version>
        </dependency>

        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.4</version>
        </dependency>

        <dependency>
            <groupId>ch.hsr</groupId>
            <artifactId>geohash</artifactId>
            <version>1.3.0</version>
        </dependency>

        <dependency>
            <groupId>org.scalikejdbc</groupId>
            <artifactId>scalikejdbc-core_2.10</artifactId>
            <version>2.5.0</version>
        </dependency>

        <dependency>
            <groupId>org.scalikejdbc</groupId>
            <artifactId>scalikejdbc-config_2.10</artifactId>
            <version>2.5.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>1.6.0</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.36</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <plugins>
            <plugin>
                <!-- see http://davidb.github.com/scala-maven-plugin -->
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.3</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-make:transitive</arg>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.13</version>
                <configuration>
                    <useFile>false</useFile>
                    <disableXmlReport>true</disableXmlReport>
                    <!-- If you have classpath issue like NoDefClassError,... -->
                    <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
                    <includes>
                        <include>**/*Test.*</include>
                        <include>**/*Suite.*</include>
                    </includes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

SparkSQL approach

package com.devicetype

import java.util.Properties

import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SaveMode}

object Devicetype {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName(this.getClass.getName)
      .setMaster("local[*]")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)
    val sQLContext = new SQLContext(sc)

    val file = sQLContext.read.parquet("D:\\Job1\\parquet5\\part-r-00000-dcccc879-86a0-47d5-91e1-83636cd561d0.gz.parquet")
    // Compress any Parquet output with snappy (only affects writes)
    sQLContext.setConf("spark.sql.parquet.compression.codec", "snappy")

    file.registerTempTable("devicetype")
    // Aggregate the metrics per device type (labels: 手机 = phone, 平板 = tablet, 未知 = unknown)
    val df = sQLContext.sql(
      """
        |select case when devicetype = 1 then "手机" when devicetype = 2 then "平板" else "未知" end devicetype,
        |sum(case when requestmode = 1 and processnode >= 1 then 1 else 0 end) ysrequest,
        |sum(case when requestmode = 1 and processnode >= 2 then 1 else 0 end) yxrequest,
        |sum(case when requestmode = 1 and processnode = 3 then 1 else 0 end) adrequest,
        |sum(case when iseffective = 1 and isbilling = 1 and isbid = 1 then 1 else 0 end) cybid,
        |sum(case when iseffective = 1 and isbilling = 1 and iswin = 1 and adorderid != 0 then 1 else 0 end) cybidsuccees,
        |sum(case when requestmode = 2 and iseffective = 1 then 1 else 0 end) shows,
        |sum(case when requestmode = 3 and iseffective = 1 then 1 else 0 end) clicks,
        |sum(case when iseffective = 1 and isbilling = 1 and iswin = 1 then winprice/1000 else 0 end) dspcost,
        |sum(case when iseffective = 1 and isbilling = 1 and iswin = 1 then adpayment/1000 else 0 end) dsppay
        |from devicetype group by devicetype
      """.stripMargin)

    // Load the JDBC settings from the Typesafe Config file (application.conf)
    val load = ConfigFactory.load()
    val properties = new Properties()
    properties.setProperty("user", load.getString("jdbc.user"))
    properties.setProperty("password", load.getString("jdbc.password"))
    // Write the result into the MySQL table devicetype1
    df.write.mode(SaveMode.Append).jdbc(load.getString("jdbc.url"), "devicetype1", properties)
  }
}
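
The JDBC settings above are read with Typesafe Config, so they have to be on the classpath, typically in src/main/resources/application.conf. A minimal sketch of that file — the URL, database name and credentials below are placeholders, not the original values; point them at your own MySQL instance:

jdbc.url = "jdbc:mysql://localhost:3306/job?useUnicode=true&characterEncoding=utf8"
jdbc.user = "root"
jdbc.password = "123456"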






/*************************************************/
SparkCore approach
package com.driver

import com.utils.RptUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SQLContext}

object DriverData_SparkCore {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName(this.getClass.getName)
      .setMaster("local[*]")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)
    val sQLContext = new SQLContext(sc)
    // Compress any Parquet output with snappy (only affects writes)
    sQLContext.setConf("spark.sql.parquet.compression.codec", "snappy")
    val file: DataFrame = sQLContext.read.parquet("D:\\Job1\\parquet5\\part-r-00000-dcccc879-86a0-47d5-91e1-83636cd561d0.gz.parquet")

    file.map(row => {
      // Fields for original / valid / ad requests
      val requestmode = row.getAs[Int]("requestmode")
      val processnode = row.getAs[Int]("processnode")
      // Fields for bids, winning bids, shows and clicks
      val iseffective = row.getAs[Int]("iseffective")
      val isbilling = row.getAs[Int]("isbilling")
      val isbid = row.getAs[Int]("isbid")
      val iswin = row.getAs[Int]("iswin")
      val adorderid = row.getAs[Int]("adorderid")
      val winprice = row.getAs[Double]("winprice")
      val ad = row.getAs[Double]("adpayment")
      // Business helpers: original requests, valid requests, ad requests
      val reqlist = RptUtils.req(requestmode, processnode)
      // Bids, winning bids, DSP cost, DSP payment
      val adlist = RptUtils.addap(iseffective, isbilling, isbid, iswin, adorderid, winprice, ad)
      // Shows and clicks
      val adCountlist = RptUtils.Counts(requestmode, iseffective)
      // Key by the ISP name dimension, value is the concatenated metric list
      (row.getAs[String]("ispname"),
        reqlist ++ adlist ++ adCountlist)
    }).reduceByKey((list1, list2) => {
      // Element-wise sum, e.g. list1(0,1,1,0) zip list2(1,1,1,1) => ((0,1),(1,1),(1,1),(0,1))
      list1.zip(list2).map(t => t._1 + t._2)
    })
      // Format each record as "ispname , metric1,metric2,..."
      .map(t => t._1 + " , " + t._2.mkString(","))
      // Save the result as text (HDFS in production, a local path here)
      .saveAsTextFile("D:\\Job1\\SparkCore")
  }
}
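
Each output line is the ISP name followed by the nine accumulated metrics, in order: original requests, valid requests, ad requests, bids, winning bids, DSP cost, DSP payment, shows, clicks. A quick way to inspect the result afterwards, e.g. from spark-shell where sc is predefined (the local path is the one used above):

// Re-read the saved text output and print a few lines for a sanity check
sc.textFile("D:\\Job1\\SparkCore").take(10).foreach(println)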

package com.utils

object RptUtils {
  // Original requests, valid requests, ad requests
  def req(reqMode: Int, proMode: Int): List[Double] = {
    if (reqMode == 1 && proMode == 1) {
      // element 1: original requests
      // element 2: valid requests
      // element 3: ad requests
      List[Double](1, 0, 0)
    } else if (reqMode == 1 && proMode == 2) {
      List[Double](1, 1, 0)
    } else if (reqMode == 1 && proMode == 3) {
      List[Double](1, 1, 1)
    } else {
      List[Double](0, 0, 0)
    }
  }

  // Bids, winning bids, DSP cost, DSP payment
  def addap(iseffective: Int, isbilling: Int,
            isbid: Int, iswin: Int, adorderid: Int, winprice: Double, ad: Double): List[Double] = {
    if (iseffective == 1 && isbilling == 1 && isbid == 1) {
      if (iseffective == 1 && isbilling == 1 && iswin == 1 && adorderid != 0) {
        List[Double](1, 1, winprice / 1000.0, ad / 1000.0)
      } else {
        List[Double](1, 0, 0, 0)
      }
    } else {
      List[Double](0, 0, 0, 0)
    }
  }

  // Shows and clicks
  def Counts(requestmode: Int, iseffective: Int): List[Double] = {
    if (requestmode == 2 && iseffective == 1) {
      List[Double](1, 0)
    } else if (requestmode == 3 && iseffective == 1) {
      List[Double](0, 1)
    } else {
      List[Double](0, 0)
    }
  }
}
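
The three helper methods always return fixed-length lists (3 + 4 + 2 = 9 metrics), which is what lets the element-wise zip-and-sum in reduceByKey line up. A small standalone check of that arithmetic — RptUtilsDemo and the two sample rows below are made up for illustration, not part of the original job:

import com.utils.RptUtils

object RptUtilsDemo {
  def main(args: Array[String]): Unit = {
    // Row A: ad request (requestmode = 1, processnode = 3) that bid and won order 100
    val rowA = RptUtils.req(1, 3) ++ RptUtils.addap(1, 1, 1, 1, 100, 220.0, 180.0) ++ RptUtils.Counts(1, 1)
    // Row B: show record (requestmode = 2) for the same key
    val rowB = RptUtils.req(2, 3) ++ RptUtils.addap(1, 0, 0, 0, 0, 0.0, 0.0) ++ RptUtils.Counts(2, 1)
    // Same element-wise sum as the reduceByKey in DriverData_SparkCore
    val merged = rowA.zip(rowB).map { case (a, b) => a + b }
    println(merged.mkString(","))   // 1.0,1.0,1.0,1.0,1.0,0.22,0.18,1.0,0.0
  }
}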