Spark: Reading Configuration to Connect to MySQL and Upload to HDFS

1. Reading the configuration

driver.properties

#mysql
driver=com.mysql.jdbc.Driver
url=jdbc:mysql://192.168.56.111:3306/myshops2
user=root
password=root

#hadoop
hadoop_url=hdfs://192.168.56.111:9000
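
The properties file above must be on the classpath (for example under src/main/resources) so it can be located through the class loader. The helper below (ReadPropertiesFileTool.scala) loads it and returns the relevant key/value pairs as a Map.
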
package com.njbdqn.util

import java.util.Properties

object ReadPropertiesFileTool {
  def readProperties(flag:String): Map[String,String] ={
    val prop = new Properties()
    // load driver.properties from the classpath; a resource stream also works when the
    // project is packaged into a jar, unlike a FileInputStream on getResource(...).getPath
    prop.load(ReadPropertiesFileTool.getClass.getClassLoader.getResourceAsStream("driver.properties"))
    var map:Map[String,String] = Map.empty
    if(flag.equalsIgnoreCase("mysql")){
      map+=("driver"->prop.getProperty("driver"))
      map+=("url"->prop.getProperty("url"))
      map+=("user"->prop.getProperty("user"))
      map+=("password"->prop.getProperty("password"))
    }else{
      map+=("hadoop_url"->prop.getProperty("hadoop_url"))
    }
    map
  }

}
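
A minimal usage sketch (not from the original post; the demo object is hypothetical): the flag "mysql" returns the JDBC settings, any other flag returns the Hadoop settings.

package com.njbdqn.util

// Hypothetical usage sketch: print a few of the values loaded from driver.properties.
object ReadPropertiesDemo {
  def main(args: Array[String]): Unit = {
    val mysqlConf  = ReadPropertiesFileTool.readProperties("mysql")
    val hadoopConf = ReadPropertiesFileTool.readProperties("hadoop")
    println(mysqlConf("url"))         // jdbc:mysql://192.168.56.111:3306/myshops2
    println(hadoopConf("hadoop_url")) // hdfs://192.168.56.111:9000
  }
}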

2. Reading the configuration from resources and operating on MySQL

package com.njbdqn.util
import java.util.Properties

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object MYSQLConnection {

  val paramMap = ReadPropertiesFileTool.readProperties("mysql")

  // Read the specified table from the database into a DataFrame
  def readMySql(spark:SparkSession,tableName:String): DataFrame ={
    val map:Map[String,String] = Map(
      "driver"->paramMap("driver"),
      "url"->paramMap("url"),
      "user"->paramMap("user"),
      "password"->paramMap("password"),
      "dbtable"->tableName
    )
    spark.read.format("jdbc").options(map) // adds input options for the underlying JDBC data source
      .load()
  }

  // Write the DataFrame to the specified table, overwriting it if it exists
  def writeTable(spark:SparkSession,df:DataFrame,tableName:String): Unit ={
    val prop = new Properties()
    prop.put("user",paramMap("user"))
    prop.put("password",paramMap("password"))
    df.write.mode(SaveMode.Overwrite).jdbc(paramMap("url"),tableName,prop)
  }

}
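
A rough usage sketch, assuming a local SparkSession; the demo object and the table names "orders" / "orders_bak" are illustrative, and the MySQL JDBC driver (mysql-connector-java) must be on the classpath.

package com.njbdqn.util

import org.apache.spark.sql.SparkSession

// Hypothetical usage sketch for MYSQLConnection.
object MysqlDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("mysql-demo").master("local[*]").getOrCreate()

    val orders = MYSQLConnection.readMySql(spark, "orders")   // load a MySQL table as a DataFrame
    orders.show(10)

    MYSQLConnection.writeTable(spark, orders, "orders_bak")   // overwrite (or create) the target table
    spark.stop()
  }
}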

3. Uploading to / downloading from HDFS

package com.njbdqn.util

import org.apache.spark.ml.classification.LogisticRegressionModel
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
 * HDFS operations: read/write DataFrames and LogisticRegression models
 */
object HDFSConnection {

  val paramMap = ReadPropertiesFileTool.readProperties("hadoop")

  /**
   * Write a DataFrame to HDFS (default format, Parquet)
   */
  def writeDataToHDFS(path:String,df:DataFrame): Unit ={
    df.write.mode(SaveMode.Overwrite).save(paramMap("hadoop_url")+path)
  }

  /**
   * Read a DataFrame back from the specified HDFS path (Parquet)
   */
  def readDataToHDFS(spark:SparkSession,path:String): DataFrame ={
    spark.read.parquet(paramMap("hadoop_url")+path)
  }

  /**
   * Load the LogisticRegression model from HDFS
   */
  def readLRModelToHDFS(path:String): LogisticRegressionModel ={
    LogisticRegressionModel.read.load(paramMap("hadoop_url")+path)
  }

  /**
   * Save the LogisticRegression model to HDFS
   */
  def writeLRModelToHDFS(lr:LogisticRegressionModel,path:String): Unit ={
    lr.save(paramMap("hadoop_url")+path)
  }

}
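
A rough sketch of how these helpers can be combined, assuming the MYSQLConnection helper above; the demo object, the table name "orders", and the paths "/data/orders" and "/model/lr" are illustrative.

package com.njbdqn.util

import org.apache.spark.sql.SparkSession

// Hypothetical usage sketch for HDFSConnection.
object HdfsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("hdfs-demo").master("local[*]").getOrCreate()

    val orders = MYSQLConnection.readMySql(spark, "orders")          // pull a table from MySQL
    HDFSConnection.writeDataToHDFS("/data/orders", orders)           // persist it to HDFS as Parquet

    val back = HDFSConnection.readDataToHDFS(spark, "/data/orders")  // read it back into a DataFrame
    back.show(5)

    // a trained model can be persisted and restored the same way:
    // HDFSConnection.writeLRModelToHDFS(lrModel, "/model/lr")
    // val lrModel = HDFSConnection.readLRModelToHDFS("/model/lr")

    spark.stop()
  }
}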

 
