spark在针对MySQL数据库主键对其余字段进行更新

package com.ustcinfo.SDK
import java.sql.{Connection, PreparedStatement}
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.sql._

object SDK_number {

  val spark: SparkSession = SparkSession
	.builder()
	.appName("SDK数据入库")
	.master("local[2]")
	.config("spark.debug.maxToStringFields", 1000)
	.getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  import spark.implicits._ //导入隐式转换
  import org.apache.spark.sql.functions._ //导入function函数

  def main(args: Array[String]): Unit = {

	val str = args(0)
	val gbkPath = s"/bdtj/test/app$str.txt"

val res_mysql: DataFrame = spark.sparkContext.hadoopFile(gbkPath, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], 1)
	  .map(p => new String(p._2.getBytes, 0, p._2.getLength, "UTF-8"))
	  //.filter(_.split("\\|\\+\\|", -1).length == 4)
	  .map(lines => {
	  val line: Array[String] = lines.split("\\|\\+\\|", -1)
	  (line(0).drop(1), line(1), line(2), line(3))
	}
	).toDF("ID", "FIVE_ID", "THREE_ID", "TESTTIME")
	res_mysql.show(100, false)

//数据以DF格式进行foreachPartition对每一条数据进行遍历
	res_mysql.select("ID", "FIVE_ID", "THREE_ID").foreachPartition(f => {
	  val connection: Connection = createConnection()//调用createConnection方法,实现对数据库连接
	  connection.setAutoCommit(false)
		//设置connection.setautocommit(false);只有程序调用connection.commit()的时候才会将先前执行的语句一起提交到数据库,这样就实现了数据库的事务。
//true:sql命令的提交(commit)由驱动程序负责
//false:sql命令的提交由应用程序负责,程序必须调用commit或者rollback方法
	  val sql: String = "insert into sdk_grid_num(ID,FIVE_ID,THREE_ID)" +
		"value(?,?,?) on duplicate key update FIVE_ID=?,THREE_ID=?"

  println("更新数据库中的数据")

   val statement: PreparedStatement = connection.prepareStatement(sql)
	  f.foreach(row => {
		  //这里注意更新插入字段顺序要一直
		statement.setString(1, row.getAs[String]("ID"))
		statement.setString(2, row.getAs[String]("FIVE_ID"))
		statement.setString(3, row.getAs[String]("THREE_ID"))
		statement.setString(5, row.getAs[String]("FIVE_ID"))
		statement.setString(6, row.getAs[String]("THREE_ID"))
		statement.addBatch()
	  })
	  statement.executeBatch()
	  connection.commit()
	  statement.close()
	  connection.close()
	})
	res_mysql.unpersist()
  }


  def createConnection(): Connection = {
	Class.forName("com.mysql.cj.jdbc.Driver").newInstaabcsnce()
	java.sql.DriverManager.getConnection("jdbc:mysql://132.123.13.113:3306/ads?useUnicode=true&characterEncoding=utf8", "abc", "123456")
  }

}
posted @ 2022-11-02 09:51  tonggang_bigdata  阅读(219)  评论(0编辑  收藏  举报