Spark SQL Lab 5.3

3. Program with DataFrames to read and write MySQL data
(1) In MySQL, create a new database sparktest, then create a table employee containing the following two rows of data:
Table 1: Existing data in the employee table

id   name    gender   age
1    Alice   F        22
2    John    M        25
Answer:
mysql> create database sparktest;
mysql> use sparktest;
mysql> create table employee (id int(4), name char(20), gender char(4), age int(4));
mysql> insert into employee values(1,'Alice','F',22);
mysql> insert into employee values(2,'John','M',25);
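To double-check the setup, the table can be queried; with the two inserts above, the output should look roughly like this:

mysql> select * from employee;
+------+-------+--------+------+
| id   | name  | gender | age  |
+------+-------+--------+------+
|    1 | Alice | F      |   22 |
|    2 | John  | M      |   25 |
+------+-------+--------+------+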
(2) Configure Spark to connect to MySQL through JDBC, write a program that uses a DataFrame to insert the following data into MySQL, and finally print the maximum and the sum of the age column.
Table 2: Data to be added to the employee table

id   name    gender   age
3    Mary    F        26
4    Tom     M        23
Answer: Assume the working directory is /usr/local/spark/mycode/testmysql. First create the source directory layout with mkdir -p src/main/scala, then create a file named testmysql.scala under /usr/local/spark/mycode/testmysql/src/main/scala, as shown below.
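The shell commands for this setup (vim here is just one possible editor):

cd /usr/local/spark/mycode/testmysql
mkdir -p src/main/scala
vim src/main/scala/testmysql.scala

Then copy the following code into testmysql.scala: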
import java.util.Properties
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession

object TestMySQL {
  def main(args: Array[String]) {
    val spark = SparkSession.builder().appName("TestMySQL").getOrCreate()
    // Build an RDD holding the two new rows, splitting each line on spaces
    val employeeRDD = spark.sparkContext.parallelize(
      Array("3 Mary F 26", "4 Tom M 23")).map(_.split(" "))
    // Schema matching the employee table in MySQL
    val schema = StructType(List(
      StructField("id", IntegerType, true),
      StructField("name", StringType, true),
      StructField("gender", StringType, true),
      StructField("age", IntegerType, true)))
    val rowRDD = employeeRDD.map(p => Row(p(0).toInt, p(1).trim, p(2).trim, p(3).toInt))
    val employeeDF = spark.createDataFrame(rowRDD, schema)
    // JDBC connection properties
    val prop = new Properties()
    prop.put("user", "root")
    prop.put("password", "hadoop")
    prop.put("driver", "com.mysql.jdbc.Driver")
    // Append the two new rows to sparktest.employee
    employeeDF.write.mode("append").jdbc(
      "jdbc:mysql://localhost:3306/sparktest", "sparktest.employee", prop)
    // Read the table back and print max(age) and sum(age)
    val jdbcDF = spark.read.format("jdbc")
      .option("url", "jdbc:mysql://localhost:3306/sparktest")
      .option("driver", "com.mysql.jdbc.Driver")
      .option("dbtable", "employee")
      .option("user", "root")
      .option("password", "hadoop")
      .load()
    jdbcDF.agg("age" -> "max", "age" -> "sum").show()
  }
}
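To compile and run the program, one approach is sbt plus spark-submit. The following is a sketch, assuming Spark 2.1.0 with Scala 2.11.8 and the mysql-connector-java-5.1.40 driver jar; adjust versions and paths to match your installation. First, a minimal simple.sbt in the project root:

name := "Simple Project"
version := "1.0"
scalaVersion := "2.11.8"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.1.0"

Then package the project and submit it, passing the MySQL JDBC driver to Spark with --jars:

cd /usr/local/spark/mycode/testmysql
/usr/local/sbt/sbt package
/usr/local/spark/bin/spark-submit --jars /usr/local/spark/jars/mysql-connector-java-5.1.40-bin.jar --class "TestMySQL" target/scala-2.11/simple-project_2.11-1.0.jar

With all four rows now in the table, the final aggregation should print max(age) = 26 and sum(age) = 96.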

  
