Daily Summary
Today I studied Spark's database (JDBC) read and write operations.
The work was done in Python (PySpark).
# coding:utf8
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, IntegerType
from pyspark.sql import functions as F

if __name__ == '__main__':
    spark = SparkSession.builder.\
        appName("test").\
        master("local[*]").\
        config("spark.sql.shuffle.partitions", 2).\
        getOrCreate()
    sc = spark.sparkContext

    # Read the CSV data
    df = spark.read.format("csv").\
        option("sep", ";").\
        option("header", True).\
        load("../data/input/sql/input/people.csv")

    # Write out as plain text, joining the columns with "---"
    # df.select(F.concat_ws("---", "name", "age", "job")).\
    #     write.\
    #     mode("overwrite").\
    #     format("text").\
    #     save("../data/input/sql/text")

    # Write the DataFrame into MySQL via JDBC
    # df.write.mode("overwrite").\
    #     format("jdbc").\
    #     option("url", "jdbc:mysql://192.168.0.14:3306/book?useSSL=false&useUnicode=true&allowPublicKeyRetrieval=true&serverTimezone=UTC").\
    #     option("dbtable", "word_count").\
    #     option("user", "root").\
    #     option("password", "020907").\
    #     save()

    # Read the table back from MySQL via JDBC
    df2 = spark.read.format("jdbc"). \
        option("url", "jdbc:mysql://192.168.0.14:3306/book?useSSL=false&useUnicode=true&allowPublicKeyRetrieval=true&serverTimezone=UTC"). \
        option("dbtable", "word_count"). \
        option("user", "root"). \
        option("password", "020907"). \
        load()
    df2.printSchema()
    df2.show()
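Note: both the JDBC write and the JDBC read need the MySQL driver on Spark's classpath. If the connector JAR is not already in Spark's jars directory, one option is to hand it to the session when it is built. A minimal sketch, assuming the JAR sits at a local path (the path and version below are placeholders, not part of the original script):

# A sketch only: point the session at a local MySQL connector JAR.
spark = SparkSession.builder.\
    appName("test").\
    master("local[*]").\
    config("spark.jars", "/path/to/mysql-connector-java-8.0.33.jar").\
    getOrCreate()

The same JAR can also be supplied with --jars when launching through spark-submit.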
Run results:
root
|-- name: string (nullable = true)
|-- age: string (nullable = true)
|-- job: string (nullable = true)
+-----+----+---------+
| name| age| job|
+-----+----+---------+
|Jorge| 30|Developer|
| Bob| 32|Developer|
| Ani| 11|Developer|
| Lily| 11| Manager|
| Put| 11|Developer|
|Alice| 9| Manager|
|Alice| 9| Manager|
|Alice| 9| Manager|
|Alice| 9| Manager|
|Alice|null| Manager|
|Alice| 9| null|
+-----+----+---------+
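As the printed schema shows, age comes back as a string. That is most likely because the CSV was read with header=True but without inferSchema or an explicit schema, so every column defaulted to string, and that type was then carried into the MySQL table when it was overwritten. A sketch of one way to fix it, reusing the StructType/StringType/IntegerType imports already present in the script:

# A sketch: read the same CSV with an explicit schema so age is an integer.
schema = StructType().\
    add("name", StringType(), nullable=True).\
    add("age", IntegerType(), nullable=True).\
    add("job", StringType(), nullable=True)
df = spark.read.format("csv").\
    option("sep", ";").\
    option("header", True).\
    schema(schema).\
    load("../data/input/sql/input/people.csv")

Writing this df to MySQL would then produce a numeric age column instead of a text one.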