DataFrame通过JDBC读写数据读写到数据mysql
# coding:utf8 import time from pyspark.sql import SparkSession from pyspark.sql.types import StructType, StringType, IntegerType import pandas as pd from pyspark.sql import functions as F if __name__ == '__main__': # 0. 构建执行环境入口对象SparkSession spark = SparkSession.builder.\ appName("test").\ master("local[*]").\ config("spark.sql.shuffle.partitions", 2).\ getOrCreate() sc = spark.sparkContext # 1. 读取数据集 schema = StructType().add("user_id", StringType(), nullable=True). \ add("movie_id", IntegerType(), nullable=True). \ add("rank", IntegerType(), nullable=True). \ add("ts", StringType(), nullable=True) df = spark.read.format("csv"). \ option("sep", "\t"). \ option("header", False). \ option("encoding", "utf-8"). \ schema(schema=schema). \ load("../data/input/sql/u.data") # # 1. 写出df到mysql数据库中 df.write.mode("overwrite").\ format("jdbc").\ option("url", "jdbc:mysql://node1:3306/db_msg?useSSL=false&useUnicode=true").\ option("dbtable", "movie_data").\ option("user", "root").\ option("password", "123456").\ save()