# RDD 转 DataFrame (convert an RDD to a DataFrame)
from pyspark.sql import SparkSession,Row
from pyspark.sql.types import StructField, StructType, StringType, IntegerType, LongType
# Build a small DataFrame from an RDD using an explicit schema.

# Reuse the active session (e.g. the one a PySpark shell or notebook already
# provides) or start a new one; the original referenced `spark` without
# ever creating it.
spark = SparkSession.builder.getOrCreate()

# Rows deliberately mix tuples and a list ("mixed" in the original note):
# per that note, this works as long as every row has the same structure.
data = [('Alex', 'male', 3), ('Nancy', 'female', 6), ['Jack', 'male', 9]]
rdd_ = spark.sparkContext.parallelize(data)

# Explicit schema. The third StructField argument is `nullable`:
# True means the column MAY contain nulls.
schema = StructType([
    StructField("name", StringType(), True),
    StructField("gender", StringType(), True),
    # The values are Python ints, so the column must be an integer type;
    # declaring it StringType fails schema verification.
    StructField("num", IntegerType(), True),
])

df = spark.createDataFrame(rdd_, schema=schema)

# show() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit a stray "None" line).
df.show()