PySpark: converting a list[dict] to a PySpark DataFrame

During data processing, convert a list of dicts (nested fields) into a PySpark DataFrame. Full example below.

# coding=utf-8
from pyspark.sql import SparkSession
from pyspark.sql import Row


# Note: this helper class shadows pyspark.SparkContext; it is only a local
# context manager that builds a Hive-enabled SparkSession and stops it on exit.
class SparkContext:
    def __init__(self, name="cleaner"):
        self.spark = (
            SparkSession.builder.appName(name)
                .config("hive.exec.dynamic.partition", True)
                .config("hive.exec.dynamic.partition.mode", "nonstrict")
                .enableHiveSupport()
                .getOrCreate()
        )
        self.spark.sparkContext.setLogLevel("ERROR")

    def __enter__(self):
        return self.spark

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.spark.stop()


def main():
    # Source data: a list of dicts, one dict per record.
    data = [{'ent_name': '百度', 'credit_code': '1234567890'},
            {'ent_name': 'abc', 'credit_code': '121212222'}
            ]
    # Convert each dict to a Row so the column names come from the dict keys.
    row_data = [Row(**row) for row in data]
    with SparkContext('test_df') as spark:
        df = spark.createDataFrame(row_data)
        df.show()


if __name__ == '__main__':
    main()
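
If you would rather not build Row objects, createDataFrame can also take the list of dicts directly together with an explicit StructType schema, which keeps column order and types under your control instead of relying on inference (passing dicts without a schema can raise a deprecation warning on older Spark 2.x versions). A minimal sketch reusing the SparkContext helper above; the function name main_with_schema is just illustrative:

from pyspark.sql.types import StructType, StructField, StringType


def main_with_schema():
    data = [{'ent_name': '百度', 'credit_code': '1234567890'},
            {'ent_name': 'abc', 'credit_code': '121212222'}]
    # Explicit schema: both columns are strings, matched to the dict keys by name.
    schema = StructType([
        StructField("ent_name", StringType(), True),
        StructField("credit_code", StringType(), True),
    ])
    with SparkContext('test_df_schema') as spark:
        df = spark.createDataFrame(data, schema=schema)
        df.show()
        df.printSchema()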

  
