# Read data from the database directly into a Spark DataFrame via PySpark.
"""Read rows from a Hive table into a Spark DataFrame via PySpark, then
convert a small sample to a pandas DataFrame for inspection.

This reconstructs the original notebook transcript (which had console
output and REPL echoes pasted inline) into a runnable script, and
restores the try/finally that the original had commented out so the
Spark session is always released.
"""

import pandas as pd
from pyspark.sql import SparkSession


def main():
    """Query dev.dev_jiadian_user_yuge and print a 5-row pandas sample."""
    spark = (
        SparkSession.builder
        .appName("Python Spark SQL basic example")
        .master("local")
        .enableHiveSupport()
        .getOrCreate()
    )
    try:
        # LIMIT keeps the result tiny so show()/take() stay cheap.
        result = spark.sql("select * from dev.dev_jiadian_user_yuge limit 10")
        result.show()

        # take(5) returns Row objects; pandas accepts them as records,
        # and result.columns supplies the column names.
        sample = pd.DataFrame(result.take(5), columns=result.columns)
        print(sample)
    finally:
        # Always stop the session — the original left it leaked by
        # commenting out this cleanup.
        spark.stop()


if __name__ == "__main__":
    main()