pyspark 注册临时表(registerTempTable)

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext, Row

def _split_on_space(line):
    """Split one "key value" record into its list of fields."""
    return line.split(" ")


def _to_title_row(fields):
    """Build a (name, title) Row from a two-field record."""
    return Row(name=fields[0], title=fields[1])


def _to_fraction_row(fields):
    """Build a (name, fraction) Row from a two-field record."""
    return Row(name=fields[0], fraction=fields[1])


# Spark entry points: the SparkContext builds the RDDs, the SQLContext
# runs the SQL queries issued further down.
conf = SparkConf().setAppName("spark_sql_table_join")
sc = SparkContext(conf=conf)
sqlCtx = SQLContext(sc)

# Raw records, one "name value" string per element. "name4" exists only in
# the first data set and "name2" only in the second, so the outer joins
# below have unmatched rows on both sides.
line1 = sc.parallelize(["name1 a", "name3 c", "name4 d"])
line2 = sc.parallelize(["name1 1", "name2 2", "name3 3"])

# First data set: name -> title, registered for SQL under the name "table1".
word1 = line1.map(_split_on_space)
table1 = word1.map(_to_title_row)
tableSchema1 = sqlCtx.inferSchema(table1)
tableSchema1.registerTempTable("table1")

# Second data set: name -> fraction, registered for SQL under the name "table2".
word2 = line2.map(_split_on_space)
table2 = word2.map(_to_fraction_row)
tableSchema2 = sqlCtx.inferSchema(table2)
tableSchema2.registerTempTable("table2")


def printRows(rows):
    """Print each element of *rows* on its own line.

    Args:
        rows: An iterable of rows, such as the list returned by
            ``collect()``. ``None`` or an empty collection prints nothing.
    """
    if rows:
        for row in rows:
            # The parenthesised single-argument form prints the same text
            # under both Python 2 (print statement) and Python 3 (function).
            print(row)

# Run the same three-column projection once per join type and print every
# result set, with a separator line between consecutive result sets.
# The query text differs only in the join keyword(s), so it is built from a
# single template instead of being repeated four times.
JOIN_QUERY_TEMPLATE = (
    "select table1.name, table1.title, table2.fraction "
    "from table1 {join} table2 on table1.name = table2.name"
)

JOIN_CLAUSES = (
    "join",              # inner join
    "left outer join",   # left outer join
    "right outer join",  # right outer join
    "full outer join",   # full outer join
)

SEPARATOR = "============================================="

for index, join_clause in enumerate(JOIN_CLAUSES):
    rows = sqlCtx.sql(JOIN_QUERY_TEMPLATE.format(join=join_clause)).collect()

    # No separator before the first result set, matching the original output.
    if index:
        # print(...) with one argument is valid in both Python 2 and Python 3.
        print(SEPARATOR)

    printRows(rows)

"""
Row(name=u'name1', title=u'a', fraction=u'1')
Row(name=u'name3', title=u'c', fraction=u'3')
=============================================
Row(name=u'name1', title=u'a', fraction=u'1')
Row(name=u'name3', title=u'c', fraction=u'3')
Row(name=u'name4', title=u'd', fraction=None)
=============================================
Row(name=u'name1', title=u'a', fraction=u'1')
Row(name=None, title=None, fraction=u'2')
Row(name=u'name3', title=u'c', fraction=u'3')
=============================================
Row(name=u'name1', title=u'a', fraction=u'1')
Row(name=None, title=None, fraction=u'2')
Row(name=u'name3', title=u'c', fraction=u'3')
Row(name=u'name4', title=u'd', fraction=None)
"""

# Stop the SparkContext created at the top of the script now that all
# queries have been run.
sc.stop()

posted @ 2017-02-22 20:10  嘻哈怒叱  阅读(7599)  评论(0编辑  收藏  举报