入门

# rate
from
pyspark.sql import SparkSession spark = SparkSession.builder. \ appName("study_structured_streaming"). \ enableHiveSupport(). \ config("spark.debug.maxToStringFields", "100"). \ getOrCreate() df = spark.readStream \ .format(source="rate") \ .option("rowPerSecond", "5")\ .option("rampUpTime", "1s")\ .option("numPartitions", "2")\ .load() df.writeStream.format(source="console").outputMode("append").start().awaitTermination() spark.stop()

 # Sink的例子

from pyspark.sql import SparkSession

spark = SparkSession.builder. \
    appName("study_structured_streaming"). \
    enableHiveSupport(). \
    config("spark.debug.maxToStringFields", "100"). \
    getOrCreate()

df = spark.readStream \
    .format(source="rate") \
    .option("rowPerSecond", "5")\
    .option("rampUpTime", "1s")\
    .option("numPartitions", "2")\
    .load()

df.writeStream.format("csv").option("checkpointLocation", "ck/20210623/").option("path", "output/123.csv") .start().awaitTermination()

spark.stop()

 

posted @ 2021-06-23 20:11  muyue123  阅读(55)  评论(0编辑  收藏  举报