PySpark 实现的 WordCount
使用 PySpark 实现的 WordCount(单词计数)示例。
from pyspark.sql import SparkSession, DataFrame

# Word count with PySpark: read a text file, split every line into
# whitespace-separated tokens, and count how often each token occurs.

# Obtain a session (creating one if none exists) with master "local[*]".
# The driver host is set to "localhost" explicitly.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("PythonWordCount")
    .config("spark.driver.host", "localhost")
    .getOrCreate()
)

# NOTE: despite its name, ``res_rdd`` is the collected result -- a list of
# (word, count) tuples held on the driver -- not an RDD.
res_rdd = (
    spark.read.text('E:/test.txt').rdd
    # Each Row has a single field holding one line of the file.
    .map(lambda row: row[0])
    # str.split() with no argument splits on any run of whitespace and
    # yields nothing for blank lines.
    .flatMap(lambda line: line.split())
    # Pair every word with an initial count of 1 ...
    .map(lambda word: (word, 1))
    # ... and sum the counts per distinct word.
    .reduceByKey(lambda left, right: left + right)
    # Bring the final (word, count) pairs back to the driver.
    .collect()
)