spark-streaming笔记 ---没用
1.从TCP端口中读取数据
yum install nc
nc -lk 9000
#注意:要指定并行度,如在本地运行设置setMaster("local[2]"),相当于启动两个线程,一个给receiver,一个给computation(计算)。
#如果是在集群中运行,必须要求集群中可用core数大于1
#提交spark-streaming任务
bin/spark-submit --class cn.itcast.spark.streaming.TCPDemo /root/streaming-1.0.jar
2.结合flume
2.1 flume push方式
#首先启动spark-streaming应用程序
bin/spark-submit --class cn.itcast.spark.streaming.FlumeWordCount /root/streaming-1.0.jar
#再启动flume
bin/flume-ng agent -n a1 -c conf/ -f conf/flume-push.conf -Dflume.root.logger=WARN,console
2.2 flume poll方式
#首先将下载好的spark-streaming-flume-sink_2.10-1.3.1.jar和spark的spark-assembly-1.3.1-hadoop2.4.0.jar放入到flume的lib目录下
#启动flume
bin/flume-ng agent -n a1 -c conf/ -f conf/flume-poll.conf -Dflume.root.logger=WARN,console
#再启动spark-streaming应用程序
bin/spark-submit --class cn.itcast.spark.streaming.FlumePollWordCount /root/streaming-1.0.jar
3.结合kafka
#首先启动zookeeper
bin/zookeeper-server-start.sh config/zookeeper.properties
#再启动kafka broker(注意:server.properties启动的是kafka broker,不是zk,zk必须先启动)
bin/kafka-server-start.sh config/server.properties
#创建topic
bin/kafka-topics.sh --create --zookeeper 192.168.80.10:2181 --replication-factor 1 --partitions 1 --topic wordcount
#查看主题
bin/kafka-topics.sh --list --zookeeper 192.168.80.10:2181
#启动一个生产者发送消息
bin/kafka-console-producer.sh --broker-list 192.168.80.10:9092 --topic wordcount
#启动spark-streaming应用程序
bin/spark-submit --class cn.itcast.spark.streaming.KafkaWordCount /root/streaming-1.0.jar 192.168.80.10:2181 group1 wordcount 1
bin/spark-submit --class cn.itcast.spark.UrlCount --master spark://node1.itcast.cn:7077 --executor-memory 1G --total-executor-cores 2 /root/SparkDemo-1.0.jar node1.itcast.cn:2181,node2.itcast.cn:2181,node3.itcast.cn:2181 group1 weblog 2