SparkStreaming 自定义数据采集器

本文的前提条件: SparkStreaming in Java
参考地址:Spark Streaming Custom Receivers

1.自定义数据采集器

package cn.coreqi.receiver;

import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.receiver.Receiver;

import java.util.Random;

/**
 * Custom Spark Streaming data receiver.
 *
 * <p>While running, a background thread stores one synthetic message (a fixed prefix followed
 * by a random digit in {@code [0, 10)}) and then sleeps for 500 ms, repeating until the
 * receiver is asked to stop.
 *
 * <p>Thread-safety: the run flag is {@code volatile} because it is written by whichever thread
 * calls {@link #onStop()} / {@link #setFlg(boolean)} and read by the worker thread.
 */
public class MyReceiver extends Receiver<String> {
    /** Run flag polled by the worker thread; {@code false} asks the worker to exit. */
    private volatile boolean flg = true;

    /**
     * Worker thread created by the most recent {@link #onStart()}, or {@code null} when none
     * is active. Transient because a {@link Thread} cannot be serialized with the receiver.
     */
    private transient volatile Thread worker;

    /**
     * @return {@code true} while the worker loop is allowed to keep running
     */
    public boolean isFlg() {
        return flg;
    }

    /**
     * @param flg {@code false} to ask the worker loop to exit after its current iteration
     */
    public void setFlg(boolean flg) {
        this.flg = flg;
    }

    /**
     * Creates a receiver that uses the {@code MEMORY_ONLY} storage level.
     */
    public MyReceiver(){
        super(StorageLevel.MEMORY_ONLY());
    }

    /**
     * Creates a receiver with an explicit storage level.
     *
     * @param storageLevel storage level passed to the {@code Receiver} base class
     */
    public MyReceiver(StorageLevel storageLevel) {
        super(storageLevel);
    }

    /**
     * Called when the receiver is started: spawns the worker thread and returns immediately.
     */
    @Override
    public void onStart() {
        // Re-arm the flag: a previous onStop() left it false, and without this reset a
        // restarted receiver would spawn a thread that exits at once.
        flg = true;

        Thread thread = new Thread(() -> {
            // One generator per worker instead of a fresh instance on every iteration.
            Random random = new Random();
            while (flg) {
                try {
                    String message = "采集的数据为:" + random.nextInt(10);
                    store(message);
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    // Interrupted by onStop(): restore the interrupt status and exit quietly
                    // rather than killing the thread with an uncaught exception.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        });
        worker = thread;
        thread.start();
    }

    /**
     * Called when the receiver is stopped: clears the run flag and interrupts the worker so
     * it leaves its sleep immediately.
     */
    @Override
    public void onStop() {
        setFlg(false);

        Thread thread = worker;
        worker = null;
        if (thread != null) {
            thread.interrupt();
        }
    }
}

2.注册并使用

package cn.coreqi;

import cn.coreqi.receiver.MyReceiver;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.*;

public class Main {
    /**
     * Builds a local streaming context, attaches {@link MyReceiver} as its input, prints
     * every batch, and then blocks until the context terminates.
     *
     * @param args command-line arguments (unused)
     * @throws InterruptedException if the thread is interrupted while awaiting termination
     */
    public static void main(String[] args) throws InterruptedException {
        // Environment configuration for the job.
        SparkConf conf = new SparkConf();
        conf.setMaster("local[*]");
        conf.setAppName("sparkSql");

        // Second constructor argument is the batch (collection) interval.
        JavaStreamingContext streamingContext =
                new JavaStreamingContext(conf, Durations.seconds(3));

        // Register the custom receiver as the input stream and print each batch.
        streamingContext.receiverStream(new MyReceiver()).print();

        // A streaming receiver is a long-running task, so the context must not be closed
        // right away; main also must not return, or the application would end with it.
        streamingContext.start();              // start receiving
        streamingContext.awaitTermination();   // block until the context is terminated
    }
}
posted @ 2024-01-15 19:51  SpringCore  阅读(38)  评论(0)  编辑  收藏  举报