kafka-stream数据清洗

1、数据清洗业务类LogProcessor

package com.css.kafka.kafka_stream;

import java.nio.charset.StandardCharsets;

import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;

/**
 * 数据清洗*/
public class LogProcessor implements Processor<byte[], byte[]>{

    private ProcessorContext context;
    
    //初始化
    public void init(ProcessorContext context) {
        //传输
        this.context = context;
    }

    //具体业务逻辑
    public void process(byte[] key, byte[] value) {
        //1.拿到消息数据,转成字符串
        String message = new String(value);
        
        //2.如果包含-  去除
        if (message.contains("-")) {
            //3.把- 去掉 之后去掉左侧数据
            message = message.split("-")[1];
        }
        //4.发送数据
        context.forward(key, message.getBytes());
    }

    //释放资源
    public void close() {
    }
}

2、Application类

package com.css.kafka.kafka_stream;

import java.util.Properties;

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;

/**
 * 需求:对数据进行清洗操作
 * 
 * 思路:wo-henshuai  把-和wo清洗掉*/
public class Application {

    public static void main(String[] args) {
        //1.定义主题 发送到 另外一个主题中 数据清洗
        String oneTopic = "t1";
        String twoTopic = "t2";
        
        //2.设置属性
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "logProcessor");
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.146.132:9092,192.168.146.133:9092,192.168.146.134:9092");
        
        //3.实例对象
        StreamsConfig config = new StreamsConfig(prop);
        
        //4.流计算 拓扑
        Topology builder = new Topology();
        
        //5.定义kafka组件数据源
        builder.addSource("Source", oneTopic).addProcessor("Processor", new ProcessorSupplier<byte[], byte[]>() {

            public Processor<byte[], byte[]> get() {
                return new LogProcessor();
            }
            //从哪里来
        }, "Source")
        //到哪里去
        .addSink("Sink", twoTopic, "Processor");

        //6.实例化kafkaStream
        KafkaStreams kafkaStreams = new KafkaStreams(builder, prop);
        kafkaStreams.start();
    }
}

3、运行Application类的main方法

4、在hd09-1机器上创建主题t1

bin/kafka-topics.sh --zookeeper hd09-1:2181 --create --replication-factor 3 --partitions 1 --topic t1

5、在hd09-2机器上启动消费者

bin/kafka-console-consumer.sh --bootstrap-server hd09-2:9092 --topic t2 --from-beginning --consumer.config config/consumer.properties

6、在hd09-1机器上启动生产者

bin/kafka-console-producer.sh --broker-list hd09-1:9092 --topic t1

7、此时在hd09-1机器kafka生产者上输入  wo-henshuai,在hd09-2消费者机器上会显示henshuai,即完成了数据清洗,如下图。

 

posted on 2018-12-18 20:29    阅读(2021)  评论(0)  编辑  收藏  举报