Flume + Kafka + Storm: wiring the pipeline end to end
0. Some of these steps I have already collected into scripts; some of the commands below are excerpts from those scripts.
1. Start HDFS and YARN
$HADOOP_HOME/sbin/start-dfs.sh;$HADOOP_HOME/sbin/start-yarn.sh
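As a quick sanity check (assuming a standard HDFS/YARN layout), jps should list the usual daemons:
jps | egrep 'NameNode|DataNode|SecondaryNameNode|ResourceManager|NodeManager'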
2. Start ZooKeeper
#The hostnames are mini1 through mini3, so we can just loop over them
echo "start zkserver "
for i in 1 2 3
do
ssh mini$i "source /etc/profile;$ZK_HOME/bin/zkServer.sh start"
done
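The same loop can be reused to confirm each node's role (one leader, the rest followers):
for i in 1 2 3
do
ssh mini$i "source /etc/profile;$ZK_HOME/bin/zkServer.sh status"
done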
3. Start mysqld
service mysqld start
4. Start Kafka; this must be done on every broker in the cluster
bin/kafka-server-start.sh config/server.properties
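The Flume sink below publishes to the topic orderMq, so create it first if it does not exist yet; the partition and replication counts here are only example values:
bin/kafka-topics.sh --create --zookeeper mini1:2181 --replication-factor 2 --partitions 3 --topic orderMq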
5. Start Storm
Start the nimbus service on the machine that nimbus.host points to:
nohup ./storm nimbus &
Start the ui service on that same machine:
nohup ./storm ui &
Start the supervisor service on the other machines:
nohup ./storm supervisor &
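You can verify with jps; on Storm 0.9.x the processes show up under these names (core is the UI process):
jps
# nimbus host: nimbus, core
# worker machines: supervisor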
6. Start Flume
#exec.conf
a1.channels = r1
a1.sources = c1
a1.sinks = k1

#A spooldir source is an alternative when real-time delivery is not required:
#a1.sources.c1.type = spooldir
#a1.sources.c1.channels = r1
#a1.sources.c1.spoolDir = /opt/flumeSpool/
#a1.sources.c1.fileHeader = false

a1.sources.c1.type = exec
a1.sources.c1.command = tail -F /home/hadoop/kafkastudy/data/flume_sources/click_log/1.log
a1.sources.c1.channels = r1

a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = orderMq
a1.sinks.k1.brokerList = mini1:9092,mini2:9092,mini3:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
a1.sinks.k1.channel = r1

a1.channels.r1.type = memory
a1.channels.r1.capacity = 10000
a1.channels.r1.transactionCapacity = 1000
bin/flume-ng agent --conf conf --conf-file conf/myconf/exec.conf --name a1 -Dflume.root.logger=INFO,console
7. Start the data generator
#!/bin/bash
for ((i = 0; i < 50000; i++))
do
    echo "msg-$i" >> /home/hadoop/kafkastudy/data/flume_sources/click_log/1.log
done
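One way to run it (the script name click_log_gen.sh is just an example): make it executable and leave it running in the background:
chmod +x click_log_gen.sh
nohup ./click_log_gen.sh &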
8. Watch the topology in the Storm UI at mini1:8080
Summary
a. The link between the data generator and Flume is set up in exec.conf: Flume tails the file that the generator writes to, so that file acts as the data source.
b. The link between Flume and Kafka is also configured in exec.conf (the KafkaSink). You can confirm messages are arriving with Kafka's console consumer:
bin/kafka-console-consumer.sh --zookeeper mini1:2181 --topic orderMq
c. The link between Kafka and Storm exists because we run a program of our own on Storm, kafka2tostorm, which configures a KafkaSpout and also contains our business logic.
d. The topology and bolt code:
package kafkaAndStorm2;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.ZkHosts;

public class KafkaAndStormTopologyMain {
    public static void main(String[] args)
            throws AlreadyAliveException, InvalidTopologyException, InterruptedException {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // ZkHosts: the ZooKeeper ensemble Kafka registers its brokers with;
        // "orderMq" = topic, "/mykafka" = ZK root for offset storage, "kafkaSpout" = consumer id
        SpoutConfig config = new SpoutConfig(
                new ZkHosts("mini1:2181,mini2:2181,mini3:2181"),
                "orderMq", "/mykafka", "kafkaSpout");
        topologyBuilder.setSpout("kafkaSpout", new KafkaSpout(config), 1);
        topologyBuilder.setBolt("mybolt1", new MyKafkaBolt2(), 1).shuffleGrouping("kafkaSpout");

        Config conf = new Config();
        // conf.setDebug(true); // enable to print debug info

        if (args != null && args.length > 0) {
            // with an argument: submit to the cluster under the given topology name
            StormSubmitter.submitTopology(args[0], conf, topologyBuilder.createTopology());
        } else {
            // without arguments: run in an in-process LocalCluster for testing
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("storm2kafka", conf, topologyBuilder.createTopology());
        }
    }
}
package kafkaAndStorm2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

import java.util.Map;

public class MyKafkaBolt2 extends BaseRichBolt {
    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        // KafkaSpout emits the raw message payload as the first tuple field (a byte[])
        byte[] value = (byte[]) input.getValue(0);
        String msg = new String(value);
        System.out.println(Thread.currentThread().getId() + " msg " + msg);
        // ack the tuple so the spout does not replay it after the message timeout
        collector.ack(input);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // this bolt only prints; it emits nothing downstream
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
Maven dependencies; you may need to adjust these based on the error messages you get.
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.itcast.learn</groupId>
    <artifactId>kafka2Strom</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>0.9.5</version>
            <!-- provided: the cluster already ships storm-core; drop this scope for local-mode runs -->
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka</artifactId>
            <version>0.9.5</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.clojure</groupId>
            <artifactId>clojure</artifactId>
            <version>1.5.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.8.2</artifactId>
            <version>0.8.1</version>
            <exclusions>
                <exclusion>
                    <artifactId>jmxtools</artifactId>
                    <groupId>com.sun.jdmk</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jmxri</artifactId>
                    <groupId>com.sun.jmx</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jms</artifactId>
                    <groupId>javax.jms</groupId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.7.3</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- main class of this project's topology -->
                            <mainClass>kafkaAndStorm2.KafkaAndStormTopologyMain</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
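With the assembly plugin above, packaging and submitting looks roughly like this (the jar name follows from artifactId/version; the topology name mykafka2storm is just an example argument, consumed as args[0] by the main class):
mvn clean package
storm jar target/kafka2Strom-1.0-SNAPSHOT-jar-with-dependencies.jar kafkaAndStorm2.KafkaAndStormTopologyMain mykafka2storm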