Storm-jdbc Part 2: The Advanced API and Trident

I previously wrote a simple demo of Storm's JDBC integration. After digging into it more deeply recently, here is the code.

First, an abstract class to cut down on duplicated code:


import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.common.ConnectionProvider;
import org.apache.storm.jdbc.common.HikariCPConnectionProvider;
import org.apache.storm.jdbc.common.JdbcClient;
import org.apache.storm.jdbc.mapper.JdbcLookupMapper;
import org.apache.storm.jdbc.mapper.JdbcMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcLookupMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
import org.apache.storm.tuple.Fields;

import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * @author cwc
 * @date 2018-06-30
 * @description: an abstract class that improves code reuse
 * @version 1.0.0
 */
public abstract class AbstractUserTopology {
    // SQL statements that create the tables, define the columns, and seed data; adapt as needed
    private static final List<String> setupSqls = Lists.newArrayList(
            "drop table if exists user",
            "drop table if exists department",
            "drop table if exists user_department",
            "create table if not exists user (user_id integer, user_name varchar(100), dept_name varchar(100), create_date date)",
            "create table if not exists department (dept_id integer, dept_name varchar(100))",
            "create table if not exists user_department (user_id integer, dept_id integer)",
            "insert into department values (1, 'R&D')",
            "insert into department values (2, 'Finance')",
            "insert into department values (3, 'HR')",
            "insert into department values (4, 'Sales')",
            "insert into user_department values (1, 1)",
            "insert into user_department values (2, 2)",
            "insert into user_department values (3, 3)",
            "insert into user_department values (4, 4)"
    );
    
    protected JdbcSpout jdbcSpout;               // spout used for testing
    protected JdbcMapper jdbcMapper;             // maps tuple fields to table columns
    protected JdbcLookupMapper jdbcLookupMapper;

    // Thread-safe ConnectionProvider implementation; the interface has three methods:
    // prepare() to initialize, getConnection() to obtain a connection, and cleanup() to release it
    protected ConnectionProvider connectionProvider;

    protected static final String TABLE_NAME = "storms"; // target table; note it is not created by setupSqls and must already exist
    protected static final String JDBC_CONF = "jdbc.conf"; // config key for the JDBC settings
    protected static final String SELECT_QUERY = "select dept_name from department, user_department where department.dept_id = user_department.dept_id" +
            " and user_department.user_id = ?"; // lookup SQL query
    
    public void execute(String[] args) throws Exception {
        // put the connection settings into a map
        Map<String, Object> map = Maps.newHashMap();
        map.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        map.put("dataSource.url", "jdbc:mysql://localhost:3306/mytest?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai");
        map.put("dataSource.user", "root");
        map.put("dataSource.password", "your_password");

        Config config = new Config();
        config.put(JDBC_CONF, map); // expose the JDBC settings to the topology config

        ConnectionProvider connectionProvider = new HikariCPConnectionProvider(map);
        // initialize the connection pool
        connectionProvider.prepare();
        // query timeout in seconds
        int queryTimeoutSecs = 60;
        // wrap the pool in a JDBC client
        JdbcClient jdbcClient = new JdbcClient(connectionProvider, queryTimeoutSecs);
        // create the tables and seed data
        for (String sql : setupSqls) {
            System.err.println("sql:" + sql);
            // execute each setup statement
            jdbcClient.executeSql(sql);
        }

        this.jdbcSpout = new JdbcSpout();
        // SimpleJdbcMapper uses connectionProvider and the table name to read the table's
        // metadata (column names and types) and initialize its schema columns; when
        // inserting by table name, every column of the table must be supplied
        this.jdbcMapper = new SimpleJdbcMapper(TABLE_NAME, connectionProvider);
        // close the connection pool used for setup
        connectionProvider.cleanup();
        // the code above can run on its own
        Fields outputFields = new Fields("user_id", "user_name", "dept_name", "create_date");
        // query parameter column; user_id takes the value emitted by the spout
        List<Column> queryParamColumns = Lists.newArrayList(new Column("user_id", Types.INTEGER));
        // the lookup fills in dept_name among outputFields; the other three fields come straight from the spout
        this.jdbcLookupMapper = new SimpleJdbcLookupMapper(outputFields, queryParamColumns);
        // a fresh, not-yet-initialized provider for the topology itself
        this.connectionProvider = new HikariCPConnectionProvider(map);
        String topoName = "test";
        if (args == null || args.length == 0) {
            // no arguments: run on a local cluster (the null check must come first to avoid an NPE)
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(topoName, config, getTopology());
            // keep the JVM alive long enough for the topology to run; too short a sleep
            // cuts the run off, and don't forget to shut the cluster down afterwards
            Thread.sleep(1000000);
            cluster.shutdown();
        } else {
            StormSubmitter.submitTopology(args[0], config, getTopology()); // cluster mode: the first argument is the topology name
        }
    }

    public abstract StormTopology getTopology();
}
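
The comments above describe ConnectionProvider as a small, serializable interface with three lifecycle methods: prepare() initializes the provider, getConnection() hands out a connection, and cleanup() releases resources. For illustration, here is a minimal sketch of a hand-rolled implementation backed by DriverManager (the class name and constructor are my own, not part of storm-jdbc; in practice HikariCPConnectionProvider is the better choice because it pools connections):


import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

import org.apache.storm.jdbc.common.ConnectionProvider;

public class SimpleConnectionProvider implements ConnectionProvider {
    private final String jdbcUrl;
    private final String user;
    private final String password;

    public SimpleConnectionProvider(String jdbcUrl, String user, String password) {
        this.jdbcUrl = jdbcUrl;
        this.user = user;
        this.password = password;
    }

    @Override
    public void prepare() {
        // nothing to set up here; a pooled provider would build its pool in this method
    }

    @Override
    public Connection getConnection() {
        try {
            return DriverManager.getConnection(jdbcUrl, user, password);
        } catch (SQLException e) {
            throw new RuntimeException("failed to open connection", e);
        }
    }

    @Override
    public void cleanup() {
        // nothing to release; a pooled provider would close its pool in this method
    }
}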

Next comes a plain Storm topology (spout plus bolts) that writes the data:


import com.google.common.collect.Lists;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.jdbc.bolt.JdbcInsertBolt;
import org.apache.storm.jdbc.bolt.JdbcLookupBolt;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.mapper.JdbcMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
import org.apache.storm.topology.TopologyBuilder;

import java.sql.Types;
import java.util.List;
/**
 * @author cwc
 * @date 2018-07-04
 * @version 2.0.0
 * @description: write the data into the table in batches
 */
public class PersistanceTopology extends AbstractUserTopology {
    private static final String USER_SPOUT = "USER_SPOUT";
    private static final String LOOKUP_BOLT = "LOOKUP_BOLT";
    private static final String PERSISTANCE_BOLT = "PERSISTANCE_BOLT";

    public static void main(String[] args) throws Exception {
        new PersistanceTopology().execute(args); // inherited setup: builds the config, mappers, and connection provider
    }

    @Override
    public StormTopology getTopology() {
        JdbcLookupBolt departmentLookupBolt = new JdbcLookupBolt(connectionProvider, SELECT_QUERY, this.jdbcLookupMapper);

        // columns mapping tuple fields onto the user table's schema
        List<Column> schemaColumns = Lists.newArrayList(new Column("create_date", Types.DATE),
                new Column("dept_name", Types.VARCHAR), new Column("user_id", Types.INTEGER), new Column("user_name", Types.VARCHAR));
        JdbcMapper mapper = new SimpleJdbcMapper(schemaColumns);
        // bolt that inserts the enriched tuples into the user table
        JdbcInsertBolt userPersistanceBolt = new JdbcInsertBolt(connectionProvider, mapper)
                .withInsertQuery("insert into user (create_date, dept_name, user_id, user_name) values (?,?,?,?)");

        TopologyBuilder builder = new TopologyBuilder();

        builder.setSpout(USER_SPOUT, this.jdbcSpout, 1);
        builder.setBolt(LOOKUP_BOLT, departmentLookupBolt, 1).shuffleGrouping(USER_SPOUT);
        builder.setBolt(PERSISTANCE_BOLT, userPersistanceBolt, 1).shuffleGrouping(LOOKUP_BOLT);
        return builder.createTopology();
    }
}
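
To check that the topology actually wrote rows, the same JdbcClient used in AbstractUserTopology can read them back. A minimal sketch, assuming the same connection settings as above (the VerifyInserts class name is mine, not from the original post):


import java.sql.Types;
import java.util.List;
import java.util.Map;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.common.ConnectionProvider;
import org.apache.storm.jdbc.common.HikariCPConnectionProvider;
import org.apache.storm.jdbc.common.JdbcClient;

public class VerifyInserts {
    public static void main(String[] args) {
        Map<String, Object> map = Maps.newHashMap();
        map.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        map.put("dataSource.url", "jdbc:mysql://localhost:3306/mytest");
        map.put("dataSource.user", "root");
        map.put("dataSource.password", "your_password");

        ConnectionProvider provider = new HikariCPConnectionProvider(map);
        provider.prepare();
        JdbcClient client = new JdbcClient(provider, 60);

        // read back the rows that PersistanceTopology inserted for user_id = 1
        List<List<Column>> rows = client.select(
                "select user_name, dept_name from user where user_id = ?",
                Lists.newArrayList(new Column("user_id", 1, Types.INTEGER)));
        for (List<Column> row : rows) {
            for (Column column : row) {
                System.out.print(column.getColumnName() + "=" + column.getVal() + " ");
            }
            System.out.println();
        }
        provider.cleanup();
    }
}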

Writing to the database with Trident:



import org.apache.storm.generated.StormTopology;
import org.apache.storm.tuple.Fields;
import com.google.common.collect.Lists;
import com.sunsheen.jfids.bigdata.storm.demo.jdbc.AbstractUserTopology;
import com.sunsheen.jfids.bigdata.storm.demo.jdbc.JdbcSpout;

import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.mapper.SimpleJdbcLookupMapper;
import org.apache.storm.jdbc.trident.state.JdbcQuery;
import org.apache.storm.jdbc.trident.state.JdbcState;
import org.apache.storm.jdbc.trident.state.JdbcStateFactory;
import org.apache.storm.jdbc.trident.state.JdbcUpdater;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;

import java.sql.Types;
/**
 * @author cwc
 * @date 2018-07-04
 * @version 1.0.0
 * @description: write the data into the table via Trident
 */
public class UserPersistanceTridentTopology extends AbstractUserTopology {

    public static void main(String[] args) throws Exception {
        new UserPersistanceTridentTopology().execute(args);
    }

    @Override
    public StormTopology getTopology() {
        TridentTopology topology = new TridentTopology();
        // Trident state handles the database writes here, replacing the bolt classes
        JdbcState.Options options = new JdbcState.Options()
                .withConnectionProvider(connectionProvider)
                .withMapper(this.jdbcMapper)
                .withJdbcLookupMapper(new SimpleJdbcLookupMapper(new Fields("dept_name"), Lists.newArrayList(new Column("user_id", Types.INTEGER))))
                .withTableName(TABLE_NAME)
                .withSelectQuery(SELECT_QUERY);

        JdbcStateFactory jdbcStateFactory = new JdbcStateFactory(options);

        Stream stream = topology.newStream("userSpout", new JdbcSpout());
        TridentState state = topology.newStaticState(jdbcStateFactory);
        stream = stream.stateQuery(state, new Fields("user_id","user_name","create_date"), new JdbcQuery(), new Fields("dept_name"));
        stream.partitionPersist(jdbcStateFactory, new Fields("user_id","user_name","dept_name","create_date"),  new JdbcUpdater(), new Fields());
        return topology.build();
    }
}
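
While wiring this up it helps to see what the stream carries after the stateQuery. A hedged sketch: Trident ships a built-in Debug filter (org.apache.storm.trident.operation.builtin.Debug) that passes every tuple through and prints it, so it can be chained between the lookup and the persist inside getTopology():


        stream = stream.stateQuery(state, new Fields("user_id", "user_name", "create_date"),
                new JdbcQuery(), new Fields("dept_name"));
        // print each enriched tuple so the dept_name lookup can be checked on the console
        stream = stream.each(new Fields("user_id", "user_name", "dept_name", "create_date"), new Debug());
        stream.partitionPersist(jdbcStateFactory,
                new Fields("user_id", "user_name", "dept_name", "create_date"),
                new JdbcUpdater(), new Fields());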

The spout class:



import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import com.google.common.collect.Lists;

import java.util.List;
import java.util.Map;
import java.util.Random;
/**
 * @author cwc
 * @date 2018-05-31
 * @description: the spout that supplies the test data; the read and write paths share it
 * @version 1.0.0
 */
public class JdbcSpout extends BaseRichSpout {
	private static final long serialVersionUID = 1L;
	private SpoutOutputCollector collector;
	// mock data
	public static final List<Values> rows = Lists.newArrayList(
	            new Values(1,"peter",System.currentTimeMillis()),
	            new Values(2,"bob",System.currentTimeMillis()),
	            new Values(3,"alice",System.currentTimeMillis()));

	@Override
	public void nextTuple() {
		Random rand = new Random();
		// the bound must be rows.size(), not rows.size() - 1,
		// otherwise the last row would never be emitted
		Values row = rows.get(rand.nextInt(rows.size()));
		this.collector.emit(row);
		Thread.yield();
		System.out.println("emitting row ---------------------");
	}

	@Override
	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector = collector;
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		 declarer.declare(new Fields("user_id","user_name","create_date"));
	}

}
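
One caveat with this spout: nextTuple() emits without a message ID, so Storm cannot replay a tuple that fails downstream. A sketch of a reliable variant, assuming an illustrative msgId scheme of my own (anchor each emit with an ID, track pending tuples, and override ack/fail):


import java.util.Map;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class ReliableJdbcSpout extends BaseRichSpout {
	private static final long serialVersionUID = 1L;
	private SpoutOutputCollector collector;
	private Random rand;
	private Map<String, Values> pending;

	@Override
	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector = collector;
		this.rand = new Random();
		this.pending = new ConcurrentHashMap<>();
	}

	@Override
	public void nextTuple() {
		Values row = JdbcSpout.rows.get(rand.nextInt(JdbcSpout.rows.size()));
		String msgId = UUID.randomUUID().toString();
		pending.put(msgId, row);
		// anchoring the emit with msgId makes Storm call ack()/fail() for this tuple
		this.collector.emit(row, msgId);
	}

	@Override
	public void ack(Object msgId) {
		pending.remove(msgId); // fully processed downstream, safe to forget
	}

	@Override
	public void fail(Object msgId) {
		Values row = pending.get(msgId);
		if (row != null) {
			collector.emit(row, msgId); // replay the failed tuple
		}
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("user_id", "user_name", "create_date"));
	}
}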

That's all the code for today. Keep at it, everyone!
