Distinct
1.topology
package com.suning.yystorm.business.adunion.uv; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.suning.yystorm.business.adunion.uv.bolt.UVAppFilterBolt; import com.suning.yystorm.business.adunion.uv.bolt.UVAppStockFilterBolt; import com.suning.yystorm.business.adunion.uv.bolt.UVCacheBolt; import com.suning.yystorm.business.adunion.uv.bolt.UVWapFilterBolt; import com.suning.yystorm.business.adunion.uv.bolt.UVWapStockFilterBolt; import com.suning.yystorm.utils.PropertyUtils; import backtype.storm.Config; import backtype.storm.StormSubmitter; import backtype.storm.spout.SchemeAsMultiScheme; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; import storm.kafka.BrokerHosts; import storm.kafka.KafkaSpout; import storm.kafka.SpoutConfig; import storm.kafka.StringScheme; import storm.kafka.ZkHosts; public class UVTopology { private static final Logger LOGGER = LoggerFactory.getLogger(UVTopology.class); public static void main(String[] args) { try { Config config = new Config(); config.setNumWorkers(8); config.setDebug(false); config.setMaxSpoutPending(3000); config.setMessageTimeoutSecs(180); String kafkaZKNode = PropertyUtils.getInstance().getValueByKey("kafka.zookeeper.nodes"); String kafkaZKRoot = "/xx"; String kafkaConsumerId = "tweeter_uv_v10"; BrokerHosts brokerHosts = new ZkHosts(kafkaZKNode); // 接入kafka数据 SpoutConfig wapSpoutConfig = new SpoutConfig(brokerHosts, "a", kafkaZKRoot, kafkaConsumerId + "_wap"); wapSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); wapSpoutConfig.forceFromStart = false; wapSpoutConfig.socketTimeoutMs = 3000; SpoutConfig appSpoutConfig = new SpoutConfig(brokerHosts, "b", kafkaZKRoot, kafkaConsumerId + "_app"); appSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); appSpoutConfig.forceFromStart = false; appSpoutConfig.socketTimeoutMs = 3000; SpoutConfig stockWapSpoutConfig = new SpoutConfig(brokerHosts, "c", kafkaZKRoot, kafkaConsumerId + "_stockWap"); 
stockWapSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); stockWapSpoutConfig.forceFromStart = false; stockWapSpoutConfig.socketTimeoutMs = 3000; SpoutConfig stockAppSpoutConfig = new SpoutConfig(brokerHosts, "d", kafkaZKRoot, kafkaConsumerId + "_stockApp"); stockAppSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); stockAppSpoutConfig.forceFromStart = false; stockAppSpoutConfig.socketTimeoutMs = 3000; // 数据处理 TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("wapSpout", new KafkaSpout(wapSpoutConfig), 10); builder.setBolt("wapFilterBolt", new UVWapFilterBolt(), 40).shuffleGrouping("wapSpout"); builder.setSpout("appSpout", new KafkaSpout(appSpoutConfig), 36); builder.setBolt("appFilterBolt", new UVAppFilterBolt(), 80).shuffleGrouping("appSpout"); builder.setSpout("stockWapSpout", new KafkaSpout(stockWapSpoutConfig), 10); builder.setBolt("stockWapFilterBolt", new UVWapStockFilterBolt(), 20).shuffleGrouping("stockWapSpout"); builder.setSpout("stockAppSpout", new KafkaSpout(stockAppSpoutConfig), 10); builder.setBolt("stockAppFilterBolt", new UVAppStockFilterBolt(), 40).shuffleGrouping("stockAppSpout"); builder.setBolt("uvCacheBolt", new UVCacheBolt(), 60).fieldsGrouping("appFilterBolt", new Fields("dimension")).fieldsGrouping("wapFilterBolt", new Fields("dimension")).fieldsGrouping("stockWapFilterBolt", new Fields("dimension")).fieldsGrouping("stockAppFilterBolt", new Fields("dimension")); runCluster(config, builder); } catch (Exception e) { LOGGER.info("error,", e); } } private static void runCluster(Config config, TopologyBuilder builder) { try { StormSubmitter.submitTopology("TweeterUVTopology_v10", config, builder.createTopology()); } catch (Exception e) { LOGGER.info("TweeterUVTopology_v10提交拓扑异常", e); } } }
2. 缓存 Bolt(UVCacheBolt):按天去重统计 UV,每 10 秒把发生变化的计数批量写入存储一次
package com.suning.yystorm.business.adunion.uv.bolt; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.suning.yystorm.business.adunion.AdUnionConsts; import com.suning.yystorm.comm.hbase.HbaseClient; import com.suning.yystorm.comm.hbase.HbaseClientImpl; import com.suning.yystorm.utils.Distinct; import com.suning.yystorm.utils.FixedMap; import com.suning.yystorm.utils.TupleHelpers; import backtype.storm.Config; import backtype.storm.task.OutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichBolt; import backtype.storm.tuple.Tuple; /** * * 〈一句话功能简述〉<br> * 〈功能详细描述〉 * * @author 15060063 * @see [相关类/方法](可选) * @since [产品/模块版本] (可选) */ @SuppressWarnings("rawtypes") public class UVCacheBolt extends BaseRichBolt { private static final long serialVersionUID = 3384989044692629267L; private static final Logger LOGGER = LoggerFactory.getLogger(UVCacheBolt.class); private OutputCollector collector = null; private FixedMap<String, Long> map = null; private Distinct distinct = null; private HbaseClient client = null; private DateFormat format; @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; this.map = new FixedMap<String, Long>(AdUnionConsts.MAP_COUNT_UV_MAX); this.distinct = new Distinct(AdUnionConsts.MAP_COUNT_UV_MAX, "yyyyMMdd"); this.client = new HbaseClientImpl(); this.format = new SimpleDateFormat("yyyyMMddHHmmss"); } @Override public void execute(Tuple tuple) { if (TupleHelpers.isTickTuple(tuple)) { if (!map.isEmpty()) { try { client.insertBatchDataByLongMd5(AdUnionConsts.TWEETER_UV, map, format.format(new Date()), "info", false); map.clear(); } catch (Exception e) { LOGGER.error("推客UV数UVCacheBolt存储异常", e); } } } else { try { String dimension = 
tuple.getStringByField("dimension"); String uvid = tuple.getStringByField("uvid"); long old = distinct.cardinality(dimension); distinct.easyOffer(dimension, uvid); long newCard = distinct.cardinality(dimension); if (old != newCard) { map.put(dimension, newCard); } } catch (Exception e) { LOGGER.info("tweeter UVCache异常", e); } this.collector.ack(tuple); } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { } @Override public Map<String, Object> getComponentConfiguration() { Map<String, Object> conf = new HashMap<String, Object>(); conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10); return conf; } }
按小时统计的版本(HourUVCacheBolt):与上面的 UVCacheBolt 相同,只是去重的时间窗口为小时(yyyyMMddHH)
package com.suning.yystorm.business.adunion.houruv.bolt; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.suning.yystorm.business.adunion.AdUnionConsts; import com.suning.yystorm.comm.hbase.HbaseClient; import com.suning.yystorm.comm.hbase.HbaseClientImpl; import com.suning.yystorm.utils.Distinct; import com.suning.yystorm.utils.FixedMap; import com.suning.yystorm.utils.TupleHelpers; import backtype.storm.Config; import backtype.storm.task.OutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichBolt; import backtype.storm.tuple.Tuple; /** * * 〈一句话功能简述〉<br> * 〈功能详细描述〉 * * @author 15060063 * @see [相关类/方法](可选) * @since [产品/模块版本] (可选) */ @SuppressWarnings("rawtypes") public class HourUVCacheBolt extends BaseRichBolt { private static final long serialVersionUID = 3384989044692629267L; private static final Logger LOGGER = LoggerFactory.getLogger(HourUVCacheBolt.class); private OutputCollector collector = null; private FixedMap<String, Long> map = null; private Distinct distinct = null; private HbaseClient client = null; private DateFormat format; @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; this.map = new FixedMap<String, Long>(AdUnionConsts.MAP_COUNT_UV_MAX); this.distinct = new Distinct(AdUnionConsts.MAP_COUNT_UV_MAX, "yyyyMMddHH"); this.client = new HbaseClientImpl(); this.format = new SimpleDateFormat("yyyyMMddHHmmss"); } @Override public void execute(Tuple tuple) { if (TupleHelpers.isTickTuple(tuple)) { if (!map.isEmpty()) { try { client.insertBatchDataByLongMd5(AdUnionConsts.TWEETER_HOUR_UV, map, format.format(new Date()), "info", false); map.clear(); } catch (Exception e) { LOGGER.error("推客hour UV数HourUVCacheBolt存储异常", e); } } } 
else { try { String dimension = tuple.getStringByField("dimension"); String uvid = tuple.getStringByField("uvid"); long old = distinct.cardinality(dimension); distinct.easyOffer(dimension, uvid); long newCard = distinct.cardinality(dimension); if (old != newCard) { map.put(dimension, newCard); } } catch (Exception e) { LOGGER.info("tweeter hour UVCache异常", e); } this.collector.ack(tuple); } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { } @Override public Map<String, Object> getComponentConfiguration() { Map<String, Object> conf = new HashMap<String, Object>(); conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10); return conf; } }
3. 去重工具类 Distinct:按时间窗口(天/小时)为每个维度维护一个基数估计器
package com.suning.yystorm.utils; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import com.clearspring.analytics.stream.cardinality.AdaptiveCounting; import com.clearspring.analytics.stream.cardinality.ICardinality; public class Distinct { private FixedMap<String, ICardinality> cardinalityMap = null; private DateFormat format = null; private String time = null; /** * 在不同时间范围内排重,eg:按天排重,按小时排重等 * * @param size * @param _format 格式化日期:eg:yyyyMMdd,yyyyMMddHH */ public Distinct(int size, String _format) { cardinalityMap = new FixedMap<String, ICardinality>(size); format = new SimpleDateFormat(_format); time = format.format(new Date()); } /** * * 功能描述: <br> * 〈功能详细描述〉 * * @param dimension * @param keyword * @return 重复:false,不重复:true * @see [相关类/方法](可选) * @since [产品/模块版本](可选) */ public boolean offer(String dimension, String keyword) { String _time = format.format(new Date()); if (!time.equalsIgnoreCase(_time)) { cardinalityMap.clear(); time = _time; } ICardinality cardinality = cardinalityMap.get(dimension); if (cardinality == null) { synchronized (cardinalityMap) { if (cardinality == null) { cardinalityMap.put(dimension, AdaptiveCounting.Builder.obyCount(Integer.MAX_VALUE).build()); } } } cardinality = cardinalityMap.get(dimension); return cardinality.offer(keyword); } public boolean easyOffer(String dimension, String keyword) { String today = format.format(new Date()); if (!time.equals(today)) { cardinalityMap.clear(); time = today; } ICardinality cardinality = cardinalityMap.get(dimension); if (cardinality == null) { cardinalityMap.put(dimension, AdaptiveCounting.Builder.obyCount(Integer.MAX_VALUE).build()); } cardinality = cardinalityMap.get(dimension); return cardinality.offer(keyword); } public long cardinality(String dimension) { ICardinality card = cardinalityMap.get(dimension); if (null != card) { return card.cardinality(); } else { return 0; } } public ICardinality getCard(String dimension) { String _time = 
format.format(new Date()); if (!time.equalsIgnoreCase(_time)) { cardinalityMap.clear(); time = _time; } ICardinality cardinality = cardinalityMap.get(dimension); if (cardinality == null) { synchronized (cardinalityMap) { if (cardinalityMap.get(dimension) == null) { cardinalityMap.put(dimension, AdaptiveCounting.Builder.obyCount(Integer.MAX_VALUE).build()); } } } cardinality = cardinalityMap.get(dimension); return cardinality; } public boolean setCard(String dimension, ICardinality cardinality) { cardinalityMap.put(dimension, cardinality); return true; } public FixedMap<String, ICardinality> getCardinalityMap() { return cardinalityMap; } }
posted on 2019-05-17 19:36 cxhfuujust 阅读(404) 评论(0) 编辑 收藏 举报