Flink读写Redis(二)-flink-redis-connector代码学习

源码结构

RedisSink

package org.apache.flink.streaming.connectors.redis;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisClusterConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisConfigBase;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisSentinelConfig;
import org.apache.flink.streaming.connectors.redis.common.container.RedisCommandsContainer;
import org.apache.flink.streaming.connectors.redis.common.container.RedisCommandsContainerBuilder;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisDataType;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * A sink that writes incoming elements to Redis using the Jedis client.
 *
 * <p>The sink takes two arguments: a {@link FlinkJedisConfigBase} describing the connection and a
 * {@link RedisMapper} describing how an element is turned into a Redis command, key and value.
 * <ul>
 *   <li>Pass a {@link FlinkJedisPoolConfig} to connect to a single Redis server
 *       (backed by {@link redis.clients.jedis.JedisPool}).</li>
 *   <li>Pass a {@link FlinkJedisSentinelConfig} to connect through Sentinel
 *       (backed by {@link redis.clients.jedis.JedisSentinelPool}).</li>
 *   <li>Pass a {@link FlinkJedisClusterConfig} to connect to a Redis Cluster.</li>
 * </ul>
 *
 * <p>Example:
 *
 * <pre>
 *{@code
 *public static class RedisExampleMapper implements RedisMapper<Tuple2<String, String>> {
 *
 *	private RedisCommand redisCommand;
 *
 *	public RedisExampleMapper(RedisCommand redisCommand){
 *		this.redisCommand = redisCommand;
 *	}
 *	public RedisCommandDescription getCommandDescription() {
 *		return new RedisCommandDescription(redisCommand, REDIS_ADDITIONAL_KEY);
 *	}
 *	public String getKeyFromData(Tuple2<String, String> data) {
 *		return data.f0;
 *	}
 *	public String getValueFromData(Tuple2<String, String> data) {
 *		return data.f1;
 *	}
 *}
 *FlinkJedisPoolConfig jedisPoolConfig = new FlinkJedisPoolConfig.Builder()
 *    .setHost(REDIS_HOST).setPort(REDIS_PORT).build();
 *new RedisSink<Tuple2<String, String>>(jedisPoolConfig, new RedisExampleMapper(RedisCommand.LPUSH));
 *}</pre>
 *
 * @param <IN> Type of the elements consumed by this sink
 */
public class RedisSink<IN> extends RichSinkFunction<IN> {

	private static final long serialVersionUID = 1L;

	private static final Logger LOG = LoggerFactory.getLogger(RedisSink.class);

	/**
	 * Extra key required by {@link RedisDataType#HASH} and {@link RedisDataType#SORTED_SET}.
	 * The other data types need only two values (e.g. the list name and the value to add),
	 * whereas these two need three.
	 * <p>For {@link RedisDataType#HASH}: hash name, hash field and value;
	 * {@code additionalKey} is used as the hash name.
	 * <p>For {@link RedisDataType#SORTED_SET}: set name, element and its score;
	 * {@code additionalKey} is used as the set name.
	 */
	private final String additionalKey;
	private final RedisMapper<IN> redisSinkMapper;
	private final RedisCommand redisCommand;

	private final FlinkJedisConfigBase flinkJedisConfigBase;
	// Created in open() and released in close(); null until the sink has been opened.
	private RedisCommandsContainer redisCommandsContainer;

	/**
	 * Creates a new {@link RedisSink} that connects to the Redis server.
	 *
	 * @param flinkJedisConfigBase The connection configuration
	 * @param redisSinkMapper Supplies the Redis command description and extracts key and value from incoming elements
	 * @throws NullPointerException if either argument, or the mapper's command description, is null
	 */
	public RedisSink(FlinkJedisConfigBase flinkJedisConfigBase, RedisMapper<IN> redisSinkMapper) {
		Preconditions.checkNotNull(flinkJedisConfigBase, "Redis connection pool config should not be null");
		Preconditions.checkNotNull(redisSinkMapper, "Redis Mapper can not be null");

		// Query the mapper exactly once so that the description that was validated
		// is the same instance the command and additional key are read from.
		RedisCommandDescription redisCommandDescription = redisSinkMapper.getCommandDescription();
		Preconditions.checkNotNull(redisCommandDescription, "Redis Mapper data type description can not be null");

		this.flinkJedisConfigBase = flinkJedisConfigBase;
		this.redisSinkMapper = redisSinkMapper;
		this.redisCommand = redisCommandDescription.getCommand();
		this.additionalKey = redisCommandDescription.getAdditionalKey();
	}

	/**
	 * Called for every incoming element; extracts key and value through the mapper and
	 * issues the configured Redis command.
	 * Handled commands are RPUSH, LPUSH, SADD, PUBLISH, SET, PFADD, HSET, ZADD.
	 *
	 * <p>For ZADD the additional key is the sorted-set name, the mapped value is passed as the
	 * score and the mapped key as the element. For HSET the additional key is the hash name,
	 * the mapped key is the hash field and the mapped value is the field value.
	 *
	 * @param input The incoming data
	 * @throws IllegalArgumentException if the configured command is not one of the handled commands
	 */
	@Override
	public void invoke(IN input) throws Exception {
		String key = redisSinkMapper.getKeyFromData(input);
		String value = redisSinkMapper.getValueFromData(input);

		switch (redisCommand) {
			case RPUSH:
				this.redisCommandsContainer.rpush(key, value);
				break;
			case LPUSH:
				this.redisCommandsContainer.lpush(key, value);
				break;
			case SADD:
				this.redisCommandsContainer.sadd(key, value);
				break;
			case SET:
				this.redisCommandsContainer.set(key, value);
				break;
			case PFADD:
				this.redisCommandsContainer.pfadd(key, value);
				break;
			case PUBLISH:
				this.redisCommandsContainer.publish(key, value);
				break;
			case ZADD:
				// Argument order is (set name, score, element): value is the score, key is the member.
				this.redisCommandsContainer.zadd(this.additionalKey, value, key);
				break;
			case HSET:
				this.redisCommandsContainer.hset(this.additionalKey, key, value);
				break;
			default:
				throw new IllegalArgumentException("Cannot process such data type: " + redisCommand);
		}
	}

	/**
	 * Initializes the connection to Redis (single server, Sentinel or Cluster) by building
	 * the commands container that matches the configured {@link FlinkJedisConfigBase}.
	 *
	 * @throws IllegalArgumentException if the configuration is not one of the supported config types
	 */
	@Override
	public void open(Configuration parameters) throws Exception {
		this.redisCommandsContainer = RedisCommandsContainerBuilder.build(this.flinkJedisConfigBase);
	}

	/**
	 * Closes the commands container, if one was created.
	 * @throws IOException if the commands container is unable to close.
	 */
	@Override
	public void close() throws IOException {
		if (redisCommandsContainer != null) {
			redisCommandsContainer.close();
		}
	}
}

RedisSink类继承了RichSinkFunction类,重写了其中的open、invoke、close方法。open方法在sink打开时执行一次,在RedisSink中,其创建了一个RedisCommandsContainer对象,该对象其实是封装了对redis的操作,包含连接redis以及不同数据类型的写入操作;close方法中执行了RedisCommandsContainer对象的close方法,其实是关闭redis连接;每当数据流入时,就会调用invoke方法,该方法中根据不同的redis命令,调用RedisCommandsContainer对象的不同方法将数据写入redis。

FlinkJedisConfigBase

RedisSink的构造方法中需要传入一个FlinkJedisConfigBase对象,该对象主要是用来设置一些redis连接参数,比如IP、用户、密码、连接超时等信息,在后续创建RedisCommandsContainer对象时使用,FlinkJedisConfigBase是一个抽象类,具体的实现类有FlinkJedisPoolConfig、FlinkJedisSentinelConfig、FlinkJedisClusterConfig三种,分别对应了redis、redis哨兵、redis集群不同的连接模式,比较简单,就不贴代码了。

RedisMapper接口

RedisSink的构造方法中还需要RedisMapper实现对象,需要用户自定义类实现该接口,主要是用来定义数据如何映射成redis的key和value,另外是返回一个RedisCommandDescription对象,该对象其实是包含了要执行的redis命令信息,以及HASH、SORTED_SET类型所需的附加key(additionalKey)。

package org.apache.flink.streaming.connectors.redis.common.mapper;

import org.apache.flink.api.common.functions.Function;

import java.io.Serializable;

/**
 * Describes how an input element is mapped onto a Redis command: which command to run
 * and which key and value are extracted from the element.
 *
 * <p>Example:
 * <pre>{@code
 *private static class RedisTestMapper implements RedisMapper<Tuple2<String, String>> {
 *    public RedisCommandDescription getCommandDescription() {
 *        return new RedisCommandDescription(RedisCommand.PUBLISH);
 *    }
 *    public String getKeyFromData(Tuple2<String, String> data) {
 *        return data.f0;
 *    }
 *    public String getValueFromData(Tuple2<String, String> data) {
 *        return data.f1;
 *    }
 *}
 *}</pre>
 *
 * @param <T> The type of the element handled by this {@code RedisMapper}
 */
public interface RedisMapper<T> extends Function, Serializable {

	/**
	 * Returns the descriptor that defines which Redis command is applied to the mapped data.
	 * {@code RedisSink} reads the command and the additional key from this descriptor
	 * in its constructor and rejects a {@code null} descriptor.
	 *
	 * @return command descriptor
	 */
	RedisCommandDescription getCommandDescription();

	/**
	 * Extracts the key from the data.
	 * Note that {@code RedisSink} passes this as the hash field for HSET and as the
	 * element for ZADD, where the Redis key itself comes from the additional key.
	 *
	 * @param data source data
	 * @return key
	 */
	String getKeyFromData(T data);

	/**
	 * Extracts the value from the data.
	 * Note that {@code RedisSink} passes this as the score for ZADD.
	 *
	 * @param data source data
	 * @return value
	 */
	String getValueFromData(T data);
}

RedisCommandDescription、RedisDataType、RedisCommand

RedisCommandDescription、RedisDataType、RedisCommand三个类用来表示操作的redis命令及其对应的数据类型,比较简单,不再描述,支持的数据类型在RedisDataType枚举中:

package org.apache.flink.streaming.connectors.redis.common.mapper;

/**
 * All available data types for Redis.
 */
public enum RedisDataType {

	/**
	 * Strings are the most basic kind of Redis value. Redis Strings are binary safe,
	 * which means that a Redis string can contain any kind of data, for instance a JPEG image
	 * or a serialized Ruby object.
	 * A String value can be at most 512 Megabytes in length.
	 */
	STRING,

	/**
	 * Redis Hashes are maps between string fields and string values.
	 */
	HASH,

	/**
	 * Redis Lists are simply lists of strings, sorted by insertion order.
	 */
	LIST,

	/**
	 * Redis Sets are unordered collections of Strings.
	 */
	SET,

	/**
	 * Redis Sorted Sets are, similarly to Redis Sets, non-repeating collections of Strings.
	 * The difference is that every member of a Sorted Set is associated with a score,
	 * which is used to keep the sorted set ordered from the smallest to the greatest score.
	 * While members are unique, scores may be repeated.
	 */
	SORTED_SET,

	/**
	 * HyperLogLog is a probabilistic data structure used to count unique things.
	 */
	HYPER_LOG_LOG,

	/**
	 * Redis implementation of the publish/subscribe paradigm. Published messages are
	 * characterized into channels, without knowledge of what (if any) subscribers there may be.
	 * Subscribers express interest in one or more channels, and only receive messages
	 * that are of interest, without knowledge of what (if any) publishers there are.
	 */
	PUBSUB
}

RedisCommandsContainer

RedisCommandsContainer接口定义了redis操作的方法,具体的实现类有RedisContainer、RedisClusterContainer,其中RedisContainer是用在直连redis和redis哨兵模式中,而RedisClusterContainer是用在集群模式中,具体代码不贴了,里面主要是调用了Jedis的API。

package org.apache.flink.streaming.connectors.redis.common.container;

import java.io.IOException;
import java.io.Serializable;

/**
 * The container for all available Redis commands.
 * Each method corresponds to one Redis write command; {@link #close()} releases the container.
 */
public interface RedisCommandsContainer extends Serializable {

	/**
	 * Sets {@code hashField} in the hash stored at {@code key} to {@code value}.
	 * If key does not exist, a new key holding a hash is created.
	 * If field already exists in the hash, it is overwritten.
	 *
	 * @param key Hash name
	 * @param hashField Hash field
	 * @param value Hash value
	 */
	void hset(String key, String hashField, String value);

	/**
	 * Inserts the specified value at the tail of the list stored at {@code listName}.
	 * If the list does not exist, it is created as an empty list before performing the push operation.
	 *
	 * @param listName Name of the List
	 * @param value Value to be added
	 */
	void rpush(String listName, String value);

	/**
	 * Inserts the specified value at the head of the list stored at {@code listName}.
	 * If the list does not exist, it is created as an empty list before performing the push operation.
	 *
	 * @param listName Name of the List
	 * @param value Value to be added
	 */
	void lpush(String listName, String value);

	/**
	 * Adds the specified member to the set stored at {@code setName}.
	 * A member that is already part of this set is ignored.
	 * If the set does not exist, a new set is created before adding the member.
	 *
	 * @param setName Name of the Set
	 * @param value Value to be added
	 */
	void sadd(String setName, String value);

	/**
	 * Posts a message to the given channel.
	 *
	 * @param channelName Name of the channel to which data will be published
	 * @param message the message
	 */
	void publish(String channelName, String message);

	/**
	 * Sets key to hold the string value. If key already holds a value, it is overwritten,
	 * regardless of its type. Any previous time to live associated with the key is
	 * discarded on successful SET operation.
	 *
	 * @param key the key name in which value to be set
	 * @param value the value
	 */
	void set(String key, String value);

	/**
	 * Adds the element to the HyperLogLog data structure
	 * stored at the variable name specified as first argument.
	 *
	 * @param key The name of the key
	 * @param element the element
	 */
	void pfadd(String key, String element);

	/**
	 * Adds the specified member with the specified score to the sorted set stored at key.
	 * Note the argument order: the score comes before the element, and it is passed as a String.
	 *
	 * @param key The name of the Sorted Set
	 * @param score Score of the element
	 * @param element element to be added
	 */
	void zadd(String key, String score, String element);

	/**
	 * Closes the Jedis container.
	 *
	 * @throws IOException if the instance can not be closed properly
	 */
	void close() throws IOException;
}

RedisCommandsContainerBuilder

RedisCommandsContainerBuilder类是根据不同的FlinkJedisConfigBase实现类来创建不同的RedisCommandsContainer对象

package org.apache.flink.streaming.connectors.redis.common.container;

import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisClusterConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisConfigBase;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisSentinelConfig;
import org.apache.flink.util.Preconditions;
import redis.clients.jedis.JedisCluster;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisSentinelPool;

/**
 * Factory for {@link RedisCommandsContainer} instances: picks the container implementation
 * that matches the concrete {@link FlinkJedisConfigBase} it is given.
 */
public class RedisCommandsContainerBuilder {

	/**
	 * Creates the {@link RedisCommandsContainer} that matches the runtime type of the configuration.
	 *
	 * @param flinkJedisConfigBase configuration base
	 * @return container for the single-server, cluster or sentinel environment
	 * @throws IllegalArgumentException if the configuration is none of the pool, cluster or sentinel config types
	 */
	public static RedisCommandsContainer build(FlinkJedisConfigBase flinkJedisConfigBase){
		if (flinkJedisConfigBase instanceof FlinkJedisPoolConfig) {
			return build((FlinkJedisPoolConfig) flinkJedisConfigBase);
		}
		if (flinkJedisConfigBase instanceof FlinkJedisClusterConfig) {
			return build((FlinkJedisClusterConfig) flinkJedisConfigBase);
		}
		if (flinkJedisConfigBase instanceof FlinkJedisSentinelConfig) {
			return build((FlinkJedisSentinelConfig) flinkJedisConfigBase);
		}
		throw new IllegalArgumentException("Jedis configuration not found");
	}

	/**
	 * Creates the container used for a single Redis server.
	 *
	 * @param jedisPoolConfig configuration for JedisPool
	 * @return container for single Redis environment
	 * @throws NullPointerException if jedisPoolConfig is null
	 */
	public static RedisCommandsContainer build(FlinkJedisPoolConfig jedisPoolConfig) {
		Preconditions.checkNotNull(jedisPoolConfig, "Redis pool config should not be Null");

		JedisPool pool = new JedisPool(
			newPoolConfig(jedisPoolConfig.getMaxIdle(), jedisPoolConfig.getMaxTotal(), jedisPoolConfig.getMinIdle()),
			jedisPoolConfig.getHost(),
			jedisPoolConfig.getPort(),
			jedisPoolConfig.getConnectionTimeout(),
			jedisPoolConfig.getPassword(),
			jedisPoolConfig.getDatabase());
		return new RedisContainer(pool);
	}

	/**
	 * Creates the container used for a Redis Cluster.
	 *
	 * @param jedisClusterConfig configuration for JedisCluster
	 * @return container for Redis Cluster environment
	 * @throws NullPointerException if jedisClusterConfig is null
	 */
	public static RedisCommandsContainer build(FlinkJedisClusterConfig jedisClusterConfig) {
		Preconditions.checkNotNull(jedisClusterConfig, "Redis cluster config should not be Null");

		GenericObjectPoolConfig poolSettings = newPoolConfig(
			jedisClusterConfig.getMaxIdle(), jedisClusterConfig.getMaxTotal(), jedisClusterConfig.getMinIdle());

		JedisCluster cluster = new JedisCluster(
			jedisClusterConfig.getNodes(),
			jedisClusterConfig.getConnectionTimeout(),
			jedisClusterConfig.getMaxRedirections(),
			poolSettings);
		return new RedisClusterContainer(cluster);
	}

	/**
	 * Creates the container used for a Sentinel-managed Redis.
	 *
	 * @param jedisSentinelConfig configuration for JedisSentinel
	 * @return container for Redis sentinel environment
	 * @throws NullPointerException if jedisSentinelConfig is null
	 */
	public static RedisCommandsContainer build(FlinkJedisSentinelConfig jedisSentinelConfig) {
		Preconditions.checkNotNull(jedisSentinelConfig, "Redis sentinel config should not be Null");

		GenericObjectPoolConfig poolSettings = newPoolConfig(
			jedisSentinelConfig.getMaxIdle(), jedisSentinelConfig.getMaxTotal(), jedisSentinelConfig.getMinIdle());

		JedisSentinelPool sentinelPool = new JedisSentinelPool(
			jedisSentinelConfig.getMasterName(),
			jedisSentinelConfig.getSentinels(),
			poolSettings,
			jedisSentinelConfig.getConnectionTimeout(),
			jedisSentinelConfig.getSoTimeout(),
			jedisSentinelConfig.getPassword(),
			jedisSentinelConfig.getDatabase());
		return new RedisContainer(sentinelPool);
	}

	/**
	 * Builds the connection-pool settings shared by all three environments.
	 *
	 * @param maxIdle maximum number of idle connections
	 * @param maxTotal maximum number of connections
	 * @param minIdle minimum number of idle connections
	 * @return pool configuration carrying the three limits
	 */
	private static GenericObjectPoolConfig newPoolConfig(int maxIdle, int maxTotal, int minIdle) {
		GenericObjectPoolConfig poolSettings = new GenericObjectPoolConfig();
		poolSettings.setMaxIdle(maxIdle);
		poolSettings.setMaxTotal(maxTotal);
		poolSettings.setMinIdle(minIdle);
		return poolSettings;
	}
}

整体下来,flink-redis-connector源码比较简洁,可以作为自定义flink sink的入门学习。

posted @ 2020-10-28 21:48  远去的列车  阅读(3991)  评论(0编辑  收藏  举报