Flink读写Redis(二)-flink-redis-connector代码学习
源码结构
RedisSink
package org.apache.flink.streaming.connectors.redis;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisClusterConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisConfigBase;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisSentinelConfig;
import org.apache.flink.streaming.connectors.redis.common.container.RedisCommandsContainer;
import org.apache.flink.streaming.connectors.redis.common.container.RedisCommandsContainerBuilder;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisDataType;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * A sink that delivers data to a Redis channel using the Jedis client.
 *
 * <p>The sink takes two arguments: a {@link FlinkJedisConfigBase} and a {@link RedisMapper}.
 *
 * <p>When a {@link FlinkJedisPoolConfig} is passed as the first argument, the sink connects through a
 * {@link redis.clients.jedis.JedisPool}. Use this to talk to a single Redis server.
 *
 * <p>When a {@link FlinkJedisSentinelConfig} is passed as the first argument, the sink connects through a
 * {@link redis.clients.jedis.JedisSentinelPool}. Use this to talk to Redis Sentinel.
 *
 * <p>Pass a {@link FlinkJedisClusterConfig} as the first argument to talk to a Redis Cluster.
 *
 * <p>Example:
 *
 * <pre>{@code
 * public static class RedisExampleMapper implements RedisMapper<Tuple2<String, String>> {
 *
 *     private RedisCommand redisCommand;
 *
 *     public RedisExampleMapper(RedisCommand redisCommand) {
 *         this.redisCommand = redisCommand;
 *     }
 *
 *     public RedisCommandDescription getCommandDescription() {
 *         return new RedisCommandDescription(redisCommand, REDIS_ADDITIONAL_KEY);
 *     }
 *
 *     public String getKeyFromData(Tuple2<String, String> data) {
 *         return data.f0;
 *     }
 *
 *     public String getValueFromData(Tuple2<String, String> data) {
 *         return data.f1;
 *     }
 * }
 *
 * FlinkJedisPoolConfig jedisPoolConfig = new FlinkJedisPoolConfig.Builder()
 *     .setHost(REDIS_HOST).setPort(REDIS_PORT).build();
 * new RedisSink<Tuple2<String, String>>(jedisPoolConfig, new RedisExampleMapper(RedisCommand.LPUSH));
 * }</pre>
 *
 * @param <IN> Type of the elements emitted by this sink
 */
public class RedisSink<IN> extends RichSinkFunction<IN> {

    private static final long serialVersionUID = 1L;

    private static final Logger LOG = LoggerFactory.getLogger(RedisSink.class);

    /**
     * Additional key needed for {@link RedisDataType#HASH} and {@link RedisDataType#SORTED_SET}.
     * Other {@link RedisDataType}s work with only two variables, i.e. the name of the list and the value
     * to be added. For {@link RedisDataType#HASH} and {@link RedisDataType#SORTED_SET} three are needed.
     *
     * <p>For {@link RedisDataType#HASH} we need hash name, hash key and element;
     * {@code additionalKey} is used as the hash name.
     *
     * <p>For {@link RedisDataType#SORTED_SET} we need set name, the element and its score;
     * {@code additionalKey} is used as the set name.
     */
    private final String additionalKey;

    private final RedisMapper<IN> redisSinkMapper;

    private final RedisCommand redisCommand;

    private final FlinkJedisConfigBase flinkJedisConfigBase;

    /** Created in {@link #open(Configuration)}; never set at construction time, hence transient. */
    private transient RedisCommandsContainer redisCommandsContainer;

    /**
     * Creates a new {@link RedisSink} that connects to the Redis server.
     *
     * @param flinkJedisConfigBase The configuration of {@link FlinkJedisConfigBase}
     * @param redisSinkMapper This is used to generate Redis command and key value from incoming elements.
     * @throws NullPointerException if either argument, or the mapper's command description, is null
     */
    public RedisSink(FlinkJedisConfigBase flinkJedisConfigBase, RedisMapper<IN> redisSinkMapper) {
        Preconditions.checkNotNull(flinkJedisConfigBase, "Redis connection pool config should not be null");
        Preconditions.checkNotNull(redisSinkMapper, "Redis Mapper can not be null");

        // Ask the mapper once, so the instance that is null-checked is the instance that is used.
        RedisCommandDescription redisCommandDescription = redisSinkMapper.getCommandDescription();
        Preconditions.checkNotNull(redisCommandDescription, "Redis Mapper data type description can not be null");

        this.flinkJedisConfigBase = flinkJedisConfigBase;
        this.redisSinkMapper = redisSinkMapper;
        this.redisCommand = redisCommandDescription.getCommand();
        this.additionalKey = redisCommandDescription.getAdditionalKey();
    }

    /**
     * Called when new data arrives at the sink, and forwards it to the Redis channel.
     * Depending on the configured {@link RedisCommand}, a different Redis command is applied.
     * Available commands are RPUSH, LPUSH, SADD, PUBLISH, SET, PFADD, HSET, ZADD.
     *
     * <p>For ZADD the mapper's value is sent as the score and the mapper's key as the element;
     * for HSET the mapper's key is sent as the hash field. In both cases {@code additionalKey}
     * names the target structure.
     *
     * @param input The incoming data
     * @throws IllegalArgumentException if the configured command is not one of the commands listed above
     */
    @Override
    public void invoke(IN input) throws Exception {
        String key = redisSinkMapper.getKeyFromData(input);
        String value = redisSinkMapper.getValueFromData(input);

        switch (redisCommand) {
            case RPUSH:
                this.redisCommandsContainer.rpush(key, value);
                break;
            case LPUSH:
                this.redisCommandsContainer.lpush(key, value);
                break;
            case SADD:
                this.redisCommandsContainer.sadd(key, value);
                break;
            case SET:
                this.redisCommandsContainer.set(key, value);
                break;
            case PFADD:
                this.redisCommandsContainer.pfadd(key, value);
                break;
            case PUBLISH:
                this.redisCommandsContainer.publish(key, value);
                break;
            case ZADD:
                // zadd(setName, score, element): value carries the score, key carries the member.
                this.redisCommandsContainer.zadd(this.additionalKey, value, key);
                break;
            case HSET:
                this.redisCommandsContainer.hset(this.additionalKey, key, value);
                break;
            default:
                throw new IllegalArgumentException("Cannot process such data type: " + redisCommand);
        }
    }

    /**
     * Initializes the connection to Redis by either cluster or sentinels or single server.
     * A failure is logged and then rethrown unchanged.
     *
     * @throws IllegalArgumentException if the configuration is none of the pool, cluster or sentinel types
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        try {
            this.redisCommandsContainer = RedisCommandsContainerBuilder.build(this.flinkJedisConfigBase);
        } catch (Exception e) {
            LOG.error("Redis has not been properly initialized: ", e);
            throw e;
        }
    }

    /**
     * Closes the commands container, if one was created.
     *
     * @throws IOException if the command container is unable to close.
     */
    @Override
    public void close() throws IOException {
        if (redisCommandsContainer != null) {
            redisCommandsContainer.close();
        }
    }
}
RedisSink类继承了RichSinkFunction类,扩展了其中的open、invoke、close方法。open方法在sink打开时执行一次,在RedisSink中,其创建了一个RedisCommandsContainer对象,该对象其实是封装了对redis的操作,包含连接redis以及不同数据类型的写入操作;close方法中执行了RedisCommandsContainer对象的close方法,其实是关闭redis连接;每当数据流入时,就会调用invoke方法,该方法中根据不同的redis数据类型,调用RedisCommandsContainer对象的不同方法将数据写入redis。
FlinkJedisConfigBase
RedisSink的构造方法中需要传入一个FlinkJedisConfigBase对象,该对象主要是用来设置一些redis连接参数,比如IP、用户、密码、连接超时等信息,在后续创建RedisCommandsContainer对象时使用,FlinkJedisConfigBase是一个抽象类,具体的实现类有FlinkJedisPoolConfig、FlinkJedisSentinelConfig、FlinkJedisClusterConfig三种,分别对应了redis、redis哨兵、redis集群不同的连接模式,比较简单,就不贴代码了。
RedisMapper接口
RedisSink的构造方法中还需要RedisMapper实现对象,需要用户自定义类实现该接口,主要是用来定义数据如何映射成redis的key和value,另外是返回一个RedisCommandDescription对象,该对象其实是包含了操作的redis数据类型信息。
package org.apache.flink.streaming.connectors.redis.common.mapper;
import org.apache.flink.api.common.functions.Function;
import java.io.Serializable;
/**
 * Describes how an input element is mapped onto a Redis command, key and value.
 *
 * <p>Example:
 *
 * <pre>{@code
 * private static class RedisTestMapper implements RedisMapper<Tuple2<String, String>> {
 *
 *     public RedisCommandDescription getCommandDescription() {
 *         return new RedisCommandDescription(RedisCommand.PUBLISH);
 *     }
 *
 *     public String getKeyFromData(Tuple2<String, String> data) {
 *         return data.f0;
 *     }
 *
 *     public String getValueFromData(Tuple2<String, String> data) {
 *         return data.f1;
 *     }
 * }
 * }</pre>
 *
 * @param <T> The type of the element handled by this {@code RedisMapper}
 */
public interface RedisMapper<T> extends Function, Serializable {

    /**
     * Supplies the descriptor that tells the sink which Redis command to issue.
     *
     * @return the command descriptor
     */
    RedisCommandDescription getCommandDescription();

    /**
     * Derives the key from an incoming element.
     *
     * @param data source data
     * @return key
     */
    String getKeyFromData(T data);

    /**
     * Derives the value from an incoming element.
     *
     * @param data source data
     * @return value
     */
    String getValueFromData(T data);
}
RedisCommandDescription、RedisDataType、RedisCommand
RedisCommandDescription、RedisDataType、RedisCommand三个类用来表示操作的redis数据类型,比较简单,不再描述,支持的数据类型在RedisDataType枚举中。
package org.apache.flink.streaming.connectors.redis.common.mapper;
/**
 * The Redis data types this connector knows about.
 */
public enum RedisDataType {

    /**
     * The most basic kind of Redis value. Redis strings are binary safe, so a string may hold
     * any kind of data (for instance a JPEG image or a serialized Ruby object).
     * A string value can be at most 512 megabytes long.
     */
    STRING,

    /**
     * A map between string fields and string values.
     */
    HASH,

    /**
     * A list of strings, kept in insertion order.
     */
    LIST,

    /**
     * An unordered collection of strings.
     */
    SET,

    /**
     * Like a set, a collection of non-repeating strings, but every member is associated
     * with a score that is used to keep the members ordered from the smallest to the greatest score.
     * Members are unique; scores may repeat.
     */
    SORTED_SET,

    /**
     * A probabilistic data structure used to count unique things.
     */
    HYPER_LOG_LOG,

    /**
     * Redis' publish/subscribe mechanism. Published messages are characterized into channels,
     * without knowledge of what (if any) subscribers there may be. Subscribers express interest in
     * one or more channels and only receive messages of interest, without knowledge of what
     * (if any) publishers there are.
     */
    PUBSUB
}
RedisCommandsContainer
RedisCommandsContainer接口定义了redis操作的方法,具体的实现类有RedisContainer、RedisClusterContainer,其中RedisContainer是用在直连redis和redis哨兵模式中,而RedisClusterContainer是用在集群模式中,具体代码不贴了,里面主要是调用了Jedis的API。
package org.apache.flink.streaming.connectors.redis.common.container;
import java.io.IOException;
import java.io.Serializable;
/**
 * Abstraction over every Redis command the connector can issue.
 */
public interface RedisCommandsContainer extends Serializable {

    /**
     * Sets a field of the hash stored at {@code key} to {@code value}.
     * A missing key is created as a new hash; an existing field is overwritten.
     *
     * @param key Hash name
     * @param hashField Hash field
     * @param value Hash value
     */
    void hset(String key, String hashField, String value);

    /**
     * Appends the value to the tail of the list stored at the given key.
     * A missing key is created as an empty list before the push is performed.
     *
     * @param listName Name of the List
     * @param value Value to be added
     */
    void rpush(String listName, String value);

    /**
     * Prepends the value to the head of the list stored at the given key.
     * A missing key is created as an empty list before the push is performed.
     *
     * @param listName Name of the List
     * @param value Value to be added
     */
    void lpush(String listName, String value);

    /**
     * Adds the member to the set stored at the given key.
     * Members already present are ignored; a missing key is created as a new set first.
     *
     * @param setName Name of the Set
     * @param value Value to be added
     */
    void sadd(String setName, String value);

    /**
     * Publishes a message on the given channel.
     *
     * @param channelName Name of the channel to which data will be published
     * @param message the message
     */
    void publish(String channelName, String message);

    /**
     * Stores the string value under the key. An existing value is overwritten regardless of its
     * type, and any previous time to live on the key is discarded on a successful SET.
     *
     * @param key the key name in which value to be set
     * @param value the value
     */
    void set(String key, String value);

    /**
     * Adds the element to the HyperLogLog data structure stored under the given key.
     *
     * @param key The name of the key
     * @param element the element
     */
    void pfadd(String key, String element);

    /**
     * Adds the member, with the given score, to the sorted set stored at the key.
     *
     * @param key The name of the Sorted Set
     * @param score Score of the element
     * @param element element to be added
     */
    void zadd(String key, String score, String element);

    /**
     * Closes the Jedis container.
     *
     * @throws IOException if the instance can not be closed properly
     */
    void close() throws IOException;
}
RedisCommandsContainerBuilder
RedisCommandsContainerBuilder类是根据不同的FlinkJedisConfigBase实现类来创建不同的RedisCommandsContainer对象
package org.apache.flink.streaming.connectors.redis.common.container;
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisClusterConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisConfigBase;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisSentinelConfig;
import org.apache.flink.util.Preconditions;
import redis.clients.jedis.JedisCluster;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisSentinelPool;
/**
 * Factory that creates the {@link RedisCommandsContainer} matching a given Jedis configuration.
 */
public class RedisCommandsContainerBuilder {

    /**
     * Picks the container implementation from the runtime type of the configuration.
     *
     * @param flinkJedisConfigBase configuration base
     * @return the container built for the concrete configuration type
     * @throws IllegalArgumentException if the configuration is not a pool, cluster or sentinel configuration
     */
    public static RedisCommandsContainer build(FlinkJedisConfigBase flinkJedisConfigBase) {
        if (flinkJedisConfigBase instanceof FlinkJedisPoolConfig) {
            return build((FlinkJedisPoolConfig) flinkJedisConfigBase);
        }
        if (flinkJedisConfigBase instanceof FlinkJedisClusterConfig) {
            return build((FlinkJedisClusterConfig) flinkJedisConfigBase);
        }
        if (flinkJedisConfigBase instanceof FlinkJedisSentinelConfig) {
            return build((FlinkJedisSentinelConfig) flinkJedisConfigBase);
        }
        throw new IllegalArgumentException("Jedis configuration not found");
    }

    /**
     * Builds the container for a single Redis server.
     *
     * @param jedisPoolConfig configuration for JedisPool
     * @return container for single Redis environment
     * @throws NullPointerException if jedisPoolConfig is null
     */
    public static RedisCommandsContainer build(FlinkJedisPoolConfig jedisPoolConfig) {
        Preconditions.checkNotNull(jedisPoolConfig, "Redis pool config should not be Null");

        GenericObjectPoolConfig poolSettings = poolSettings(
            jedisPoolConfig.getMaxIdle(), jedisPoolConfig.getMaxTotal(), jedisPoolConfig.getMinIdle());

        JedisPool pool = new JedisPool(
            poolSettings,
            jedisPoolConfig.getHost(),
            jedisPoolConfig.getPort(),
            jedisPoolConfig.getConnectionTimeout(),
            jedisPoolConfig.getPassword(),
            jedisPoolConfig.getDatabase());
        return new RedisContainer(pool);
    }

    /**
     * Builds the container for a Redis Cluster.
     *
     * @param jedisClusterConfig configuration for JedisCluster
     * @return container for Redis Cluster environment
     * @throws NullPointerException if jedisClusterConfig is null
     */
    public static RedisCommandsContainer build(FlinkJedisClusterConfig jedisClusterConfig) {
        Preconditions.checkNotNull(jedisClusterConfig, "Redis cluster config should not be Null");

        GenericObjectPoolConfig poolSettings = poolSettings(
            jedisClusterConfig.getMaxIdle(), jedisClusterConfig.getMaxTotal(), jedisClusterConfig.getMinIdle());

        JedisCluster cluster = new JedisCluster(
            jedisClusterConfig.getNodes(),
            jedisClusterConfig.getConnectionTimeout(),
            jedisClusterConfig.getMaxRedirections(),
            poolSettings);
        return new RedisClusterContainer(cluster);
    }

    /**
     * Builds the container for a Redis Sentinel deployment.
     *
     * @param jedisSentinelConfig configuration for JedisSentinel
     * @return container for Redis sentinel environment
     * @throws NullPointerException if jedisSentinelConfig is null
     */
    public static RedisCommandsContainer build(FlinkJedisSentinelConfig jedisSentinelConfig) {
        Preconditions.checkNotNull(jedisSentinelConfig, "Redis sentinel config should not be Null");

        GenericObjectPoolConfig poolSettings = poolSettings(
            jedisSentinelConfig.getMaxIdle(), jedisSentinelConfig.getMaxTotal(), jedisSentinelConfig.getMinIdle());

        JedisSentinelPool sentinelPool = new JedisSentinelPool(
            jedisSentinelConfig.getMasterName(),
            jedisSentinelConfig.getSentinels(),
            poolSettings,
            jedisSentinelConfig.getConnectionTimeout(),
            jedisSentinelConfig.getSoTimeout(),
            jedisSentinelConfig.getPassword(),
            jedisSentinelConfig.getDatabase());
        return new RedisContainer(sentinelPool);
    }

    /** Creates a pool configuration carrying the three sizing limits shared by every connection mode. */
    private static GenericObjectPoolConfig poolSettings(int maxIdle, int maxTotal, int minIdle) {
        GenericObjectPoolConfig settings = new GenericObjectPoolConfig();
        settings.setMaxIdle(maxIdle);
        settings.setMaxTotal(maxTotal);
        settings.setMinIdle(minIdle);
        return settings;
    }
}
整体下来,flink-redis-connector源码比较简洁,可以作为自定义flink sink的入门学习。