Flink – metrics V1.2
WebRuntimeMonitor
.GET("/jobs/:jobid/vertices/:vertexid/metrics", handler(new JobVertexMetricsHandler(metricFetcher)))
.GET("/jobs/:jobid/metrics", handler(new JobMetricsHandler(metricFetcher)))
.GET("/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY + "/metrics", handler(new TaskManagerMetricsHandler(metricFetcher)))
.GET("/jobmanager/metrics", handler(new JobManagerMetricsHandler(metricFetcher)))
JobVertexMetricsHandler
AbstractMetricsHandler
MetricFetcher
核心就是fetchMetrics函数,会从JobManager获取数据,
private void fetchMetrics() { try { Option<scala.Tuple2<ActorGateway, Integer>> jobManagerGatewayAndWebPort = retriever.getJobManagerGatewayAndWebPort(); if (jobManagerGatewayAndWebPort.isDefined()) { ActorGateway jobManager = jobManagerGatewayAndWebPort.get()._1(); //得到JobManager的ActorGateway /** * Remove all metrics that belong to a job that is not running and no longer archived. */ Future<Object> jobDetailsFuture = jobManager.ask(new RequestJobDetails(true, true), timeout); //生成request获取job状态 jobDetailsFuture .onSuccess(new OnSuccess<Object>() { @Override public void onSuccess(Object result) throws Throwable { MultipleJobsDetails details = (MultipleJobsDetails) result; ArrayList<String> toRetain = new ArrayList<>(); for (JobDetails job : details.getRunningJobs()) { toRetain.add(job.getJobId().toString()); } for (JobDetails job : details.getFinishedJobs()) { toRetain.add(job.getJobId().toString()); } synchronized (metrics) { metrics.jobs.keySet().retainAll(toRetain); //只保留Runing和Finished的job,即不正常的都删掉 } } }, ctx); logErrorOnFailure(jobDetailsFuture, "Fetching of JobDetails failed."); String jobManagerPath = jobManager.path(); String queryServicePath = jobManagerPath.substring(0, jobManagerPath.lastIndexOf('/') + 1) + MetricQueryService.METRIC_QUERY_SERVICE_NAME; ActorRef jobManagerQueryService = actorSystem.actorFor(queryServicePath); queryMetrics(jobManagerQueryService); //查询jobManager的Metrics /** * We first request the list of all registered task managers from the job manager, and then * request the respective metric dump from each task manager. * * All stored metrics that do not belong to a registered task manager will be removed. 
*/ Future<Object> registeredTaskManagersFuture = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout); //查询所有taskManager registeredTaskManagersFuture .onSuccess(new OnSuccess<Object>() { @Override public void onSuccess(Object result) throws Throwable { Iterable<Instance> taskManagers = ((JobManagerMessages.RegisteredTaskManagers) result).asJavaIterable(); List<String> activeTaskManagers = new ArrayList<>(); for (Instance taskManager : taskManagers) { //遍历taskManager activeTaskManagers.add(taskManager.getId().toString()); String taskManagerPath = taskManager.getTaskManagerGateway().getAddress(); String queryServicePath = taskManagerPath.substring(0, taskManagerPath.lastIndexOf('/') + 1) + MetricQueryService.METRIC_QUERY_SERVICE_NAME + "_" + taskManager.getTaskManagerID().getResourceIdString(); ActorRef taskManagerQueryService = actorSystem.actorFor(queryServicePath); queryMetrics(taskManagerQueryService); //查询每个taskMananger的metrics } synchronized (metrics) { // remove all metrics belonging to unregistered task managers metrics.taskManagers.keySet().retainAll(activeTaskManagers); //删除所有的未注册的TaskManager } } }, ctx); logErrorOnFailure(registeredTaskManagersFuture, "Fetchin list of registered TaskManagers failed."); } } catch (Exception e) { LOG.warn("Exception while fetching metrics.", e); } }
queryMetrics
/** * Requests a metric dump from the given actor. * * @param actor ActorRef to request the dump from */ private void queryMetrics(ActorRef actor) { Future<Object> metricQueryFuture = new BasicGateway(actor).ask(MetricQueryService.getCreateDump(), timeout); //获取metrics dump metricQueryFuture .onSuccess(new OnSuccess<Object>() { @Override public void onSuccess(Object result) throws Throwable { addMetrics(result); } }, ctx); logErrorOnFailure(metricQueryFuture, "Fetching metrics failed."); } private void addMetrics(Object result) throws IOException { byte[] data = (byte[]) result; List<MetricDump> dumpedMetrics = deserializer.deserialize(data); for (MetricDump metric : dumpedMetrics) { metrics.add(metric); //把metrics dump加入metrics store } }
MetricStore
用嵌套的hashmap来存储metrics,瞬时值
final JobManagerMetricStore jobManager = new JobManagerMetricStore(); final Map<String, TaskManagerMetricStore> taskManagers = new HashMap<>(); final Map<String, JobMetricStore> jobs = new HashMap<>();
public static class JobManagerMetricStore extends ComponentMetricStore { } private static abstract class ComponentMetricStore { public final Map<String, String> metrics = new HashMap<>(); //store就是一个map public String getMetric(String name, String defaultValue) { String value = this.metrics.get(name); return value != null ? value : defaultValue; } }
MetricQueryService
public class MetricQueryService extends UntypedActor { private static final Logger LOG = LoggerFactory.getLogger(MetricQueryService.class); public static final String METRIC_QUERY_SERVICE_NAME = "MetricQueryService"; private static final CharacterFilter FILTER = new CharacterFilter() { @Override public String filterCharacters(String input) { return replaceInvalidChars(input); } }; private final MetricDumpSerializer serializer = new MetricDumpSerializer(); private final Map<Gauge<?>, Tuple2<QueryScopeInfo, String>> gauges = new HashMap<>(); private final Map<Counter, Tuple2<QueryScopeInfo, String>> counters = new HashMap<>(); private final Map<Histogram, Tuple2<QueryScopeInfo, String>> histograms = new HashMap<>(); private final Map<Meter, Tuple2<QueryScopeInfo, String>> meters = new HashMap<>();
收到CreateDump请求,
} else if (message instanceof CreateDump) { byte[] dump = serializer.serialize(counters, gauges, histograms, meters); getSender().tell(dump, getSelf());
Start
/**
 * Starts the MetricQueryService actor in the given actor system.
 *
 * <p>The actor is named {@code METRIC_QUERY_SERVICE_NAME}; when a resource ID is given, the
 * name is extended by {@code "_"} and the resource ID string.
 *
 * @param actorSystem The actor system running the MetricQueryService
 * @param resourceID resource ID to disambiguate the actor name, may be {@code null}
 * @return actor reference to the MetricQueryService
 */
public static ActorRef startMetricQueryService(ActorSystem actorSystem, ResourceID resourceID) {
    String actorName = METRIC_QUERY_SERVICE_NAME;
    if (resourceID != null) {
        actorName = actorName + "_" + resourceID.getResourceIdString();
    }
    Props props = Props.create(MetricQueryService.class);
    return actorSystem.actorOf(props, actorName);
}
在MetricRegistry中把metrics注册到QueryService中,
if (queryService != null) { MetricQueryService.notifyOfAddedMetric(queryService, metric, metricName, group); }
采集点
numRecordsIn
StreamInputProcessor –> processInput
@SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") public boolean processInput(OneInputStreamOperator<IN, ?> streamOperator, final Object lock) throws Exception { if (numRecordsIn == null) { numRecordsIn = ((OperatorMetricGroup) streamOperator.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter(); } //...... // now we can do the actual processing StreamRecord<IN> record = recordOrMark.asRecord(); synchronized (lock) { numRecordsIn.inc(); //执行processElement前加一 streamOperator.setKeyContextElement1(record); streamOperator.processElement(record); } return true;
如果是chaining,
ChainingOutput
private static class ChainingOutput<T> implements Output<StreamRecord<T>> { protected final OneInputStreamOperator<T, ?> operator; protected final Counter numRecordsIn; public ChainingOutput(OneInputStreamOperator<T, ?> operator) { this.operator = operator; this.numRecordsIn = ((OperatorMetricGroup) operator.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter(); //初始化 } @Override public void collect(StreamRecord<T> record) { try { numRecordsIn.inc(); //对于chain,在output时调用processElement operator.setKeyContextElement1(record); operator.processElement(record); } catch (Exception e) { throw new ExceptionInChainedOperatorException(e); } }
numRecordsOut
在AbstractStreamOperator初始化时,
生成CountingOutput
@Override public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) { this.container = containingTask; this.config = config; this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName()); this.output = new CountingOutput(output, ((OperatorMetricGroup) this.metrics).getIOMetricGroup().getNumRecordsOutCounter()); //生成CountingOutput
这个output,
在processWatermark,processElement中会用于emit数据
output.emitWatermark(mark);
public class CountingOutput implements Output<StreamRecord<OUT>> { private final Output<StreamRecord<OUT>> output; private final Counter numRecordsOut; public CountingOutput(Output<StreamRecord<OUT>> output, Counter counter) { this.output = output; this.numRecordsOut = counter; } @Override public void emitWatermark(Watermark mark) { output.emitWatermark(mark); } @Override public void emitLatencyMarker(LatencyMarker latencyMarker) { output.emitLatencyMarker(latencyMarker); } @Override public void collect(StreamRecord<OUT> record) { numRecordsOut.inc(); //发出的时候,inc numRecordsOut output.collect(record); } @Override public void close() { output.close(); } }
注意numRecordsOut和numRecordsIn,除了会统计operator级别的,还会统计task级别的,逻辑在
AbstractStreamOperator
public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) { this.container = containingTask; this.config = config; this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName()); this.output = new CountingOutput(output, ((OperatorMetricGroup) this.metrics).getIOMetricGroup().getNumRecordsOutCounter()); if (config.isChainStart()) { ((OperatorMetricGroup) this.metrics).getIOMetricGroup().reuseInputMetricsForTask(); } if (config.isChainEnd()) { ((OperatorMetricGroup) this.metrics).getIOMetricGroup().reuseOutputMetricsForTask(); }
OperatorIOMetricGroup
/**
 * Hands this group's {@code numRecordsIn} counter to the task-level IO metric group
 * (the IO metric group of the parent of {@code parentMetricGroup}).
 */
public void reuseInputMetricsForTask() {
    parentMetricGroup.parent().getIOMetricGroup().reuseRecordsInputCounter(this.numRecordsIn);
}

/**
 * Hands this group's {@code numRecordsOut} counter to the task-level IO metric group
 * (the IO metric group of the parent of {@code parentMetricGroup}).
 */
public void reuseOutputMetricsForTask() {
    parentMetricGroup.parent().getIOMetricGroup().reuseRecordsOutputCounter(this.numRecordsOut);
}
可以看到,会将ChainHead的numRecordsIn,set到task的TaskIOMetricGroup
而将ChainEnd的numRecordsOut,set到task的TaskIOMetricGroup
看起来很合理
numRecordsInPerSecond,numRecordsOutPerSecond
在OperatorIOMetricGroup
public OperatorIOMetricGroup(OperatorMetricGroup parentMetricGroup) { super(parentMetricGroup); numRecordsIn = parentMetricGroup.counter(MetricNames.IO_NUM_RECORDS_IN); numRecordsOut = parentMetricGroup.counter(MetricNames.IO_NUM_RECORDS_OUT); numRecordsInRate = parentMetricGroup.meter(MetricNames.IO_NUM_RECORDS_IN_RATE, new MeterView(numRecordsIn, 60)); numRecordsOutRate = parentMetricGroup.meter(MetricNames.IO_NUM_RECORDS_OUT_RATE, new MeterView(numRecordsOut, 60)); }
可以看到numRecordsInRate和numRecordsOutRate,只是numRecordsIn和numRecordsOut的MeterView
public class MeterView implements Meter, View { /** The underlying counter maintaining the count */ private final Counter counter; /** The time-span over which the average is calculated */ private final int timeSpanInSeconds; /** Circular array containing the history of values */ private final long[] values; /** The index in the array for the current time */ private int time = 0; /** The last rate we computed */ private double currentRate = 0; public MeterView(Counter counter, int timeSpanInSeconds) { this.counter = counter; this.timeSpanInSeconds = timeSpanInSeconds - (timeSpanInSeconds % UPDATE_INTERVAL_SECONDS); //timeSpanInSeconds需要是UPDATE_INTERVAL_SECONDS(5)的倍数, this.values = new long[this.timeSpanInSeconds / UPDATE_INTERVAL_SECONDS + 1]; //比如timeSpanInSeconds为60,那么就需要保存12个value } @Override public void markEvent() { this.counter.inc(); } @Override public void markEvent(long n) { this.counter.inc(n); } @Override public long getCount() { return counter.getCount(); } @Override public double getRate() { //获取平均值 return currentRate; } @Override public void update() { //会被以UPDATE_INTERVAL_SECONDS为间隔调用 time = (time + 1) % values.length; values[time] = counter.getCount(); currentRate = ((double) (values[time] - values[(time + 1) % values.length]) / timeSpanInSeconds); //values保存了timeSpanInSeconds时间段的counter的变化过程,所以用最新的减最老的,再除以timeSpanInSeconds } }
这个实现真是tricky,不好的设计
在MetricRegistry中,会创建
ViewUpdater
/**
 * Announces a newly added metric to every consumer configured on this registry:
 * all non-null reporters, the query service (if set), and - for metrics that are
 * also a {@code View} - the view updater, which is created on first use.
 *
 * <p>Any exception raised along the way is logged and not propagated; the steps after
 * the failing one are skipped for this metric.
 *
 * @param metric     the metric to register
 * @param metricName the name of the metric
 * @param group      the group the metric belongs to
 */
public void register(Metric metric, String metricName, AbstractMetricGroup group) {
    try {
        if (reporters != null) {
            // Index-based loop: the reporter's position is passed on to the FrontMetricGroup.
            for (int i = 0; i < reporters.size(); i++) {
                MetricReporter reporter = reporters.get(i);
                if (reporter != null) {
                    FrontMetricGroup front = new FrontMetricGroup<AbstractMetricGroup<?>>(i, group);
                    reporter.notifyOfAddedMetric(metric, metricName, front);
                }
            }
        }
        if (queryService != null) {
            MetricQueryService.notifyOfAddedMetric(queryService, metric, metricName, group);
        }
        if (metric instanceof View) {
            // The updater is created lazily, only once the first View metric shows up.
            if (viewUpdater == null) {
                viewUpdater = new ViewUpdater(executor);
            }
            viewUpdater.notifyOfAddedView((View) metric);
        }
    } catch (Exception e) {
        LOG.error("Error while registering metric.", e);
    }
}
并且在register metrics的时候,除了注册到reporter,MetricQueryService
如果是view的子类还要,注册到ViewUpdater
public ViewUpdater(ScheduledExecutorService executor) { executor.scheduleWithFixedDelay(new ViewUpdaterTask(lock, toAdd, toRemove), 5, UPDATE_INTERVAL_SECONDS, TimeUnit.SECONDS); }
ViewUpdater会定期执行ViewUpdaterTask,task中就会调用view的update
numBytesInLocal, numBytesInRemote
在RemoteInputChannel和LocalInputChannel中,
public LocalInputChannel( SingleInputGate inputGate, int channelIndex, ResultPartitionID partitionId, ResultPartitionManager partitionManager, TaskEventDispatcher taskEventDispatcher, int initialBackoff, int maxBackoff, TaskIOMetricGroup metrics) { super(inputGate, channelIndex, partitionId, initialBackoff, maxBackoff, metrics.getNumBytesInLocalCounter()); //metrics.getNumBytesInLocalCounter() public RemoteInputChannel( SingleInputGate inputGate, int channelIndex, ResultPartitionID partitionId, ConnectionID connectionId, ConnectionManager connectionManager, int initialBackOff, int maxBackoff, TaskIOMetricGroup metrics) { super(inputGate, channelIndex, partitionId, initialBackOff, maxBackoff, metrics.getNumBytesInRemoteCounter()); // metrics.getNumBytesInRemoteCounter()
并且都会在
BufferAndAvailability getNextBuffer()
会调用,
numBytesIn.inc(next.getSize());
numBytesOut
RecordWriter
public class RecordWriter<T extends IOReadableWritable> { private Counter numBytesOut = new SimpleCounter(); public void emit(T record) throws IOException, InterruptedException { for (int targetChannel : channelSelector.selectChannels(record, numChannels)) { sendToTarget(record, targetChannel); } } private void sendToTarget(T record, int targetChannel) throws IOException, InterruptedException { RecordSerializer<T> serializer = serializers[targetChannel]; synchronized (serializer) { SerializationResult result = serializer.addRecord(record); while (result.isFullBuffer()) { Buffer buffer = serializer.getCurrentBuffer(); if (buffer != null) { numBytesOut.inc(buffer.getSize()); //计数numBytesOut writeAndClearBuffer(buffer, targetChannel, serializer); // If this was a full record, we are done. Not breaking // out of the loop at this point will lead to another // buffer request before breaking out (that would not be // a problem per se, but it can lead to stalls in the // pipeline). if (result.isFullRecord()) { break; } } else { buffer = targetPartition.getBufferProvider().requestBufferBlocking(); result = serializer.setNextBuffer(buffer); } } } }
RecordWriterOutput.collect –> StreamRecordWriter.emit –> RecordWriter.emit
inputQueueLength, outputQueueLength, inPoolUsage, outPoolUsage
TaskIOMetricGroup
/** * Initialize Buffer Metrics for a task */ public void initializeBufferMetrics(Task task) { final MetricGroup buffers = addGroup("buffers"); buffers.gauge("inputQueueLength", new InputBuffersGauge(task)); buffers.gauge("outputQueueLength", new OutputBuffersGauge(task)); buffers.gauge("inPoolUsage", new InputBufferPoolUsageGauge(task)); buffers.gauge("outPoolUsage", new OutputBufferPoolUsageGauge(task)); }
inputQueueLength
for (SingleInputGate inputGate : task.getAllInputGates()) { totalBuffers += inputGate.getNumberOfQueuedBuffers(); }
inputGate.getNumberOfQueuedBuffers
for (InputChannel channel : inputChannels.values()) { if (channel instanceof RemoteInputChannel) { // 只统计RemoteInputChannel totalBuffers += ((RemoteInputChannel) channel).getNumberOfQueuedBuffers(); } }
getNumberOfQueuedBuffers
/** * The received buffers. Received buffers are enqueued by the network I/O thread and the queue * is consumed by the receiving task thread. */ private final Queue<Buffer> receivedBuffers = new ArrayDeque<>(); public int getNumberOfQueuedBuffers() { synchronized (receivedBuffers) { return receivedBuffers.size(); } }
outputQueueLength
for (ResultPartition producedPartition : task.getProducedPartitions()) { totalBuffers += producedPartition.getNumberOfQueuedBuffers(); }
ResultPartition getNumberOfQueuedBuffers
for (ResultSubpartition subpartition : subpartitions) { totalBuffers += subpartition.getNumberOfQueuedBuffers(); }
SpillableSubpartition getNumberOfQueuedBuffers
class SpillableSubpartition extends ResultSubpartition { /** Buffers are kept in this queue as long as we weren't ask to release any. */ private final ArrayDeque<Buffer> buffers = new ArrayDeque<>(); @Override public int getNumberOfQueuedBuffers() { return buffers.size(); }
inputQueueLength, outputQueueLength
指标的含义是,InputChannel和ResultPartition持有的buffer个数,这些buffer被读完后会release,所以链路通畅的话,length应该会很小
inPoolUsage
int usedBuffers = 0; int bufferPoolSize = 0; for (SingleInputGate inputGate : task.getAllInputGates()) { usedBuffers += inputGate.getBufferPool().bestEffortGetNumOfUsedBuffers(); bufferPoolSize += inputGate.getBufferPool().getNumBuffers(); } if (bufferPoolSize != 0) { return ((float) usedBuffers) / bufferPoolSize; } else { return 0.0f; }
bestEffortGetNumOfUsedBuffers()
/**
 * Returns the number of requested memory segments minus the number of currently available
 * ones, never less than zero.
 */
@Override
public int bestEffortGetNumOfUsedBuffers() {
    final int inUse = numberOfRequestedMemorySegments - availableMemorySegments.size();
    return inUse < 0 ? 0 : inUse;
}
numberOfRequestedMemorySegments,从bufferpool申请多少
availableMemorySegments,可用的
所以相减就是使用多少
outPoolUsage
int usedBuffers = 0; int bufferPoolSize = 0; for (ResultPartition resultPartition : task.getProducedPartitions()) { usedBuffers += resultPartition.getBufferPool().bestEffortGetNumOfUsedBuffers(); bufferPoolSize += resultPartition.getBufferPool().getNumBuffers(); } if (bufferPoolSize != 0) { return ((float) usedBuffers) / bufferPoolSize; } else { return 0.0f; }
和inPoolUsage类似,也是看bufferPool的情况
所以inPoolUsage,outPoolUsage表示的是inputgate和resultpartition中bufferpool的使用情况
这个bufferpool是inputgate初始化的时候,注册到NetworkEnvironment创建的,
// Setup the buffer pool for each buffer reader final SingleInputGate[] inputGates = task.getAllInputGates(); for (SingleInputGate gate : inputGates) { BufferPool bufferPool = null; try { bufferPool = networkBufferPool.createBufferPool(gate.getNumberOfInputChannels(), false); gate.setBufferPool(bufferPool); }
可以看到默认大小是 input channels 的个数(gate.getNumberOfInputChannels())
如果pool用完了,那么InputGate就无法继续读取新的数据,ResultPartition也无法继续写出新的数据
latency
在AbstractStreamOperator中,
setup,
protected LatencyGauge latencyGauge;
latencyGauge = this.metrics.gauge("latency", new LatencyGauge(historySize));
注意,这里metrics是OperatorMetricGroup
this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());
TaskMetricGroup
public OperatorMetricGroup addOperator(String name) { OperatorMetricGroup operator = new OperatorMetricGroup(this.registry, this, name); synchronized (this) { OperatorMetricGroup previous = operators.put(name, operator); if (previous == null) { // no operator group so far return operator; } else { // already had an operator group. restore that one. operators.put(name, previous); return previous; } } }
LatencyGauge的定义,
/**
 * Gauge that keeps latency statistics per latency source and reports them as a nested map:
 * source descriptor string -> statistic name ("max", "mean", "min", "p50", "p95", "p99") -> value.
 *
 * The gauge uses a HashMap internally to avoid classloading issues when accessing
 * the values using JMX.
 */
protected static class LatencyGauge implements Gauge<Map<String, HashMap<String, Double>>> {
    // Key: LatencySourceDescriptor (built from the marker's vertex ID and subtask index).
    // Value: DescriptiveStatistics, the statistics accumulator for that source.
    private final Map<LatencySourceDescriptor, DescriptiveStatistics> latencyStats = new HashMap<>();
    // Size handed to every DescriptiveStatistics instance created by this gauge.
    private final int historySize;

    LatencyGauge(int historySize) {
        this.historySize = historySize;
    }

    /**
     * Records the latency carried by the given marker.
     *
     * @param marker the latency marker that arrived at this operator
     * @param isSink negated and passed as the second argument of
     *               {@code LatencySourceDescriptor.of}
     */
    public void reportLatency(LatencyMarker marker, boolean isSink) {
        LatencySourceDescriptor sourceDescriptor = LatencySourceDescriptor.of(marker, !isSink);
        DescriptiveStatistics sourceStats = latencyStats.get(sourceDescriptor);
        if (sourceStats == null) {
            // First marker from this source: create its statistics accumulator.
            // 512 element window (4 kb)
            // NOTE(review): the actual window is historySize; 512 is presumably the default - confirm.
            sourceStats = new DescriptiveStatistics(this.historySize);
            latencyStats.put(sourceDescriptor, sourceStats);
        }
        long now = System.currentTimeMillis();
        // Latency = current wall-clock time minus the time recorded in the marker.
        sourceStats.addValue(now - marker.getMarkedTime());
    }

    /**
     * Builds a snapshot of the statistics of all known sources.
     *
     * @return map from source descriptor string to its statistics
     */
    @Override
    public Map<String, HashMap<String, Double>> getValue() {
        while (true) {
            try {
                Map<String, HashMap<String, Double>> ret = new HashMap<>();
                for (Map.Entry<LatencySourceDescriptor, DescriptiveStatistics> source : latencyStats.entrySet()) {
                    HashMap<String, Double> sourceStatistics = new HashMap<>(6);
                    sourceStatistics.put("max", source.getValue().getMax());
                    sourceStatistics.put("mean", source.getValue().getMean());
                    sourceStatistics.put("min", source.getValue().getMin());
                    sourceStatistics.put("p50", source.getValue().getPercentile(50));
                    sourceStatistics.put("p95", source.getValue().getPercentile(95));
                    sourceStatistics.put("p99", source.getValue().getPercentile(99));
                    ret.put(source.getKey().toString(), sourceStatistics);
                }
                return ret;
                // Concurrent access onto the "latencyStats" map could cause
                // ConcurrentModificationExceptions. To avoid unnecessary blocking
                // of the reportLatency() method, we retry this operation until
                // it succeeds.
            } catch(ConcurrentModificationException ignore) {
                LOG.debug("Unable to report latency statistics", ignore);
            }
        }
    }
}
这个Gauge.getValue返回的是个map,太奇葩
latencyStats里面有多少entry,取决于有多少source,以及每个source有几个并发
因为他要记录,每个source operator的某个subtask,到当前operator的该subtask的延迟
/**
 * Creates the descriptor identifying the source of a latency marker.
 *
 * @param marker             the latency marker
 * @param ignoreSubtaskIndex if {@code true}, the subtask index is set to {@code -1} instead
 *                           of the marker's subtask index
 * @return a descriptor made of the marker's vertex ID and the chosen subtask index
 */
public static LatencySourceDescriptor of(LatencyMarker marker, boolean ignoreSubtaskIndex) {
    if (!ignoreSubtaskIndex) {
        return new LatencySourceDescriptor(marker.getVertexID(), marker.getSubtaskIndex());
    }
    return new LatencySourceDescriptor(marker.getVertexID(), -1);
}
LatencySourceDescriptor构造函数,由vertexid,和subtaskIndex组成
如果忽略subtaskindex,置为-1
流程
StreamSource
定义LatencyMarksEmitter
private static class LatencyMarksEmitter<OUT> { private final ScheduledFuture<?> latencyMarkTimer; public LatencyMarksEmitter( final ProcessingTimeService processingTimeService, final Output<StreamRecord<OUT>> output, long latencyTrackingInterval, final int vertexID, final int subtaskIndex) { latencyMarkTimer = processingTimeService.scheduleAtFixedRate( //根据processingTime定期发送latencyMarker new ProcessingTimeCallback() { @Override public void onProcessingTime(long timestamp) throws Exception { try { // ProcessingTimeService callbacks are executed under the checkpointing lock output.emitLatencyMarker(new LatencyMarker(timestamp, vertexID, subtaskIndex)); //emitLatencyMarker,以processTime为初始时间 } catch (Throwable t) { // we catch the Throwables here so that we don't trigger the processing // timer services async exception handler LOG.warn("Error while emitting latency marker.", t); } } }, 0L, latencyTrackingInterval); }
source.run,当isLatencyTrackingEnabled,schedule latency marker
public void run(final Object lockingObject, final Output<StreamRecord<OUT>> collector) throws Exception { final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic(); LatencyMarksEmitter latencyEmitter = null; if(getExecutionConfig().isLatencyTrackingEnabled()) { latencyEmitter = new LatencyMarksEmitter<>( getProcessingTimeService(), collector, getExecutionConfig().getLatencyTrackingInterval(), getOperatorConfig().getVertexID(), getRuntimeContext().getIndexOfThisSubtask()); }
StreamInputProcessor –> processInput
如果是isLatencyMarker
else if(recordOrMark.isLatencyMarker()) { // handle latency marker synchronized (lock) { streamOperator.processLatencyMarker(recordOrMark.asLatencyMarker()); } continue; }
对于,chaining, ChainingOutput
private static class ChainingOutput<T> implements Output<StreamRecord<T>> { protected final OneInputStreamOperator<T, ?> operator; protected final Counter numRecordsIn; @Override public void emitLatencyMarker(LatencyMarker latencyMarker) { try { operator.processLatencyMarker(latencyMarker); } catch (Exception e) { throw new ExceptionInChainedOperatorException(e); } }
AbstractStreamOperator
public void processLatencyMarker(LatencyMarker latencyMarker) throws Exception { reportOrForwardLatencyMarker(latencyMarker); }
protected void reportOrForwardLatencyMarker(LatencyMarker marker) { // all operators are tracking latencies this.latencyGauge.reportLatency(marker, false); // everything except sinks forwards latency markers this.output.emitLatencyMarker(marker); }
调用到latencyGauge.reportLatency,逻辑如上
后续继续emitLatencyMarker
currentLowWatermark, checkpointAlignmentTime
OneInputStreamTask
@Override public void init() throws Exception { if (numberOfInputs > 0) { InputGate[] inputGates = getEnvironment().getAllInputGates(); inputProcessor = new StreamInputProcessor<IN>( inputGates, inSerializer, this, configuration.getCheckpointMode(), getEnvironment().getIOManager(), getEnvironment().getTaskManagerInfo().getConfiguration()); // make sure that stream tasks report their I/O statistics inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup()); } }
StreamInputProcessor
public void setMetricGroup(TaskIOMetricGroup metrics) { metrics.gauge("currentLowWatermark", new Gauge<Long>() { @Override public Long getValue() { return lastEmittedWatermark; } }); metrics.gauge("checkpointAlignmentTime", new Gauge<Long>() { @Override public Long getValue() { return barrierHandler.getAlignmentDurationNanos(); } }); }
currentLowWatermark,即lastEmittedWatermark
默认值是,
lastEmittedWatermark = Long.MIN_VALUE;
所以如果没有assignTimestampsAndWatermarks,那么currentLowWatermark会是一个极大的负数
public boolean processInput(OneInputStreamOperator<IN, ?> streamOperator, final Object lock) throws Exception { while (true) { if (currentRecordDeserializer != null) { if (result.isFullRecord()) { StreamElement recordOrMark = deserializationDelegate.getInstance(); if (recordOrMark.isWatermark()) { long watermarkMillis = recordOrMark.asWatermark().getTimestamp(); if (watermarkMillis > watermarks[currentChannel]) { // 更新每个channel对应的waterMark watermarks[currentChannel] = watermarkMillis; long newMinWatermark = Long.MAX_VALUE; for (long watermark: watermarks) { // 找出所有channel最小的watermark,以最小的为准 newMinWatermark = Math.min(watermark, newMinWatermark); } if (newMinWatermark > lastEmittedWatermark) { lastEmittedWatermark = newMinWatermark; // 将最小的watermark设为lastEmittedWatermark synchronized (lock) { streamOperator.processWatermark(new Watermark(lastEmittedWatermark)); } } } continue; }
checkpointAlignmentTime
barrierHandler.getAlignmentDurationNanos
/**
 * Returns the alignment duration in nanoseconds: the time elapsed since the recorded
 * alignment start if that timestamp is positive, otherwise the latest stored duration.
 */
@Override
public long getAlignmentDurationNanos() {
    // Read the field once so the check and the subtraction use the same value.
    final long alignmentStart = this.startOfAlignmentTimestamp;
    if (alignmentStart > 0) {
        return System.nanoTime() - alignmentStart;
    }
    return latestAlignmentDurationNanos;
}
startOfAlignmentTimestamp是在这次checkpoint开始的时候打的时间戳,即beginNewAlignment
/**
 * Starts the alignment for a new checkpoint.
 *
 * @param checkpointId id of the checkpoint whose alignment begins
 * @param channelIndex index of the channel passed on to {@code onBarrier}
 * @throws IOException declared for the call to {@code onBarrier}
 */
private void beginNewAlignment(long checkpointId, int channelIndex) throws IOException {
    currentCheckpointId = checkpointId;
    onBarrier(channelIndex);
    // Start timestamp of this alignment; getAlignmentDurationNanos() measures against it.
    startOfAlignmentTimestamp = System.nanoTime();
}
beginNewAlignment在
processBarrier中被调用,
if (numBarriersReceived > 0) { // this is only true if some alignment is already progress and was not canceled if (barrierId == currentCheckpointId) { // regular case onBarrier(channelIndex); } else if (barrierId > currentCheckpointId) {// 当收到新的checkpointid,所以老的id已经过期,需要产生新的checkpoint // we did not complete the current checkpoint, another started before LOG.warn("Received checkpoint barrier for checkpoint {} before completing current checkpoint {}. " + "Skipping current checkpoint.", barrierId, currentCheckpointId); // let the task know we are not completing this notifyAbort(currentCheckpointId, new CheckpointDeclineSubsumedException(barrierId)); // abort the current checkpoint releaseBlocksAndResetBarriers(); // begin a the new checkpoint beginNewAlignment(barrierId, channelIndex); //标识checkpoint开始 } else { // ignore trailing barrier from an earlier checkpoint (obsolete now) return; } } else if (barrierId > currentCheckpointId) { //新的checkpoint开始 // first barrier of a new checkpoint beginNewAlignment(barrierId, channelIndex); //标识checkpoint开始 }
所以checkpointAlignmentTime的意思是,当前的checkpoint已经等待多久,因为要等到所有input channel的barrier,checkpoint才会触发
单位是纳秒,所以billion级别代表秒
如果比较大,说明各个并发之间的延迟差异较大,或延迟较高