Flink – metrics V1.2 - fxjwind

Flink – metrics V1.2

WebRuntimeMonitor

.GET("/jobs/:jobid/vertices/:vertexid/metrics", handler(new JobVertexMetricsHandler(metricFetcher)))

.GET("/jobs/:jobid/metrics", handler(new JobMetricsHandler(metricFetcher)))

.GET("/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY + "/metrics", handler(new TaskManagerMetricsHandler(metricFetcher)))

.GET("/jobmanager/metrics", handler(new JobManagerMetricsHandler(metricFetcher)))

JobVertexMetricsHandler

AbstractMetricsHandler

MetricFetcher

核心就是fetchMetrics函数，会从JobManager获取数据，

/**
 * Refreshes the metric store with the current state of the cluster.
 *
 * <p>If a JobManager gateway is available, this method asynchronously
 * <ol>
 *   <li>asks the JobManager for the job details and drops the stored job metrics of every job
 *       that is neither in the running nor in the finished list of the reply,</li>
 *   <li>requests a metric dump from the JobManager's MetricQueryService,</li>
 *   <li>asks the JobManager for the registered TaskManagers, requests a metric dump from each
 *       TaskManager's MetricQueryService and drops the stored metrics of all TaskManagers
 *       that are not part of the reply.</li>
 * </ol>
 *
 * <p>Each future is handed to {@code logErrorOnFailure} together with an error message. Any
 * exception thrown while issuing the requests is logged as a warning and not propagated.
 */
private void fetchMetrics() {
    try {
        Option<scala.Tuple2<ActorGateway, Integer>> jobManagerGatewayAndWebPort = retriever.getJobManagerGatewayAndWebPort();
        if (jobManagerGatewayAndWebPort.isDefined()) {
            // ActorGateway of the JobManager (first element of the tuple)
            ActorGateway jobManager = jobManagerGatewayAndWebPort.get()._1();

            /**
             * Remove all metrics that belong to a job that is not running and no longer archived.
             */
            // request the job details so we know which jobs are still of interest
            Future<Object> jobDetailsFuture = jobManager.ask(new RequestJobDetails(true, true), timeout);
            jobDetailsFuture
                .onSuccess(new OnSuccess<Object>() {
                    @Override
                    public void onSuccess(Object result) throws Throwable {
                        MultipleJobsDetails details = (MultipleJobsDetails) result;
                        List<String> toRetain = new ArrayList<>();
                        for (JobDetails job : details.getRunningJobs()) {
                            toRetain.add(job.getJobId().toString());
                        }
                        for (JobDetails job : details.getFinishedJobs()) {
                            toRetain.add(job.getJobId().toString());
                        }
                        synchronized (metrics) {
                            // keep only running and finished jobs; metrics of all other jobs are dropped
                            metrics.jobs.keySet().retainAll(toRetain);
                        }
                    }
                }, ctx);
            logErrorOnFailure(jobDetailsFuture, "Fetching of JobDetails failed.");

            // The query service path is derived from the JobManager path by replacing its last segment.
            String jobManagerPath = jobManager.path();
            String queryServicePath = jobManagerPath.substring(0, jobManagerPath.lastIndexOf('/') + 1) + MetricQueryService.METRIC_QUERY_SERVICE_NAME;
            ActorRef jobManagerQueryService = actorSystem.actorFor(queryServicePath);

            // query the JobManager's own metrics
            queryMetrics(jobManagerQueryService);

            /**
             * We first request the list of all registered task managers from the job manager, and then
             * request the respective metric dump from each task manager.
             *
             * All stored metrics that do not belong to a registered task manager will be removed.
             */
            Future<Object> registeredTaskManagersFuture = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout);
            registeredTaskManagersFuture
                .onSuccess(new OnSuccess<Object>() {
                    @Override
                    public void onSuccess(Object result) throws Throwable {
                        Iterable<Instance> taskManagers = ((JobManagerMessages.RegisteredTaskManagers) result).asJavaIterable();
                        List<String> activeTaskManagers = new ArrayList<>();
                        for (Instance taskManager : taskManagers) {
                            activeTaskManagers.add(taskManager.getId().toString());

                            // The TaskManager's query service name carries the resource ID as a suffix.
                            String taskManagerPath = taskManager.getTaskManagerGateway().getAddress();
                            String queryServicePath = taskManagerPath.substring(0, taskManagerPath.lastIndexOf('/') + 1) + MetricQueryService.METRIC_QUERY_SERVICE_NAME + "_" + taskManager.getTaskManagerID().getResourceIdString();
                            ActorRef taskManagerQueryService = actorSystem.actorFor(queryServicePath);

                            // query the metrics of this TaskManager
                            queryMetrics(taskManagerQueryService);
                        }
                        synchronized (metrics) { // remove all metrics belonging to unregistered task managers
                            metrics.taskManagers.keySet().retainAll(activeTaskManagers);
                        }
                    }
                }, ctx);
            logErrorOnFailure(registeredTaskManagersFuture, "Fetching list of registered TaskManagers failed.");
        }
    } catch (Exception e) {
        LOG.warn("Exception while fetching metrics.", e);
    }
}

queryMetrics

/**
 * Asks the given actor for a metric dump and, once the reply arrives, adds the dumped
 * metrics to the store. The future is also handed to {@code logErrorOnFailure}.
 *
 * @param actor ActorRef to request the dump from
 */
private void queryMetrics(ActorRef actor) {
    final BasicGateway gateway = new BasicGateway(actor);
    final Future<Object> dumpFuture = gateway.ask(MetricQueryService.getCreateDump(), timeout);

    // Callback that feeds the reply into the metric store.
    final OnSuccess<Object> storeDump = new OnSuccess<Object>() {
        @Override
        public void onSuccess(Object dump) throws Throwable {
            addMetrics(dump);
        }
    };
    dumpFuture.onSuccess(storeDump, ctx);

    logErrorOnFailure(dumpFuture, "Fetching metrics failed.");
}

/**
 * Deserializes a metric dump reply and adds every contained metric to the metric store.
 *
 * @param result reply of the query service; cast to {@code byte[]} before deserialization
 * @throws IOException declared for the deserialization step
 */
private void addMetrics(Object result) throws IOException {
    for (MetricDump dumped : deserializer.deserialize((byte[]) result)) {
        metrics.add(dumped);
    }
}

MetricStore

用嵌套的hashmap来存储metrics，瞬时值

// Metrics of the JobManager itself.
final JobManagerMetricStore jobManager = new JobManagerMetricStore();
// Per-TaskManager metric stores, keyed by a String id (fetchMetrics prunes this map by TaskManager instance id).
final Map<String, TaskManagerMetricStore> taskManagers = new HashMap<>();
// Per-job metric stores, keyed by a String id (fetchMetrics prunes this map by job id).
final Map<String, JobMetricStore> jobs = new HashMap<>();

/** Metric store for the JobManager; adds nothing on top of {@code ComponentMetricStore}. */
public static class JobManagerMetricStore extends ComponentMetricStore {
}

/** Base class of the per-component stores: a flat map from metric name to its value as a string. */
private static abstract class ComponentMetricStore {
    /** The stored metrics, keyed by metric name. */
    public final Map<String, String> metrics = new HashMap<>();

    /**
     * Looks up a metric value.
     *
     * @param name name of the metric
     * @param defaultValue value to return when the lookup yields {@code null}
     * @return the stored value, or {@code defaultValue} if none is stored
     */
    public String getMetric(String name, String defaultValue) {
        final String stored = metrics.get(name);
        if (stored == null) {
            return defaultValue;
        }
        return stored;
    }
}

MetricQueryService

public class MetricQueryService extends UntypedActor {
    private static final Logger LOG = LoggerFactory.getLogger(MetricQueryService.class);

    public static final String METRIC_QUERY_SERVICE_NAME = "MetricQueryService";

    private static final CharacterFilter FILTER = new CharacterFilter() {
        @Override
        public String filterCharacters(String input) {
            return replaceInvalidChars(input);
        }
    };

    private final MetricDumpSerializer serializer = new MetricDumpSerializer();

    private final Map<Gauge<?>, Tuple2<QueryScopeInfo, String>> gauges = new HashMap<>();
    private final Map<Counter, Tuple2<QueryScopeInfo, String>> counters = new HashMap<>();
    private final Map<Histogram, Tuple2<QueryScopeInfo, String>> histograms = new HashMap<>();
    private final Map<Meter, Tuple2<QueryScopeInfo, String>> meters = new HashMap<>();

收到CreateDump请求，

} else if (message instanceof CreateDump) {
    byte[] dump = serializer.serialize(counters, gauges, histograms, meters);
    getSender().tell(dump, getSelf());

Start

   /**
     * Creates the MetricQueryService actor inside the given actor system.
     *
     * <p>The actor is named {@code METRIC_QUERY_SERVICE_NAME}; if a resource ID is given,
     * {@code "_" + resourceID.getResourceIdString()} is appended to that name.
     *
     * @param actorSystem The actor system running the MetricQueryService
     * @param resourceID resource ID to disambiguate the actor name, may be {@code null}
     * @return actor reference to the MetricQueryService
     */
    public static ActorRef startMetricQueryService(ActorSystem actorSystem, ResourceID resourceID) {
        final String actorName;
        if (resourceID != null) {
            actorName = METRIC_QUERY_SERVICE_NAME + "_" + resourceID.getResourceIdString();
        } else {
            actorName = METRIC_QUERY_SERVICE_NAME;
        }

        final Props serviceProps = Props.create(MetricQueryService.class);
        return actorSystem.actorOf(serviceProps, actorName);
    }

在MetricRegistry中把metrics注册到QueryService中，

if (queryService != null) {
    MetricQueryService.notifyOfAddedMetric(queryService, metric, metricName, group);
}

采集点

numRecordsIn

StreamInputProcessor –> processInput

    @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
    public boolean processInput(OneInputStreamOperator<IN, ?> streamOperator, final Object lock) throws Exception {
        if (numRecordsIn == null) {
            numRecordsIn = ((OperatorMetricGroup) streamOperator.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter();
        }
        //......
        
                        
        // now we can do the actual processing
        StreamRecord<IN> record = recordOrMark.asRecord();
        synchronized (lock) {
            numRecordsIn.inc(); //执行processElement前加一
            streamOperator.setKeyContextElement1(record);
            streamOperator.processElement(record);
        }
        return true;

如果是chaining，

ChainingOutput

private static class ChainingOutput<T> implements Output<StreamRecord<T>> {
    
    protected final OneInputStreamOperator<T, ?> operator;
    protected final Counter numRecordsIn;

    /**
     * Creates an output that hands records to the given operator.
     *
     * @param operator the operator that receives the records; its numRecordsIn counter is
     *                 looked up once here and reused for every record
     */
    public ChainingOutput(OneInputStreamOperator<T, ?> operator) {
        this.operator = operator;
        // initialize the counter from the operator's IO metric group
        this.numRecordsIn = ((OperatorMetricGroup) operator.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter();
    }

    /**
     * Counts the record and passes it directly to the operator's {@code processElement}.
     *
     * @param record the record to hand to the operator
     * @throws ExceptionInChainedOperatorException wrapping any exception thrown while processing
     */
    @Override
    public void collect(StreamRecord<T> record) {
        try {
            // the chained operator is invoked directly from this output, so the input record is counted here
            numRecordsIn.inc();
            operator.setKeyContextElement1(record);
            operator.processElement(record);
        }
        catch (Exception e) {
            throw new ExceptionInChainedOperatorException(e);
        }
    }

numRecordsOut

在AbstractStreamOperator初始化时，

生成CountingOutput

    @Override
    public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {
        this.container = containingTask;
        this.config = config;
        
        this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());
        this.output = new CountingOutput(output, ((OperatorMetricGroup) this.metrics).getIOMetricGroup().getNumRecordsOutCounter()); //生成CountingOutput

这个output，

在processWatermark，processElement中会用于emit数据

output.emitWatermark(mark);

    /**
     * Output wrapper that increments a counter for every record passed to {@code collect}
     * before forwarding it to the wrapped output. Watermarks, latency markers and
     * {@code close()} are forwarded without touching the counter.
     */
    public class CountingOutput implements Output<StreamRecord<OUT>> {
        /** The wrapped output every call is delegated to. */
        private final Output<StreamRecord<OUT>> output;
        /** Counter incremented once per collected record. */
        private final Counter numRecordsOut;

        /**
         * @param output the output to delegate to
         * @param counter the counter to increment for every collected record
         */
        public CountingOutput(Output<StreamRecord<OUT>> output, Counter counter) {
            this.output = output;
            this.numRecordsOut = counter;
        }

        /** Forwards the watermark unchanged; not counted. */
        @Override
        public void emitWatermark(Watermark mark) {
            output.emitWatermark(mark);
        }

        /** Forwards the latency marker unchanged; not counted. */
        @Override
        public void emitLatencyMarker(LatencyMarker latencyMarker) {
            output.emitLatencyMarker(latencyMarker);
        }

        /** Increments numRecordsOut, then forwards the record. */
        @Override
        public void collect(StreamRecord<OUT> record) {
            numRecordsOut.inc(); // count at emit time, before the record is handed on
            output.collect(record);
        }

        /** Closes the wrapped output. */
        @Override
        public void close() {
            output.close();
        }
    }

注意numRecordsOut和numRecordsIn，除了会统计operator级别的，还会统计task级别的，逻辑在

AbstractStreamOperator

    public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {
        this.container = containingTask;
        this.config = config;
        
        this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());
        this.output = new CountingOutput(output, ((OperatorMetricGroup) this.metrics).getIOMetricGroup().getNumRecordsOutCounter());
        if (config.isChainStart()) {
            ((OperatorMetricGroup) this.metrics).getIOMetricGroup().reuseInputMetricsForTask();
        }
        if (config.isChainEnd()) {
            ((OperatorMetricGroup) this.metrics).getIOMetricGroup().reuseOutputMetricsForTask();
        }

OperatorIOMetricGroup

    /**
     * Hands this operator's numRecordsIn counter to the IO metric group of the parent's parent
     * group via {@code reuseRecordsInputCounter}.
     */
    public void reuseInputMetricsForTask() {
        parentMetricGroup.parent().getIOMetricGroup().reuseRecordsInputCounter(this.numRecordsIn);
    }

    /**
     * Hands this operator's numRecordsOut counter to the IO metric group of the parent's parent
     * group via {@code reuseRecordsOutputCounter}.
     */
    public void reuseOutputMetricsForTask() {
        parentMetricGroup.parent().getIOMetricGroup().reuseRecordsOutputCounter(this.numRecordsOut);
    }

可以看到，会将ChainHead的numRecordsIn，set到task的TaskIOMetricGroup

而将ChainEnd的numRecordsOut，set到task的TaskIOMetricGroup

看起来很合理

numRecordsInPerSecond，numRecordsOutPerSecond

在OperatorIOMetricGroup

public OperatorIOMetricGroup(OperatorMetricGroup parentMetricGroup) {
        super(parentMetricGroup);
        numRecordsIn = parentMetricGroup.counter(MetricNames.IO_NUM_RECORDS_IN);
        numRecordsOut = parentMetricGroup.counter(MetricNames.IO_NUM_RECORDS_OUT);
        numRecordsInRate = parentMetricGroup.meter(MetricNames.IO_NUM_RECORDS_IN_RATE, new MeterView(numRecordsIn, 60));
        numRecordsOutRate = parentMetricGroup.meter(MetricNames.IO_NUM_RECORDS_OUT_RATE, new MeterView(numRecordsOut, 60));
    }

可以看到numRecordsInRate和numRecordsOutRate，只是numRecordsIn和numRecordsOut的MeterView

/**
 * A {@link Meter} that derives its rate from an existing {@link Counter}.
 *
 * <p>Each call to {@link #update()} stores the counter's current value in a circular array.
 * The rate is the difference between the newest and the oldest stored value, divided by the
 * time span. Until the array has been filled once, the "oldest" slot still holds its initial 0,
 * so the rate is the total count divided by the full time span.
 *
 * <p>{@link #update()} is expected to be called once every {@code UPDATE_INTERVAL_SECONDS}
 * (this class does not schedule itself).
 */
public class MeterView implements Meter, View {
    /** The underlying counter maintaining the count */
    private final Counter counter;
    /** The time-span over which the average is calculated */
    private final int timeSpanInSeconds;
    /** Circular array containing the history of values */
    private final long[] values;
    /** The index in the array for the current time */
    private int time = 0;
    /** The last rate we computed */
    private double currentRate = 0;

    /**
     * @param counter the counter whose value is sampled
     * @param timeSpanInSeconds requested averaging window; rounded down to a multiple of
     *                          {@code UPDATE_INTERVAL_SECONDS}, but never below one interval
     */
    public MeterView(Counter counter, int timeSpanInSeconds) {
        this.counter = counter;
        // Round down to a multiple of the update interval. Without the lower bound, a span
        // shorter than one interval would become 0 and update() would compute 0/0 = NaN
        // (and a negative span could produce a zero-length array).
        this.timeSpanInSeconds = Math.max(
            timeSpanInSeconds - (timeSpanInSeconds % UPDATE_INTERVAL_SECONDS),
            UPDATE_INTERVAL_SECONDS);
        // One slot more than the number of intervals: n intervals are delimited by n + 1 samples
        // (e.g. a span of 60 with an interval of 5 needs 13 values, not 12).
        this.values = new long[this.timeSpanInSeconds / UPDATE_INTERVAL_SECONDS + 1];
    }

    @Override
    public void markEvent() {
        this.counter.inc();
    }

    @Override
    public void markEvent(long n) {
        this.counter.inc(n);
    }

    @Override
    public long getCount() {
        return counter.getCount();
    }

    /** Returns the rate computed by the most recent {@link #update()} call (0 before the first call). */
    @Override
    public double getRate() {
        return currentRate;
    }

    /** Stores the current count in the next slot and recomputes the rate. */
    @Override
    public void update() {
        time = (time + 1) % values.length;
        values[time] = counter.getCount();
        // The slot after the newest one is the oldest sample still in the array, so this is
        // (newest - oldest) / time span.
        currentRate =  ((double) (values[time] - values[(time + 1) % values.length]) / timeSpanInSeconds);
    }
}

这个实现真是tricky，不好的设计

在MetricRegistry中，会创建

ViewUpdater

    /**
     * Announces a newly added metric to every consumer known to this registry:
     * each non-null reporter, the query service (if set), and — for metrics that implement
     * {@code View} — the view updater, which is created on first use.
     *
     * <p>Exceptions are logged and not propagated.
     *
     * @param metric the metric to register
     * @param metricName the name of the metric
     * @param group the group that contains the metric
     */
    public void register(Metric metric, String metricName, AbstractMetricGroup group) {
        try {
            if (reporters != null) {
                for (int reporterIndex = 0; reporterIndex < reporters.size(); reporterIndex++) {
                    MetricReporter reporter = reporters.get(reporterIndex);
                    if (reporter == null) {
                        continue;
                    }
                    // Each reporter gets its own front group, constructed with the reporter's index.
                    FrontMetricGroup reporterView = new FrontMetricGroup<AbstractMetricGroup<?>>(reporterIndex, group);
                    reporter.notifyOfAddedMetric(metric, metricName, reporterView);
                }
            }

            if (queryService != null) {
                MetricQueryService.notifyOfAddedMetric(queryService, metric, metricName, group);
            }

            if (metric instanceof View) {
                if (viewUpdater == null) {
                    viewUpdater = new ViewUpdater(executor);
                }
                viewUpdater.notifyOfAddedView((View) metric);
            }
        } catch (Exception e) {
            LOG.error("Error while registering metric.", e);
        }
    }

并且在register metrics的时候，除了注册到reporter，MetricQueryService

如果是View的子类，还要注册到ViewUpdater

    /**
     * Schedules a {@code ViewUpdaterTask} on the given executor: first run after 5 seconds,
     * then with a fixed delay of {@code UPDATE_INTERVAL_SECONDS} seconds between runs.
     *
     * @param executor the executor that runs the update task
     */
    public ViewUpdater(ScheduledExecutorService executor) {
        executor.scheduleWithFixedDelay(new ViewUpdaterTask(lock, toAdd, toRemove), 5, UPDATE_INTERVAL_SECONDS, TimeUnit.SECONDS);
    }

ViewUpdater会定期执行ViewUpdaterTask，task中就会调用view的update

numBytesInLocal, numBytesInRemote

在RemoteInputChannel和LocalInputChannel中，

    public LocalInputChannel(
        SingleInputGate inputGate,
        int channelIndex,
        ResultPartitionID partitionId,
        ResultPartitionManager partitionManager,
        TaskEventDispatcher taskEventDispatcher,
        int initialBackoff,
        int maxBackoff,
        TaskIOMetricGroup metrics) {

        super(inputGate, channelIndex, partitionId, initialBackoff, maxBackoff, metrics.getNumBytesInLocalCounter()); //metrics.getNumBytesInLocalCounter()
        
    public RemoteInputChannel(
        SingleInputGate inputGate,
        int channelIndex,
        ResultPartitionID partitionId,
        ConnectionID connectionId,
        ConnectionManager connectionManager,
        int initialBackOff,
        int maxBackoff,
        TaskIOMetricGroup metrics) {

        super(inputGate, channelIndex, partitionId, initialBackOff, maxBackoff, metrics.getNumBytesInRemoteCounter()); // metrics.getNumBytesInRemoteCounter()

并且都会在

BufferAndAvailability getNextBuffer()

会调用，

numBytesIn.inc(next.getSize());

numBytesOut

RecordWriter

public class RecordWriter<T extends IOReadableWritable> {
    private Counter numBytesOut = new SimpleCounter();
    
    /**
     * Sends the record to every channel chosen by the channel selector.
     *
     * @param record the record to emit
     */
    public void emit(T record) throws IOException, InterruptedException {
        for (int targetChannel : channelSelector.selectChannels(record, numChannels)) {
            sendToTarget(record, targetChannel);
        }
    }
    
    /**
     * Serializes the record with the target channel's serializer and writes out every buffer
     * that becomes full along the way, adding each written buffer's size to numBytesOut.
     *
     * @param record the record to serialize
     * @param targetChannel index of the channel (and of its serializer) to send to
     */
    private void sendToTarget(T record, int targetChannel) throws IOException, InterruptedException {
        RecordSerializer<T> serializer = serializers[targetChannel];

        synchronized (serializer) {
            SerializationResult result = serializer.addRecord(record);

            while (result.isFullBuffer()) {
                Buffer buffer = serializer.getCurrentBuffer();

                if (buffer != null) {
                    // numBytesOut counts the size of each buffer as it is written out
                    numBytesOut.inc(buffer.getSize());
                    writeAndClearBuffer(buffer, targetChannel, serializer);

                    // If this was a full record, we are done. Not breaking
                    // out of the loop at this point will lead to another
                    // buffer request before breaking out (that would not be
                    // a problem per se, but it can lead to stalls in the
                    // pipeline).
                    if (result.isFullRecord()) {
                        break;
                    }
                } else {
                    // no current buffer: block until one is available and continue serializing into it
                    buffer = targetPartition.getBufferProvider().requestBufferBlocking();
                    result = serializer.setNextBuffer(buffer);
                }
            }
        }
    }

RecordWriterOutput.collect –> StreamRecordWriter.emit –> RecordWriter.emit

inputQueueLength, outputQueueLength, inPoolUsage, outPoolUsage

TaskIOMetricGroup

   /**
     * Registers the buffer gauges of a task in a sub group named "buffers": input and output
     * queue length, and input and output buffer pool usage.
     *
     * @param task the task handed to each of the gauges
     */
    public void initializeBufferMetrics(Task task) {
        final MetricGroup bufferGroup = addGroup("buffers");

        // queue lengths
        bufferGroup.gauge("inputQueueLength", new InputBuffersGauge(task));
        bufferGroup.gauge("outputQueueLength", new OutputBuffersGauge(task));

        // buffer pool usage
        bufferGroup.gauge("inPoolUsage", new InputBufferPoolUsageGauge(task));
        bufferGroup.gauge("outPoolUsage", new OutputBufferPoolUsageGauge(task));
    }

inputQueueLength

for (SingleInputGate inputGate : task.getAllInputGates()) {
    totalBuffers += inputGate.getNumberOfQueuedBuffers();
}

inputGate.getNumberOfQueuedBuffers

for (InputChannel channel : inputChannels.values()) {
    // only RemoteInputChannels contribute to the total; other channel types are skipped
    if (channel instanceof RemoteInputChannel) {
        totalBuffers += ((RemoteInputChannel) channel).getNumberOfQueuedBuffers();
    }
}

getNumberOfQueuedBuffers

/**
     * The received buffers. Received buffers are enqueued by the network I/O thread and the queue
     * is consumed by the receiving task thread.
     */
    private final Queue<Buffer> receivedBuffers = new ArrayDeque<>();

    /**
     * Returns the current size of the received-buffers queue, read while holding the queue's lock.
     */
    public int getNumberOfQueuedBuffers() {
        final int queued;
        synchronized (receivedBuffers) {
            queued = receivedBuffers.size();
        }
        return queued;
    }

outputQueueLength

for (ResultPartition producedPartition : task.getProducedPartitions()) {
    totalBuffers += producedPartition.getNumberOfQueuedBuffers();
}

ResultPartition getNumberOfQueuedBuffers

for (ResultSubpartition subpartition : subpartitions) {
    totalBuffers += subpartition.getNumberOfQueuedBuffers();
}

SpillableSubpartition getNumberOfQueuedBuffers

class SpillableSubpartition extends ResultSubpartition {
    /** Buffers are kept in this queue as long as we weren't ask to release any. */
    private final ArrayDeque<Buffer> buffers = new ArrayDeque<>();
    
    @Override
    public int getNumberOfQueuedBuffers() {
        return buffers.size();
    }

inputQueueLength, outputQueueLength

指标的含义是，InputChannel和ResultPartition持有的buffer个数，这些buffer被读完后会release，所以链路通畅的话，length应该会很小

inPoolUsage

int usedBuffers = 0;
int bufferPoolSize = 0;

for (SingleInputGate inputGate : task.getAllInputGates()) {
    usedBuffers += inputGate.getBufferPool().bestEffortGetNumOfUsedBuffers();
    bufferPoolSize += inputGate.getBufferPool().getNumBuffers();
}

if (bufferPoolSize != 0) {
    return ((float) usedBuffers) / bufferPoolSize;
} else {
    return 0.0f;
}

bestEffortGetNumOfUsedBuffers()

/**
 * Returns the number of requested memory segments minus the number of currently available
 * ones, clamped at zero.
 */
@Override
public int bestEffortGetNumOfUsedBuffers() {
    final int inUse = numberOfRequestedMemorySegments - availableMemorySegments.size();
    return inUse < 0 ? 0 : inUse;
}

numberOfRequestedMemorySegments，从bufferpool申请多少
availableMemorySegments，可用的
所以相减就是使用多少

outPoolUsage

int usedBuffers = 0;
int bufferPoolSize = 0;

for (ResultPartition resultPartition : task.getProducedPartitions()) {
    usedBuffers += resultPartition.getBufferPool().bestEffortGetNumOfUsedBuffers();
    bufferPoolSize += resultPartition.getBufferPool().getNumBuffers();
}

if (bufferPoolSize != 0) {
    return ((float) usedBuffers) / bufferPoolSize;
} else {
    return 0.0f;
}

和inPoolUsage类似，也是看bufferPool的情况

所以inPoolUsage，outPoolUsage表示的是inputgate和resultpartition中bufferpool的使用情况

这个bufferpool是inputgate初始化的时候，注册到NetworkEnvironment创建的，

// Setup the buffer pool for each buffer reader
final SingleInputGate[] inputGates = task.getAllInputGates();

for (SingleInputGate gate : inputGates) {
    BufferPool bufferPool = null;

    try {
        bufferPool = networkBufferPool.createBufferPool(gate.getNumberOfInputChannels(), false);
        gate.setBufferPool(bufferPool);
    }

可以看到默认大小是input channels的个数（gate.getNumberOfInputChannels()）

如果pool用完了，那么InputGate就无法继续读取新的数据，ResultPartition也无法继续写出新的数据

latency

在AbstractStreamOperator中，

setup，

protected LatencyGauge latencyGauge;

latencyGauge = this.metrics.gauge("latency", new LatencyGauge(historySize));

注意，这里metrics是OperatorMetricGroup

this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());

TaskMetricGroup

    /**
     * Returns the operator metric group registered under the given name, registering a freshly
     * created one if there is none yet.
     *
     * @param name the operator name
     * @return the group already registered under {@code name}, otherwise the newly created group
     */
    public OperatorMetricGroup addOperator(String name) {
        // Created up front, outside the lock, exactly as before; discarded if a group already exists.
        final OperatorMetricGroup candidate = new OperatorMetricGroup(this.registry, this, name);

        synchronized (this) {
            final OperatorMetricGroup existing = operators.get(name);
            if (existing != null) {
                // already had an operator group: keep it
                return existing;
            }
            // no operator group so far
            operators.put(name, candidate);
            return candidate;
        }
    }

LatencyGauge的定义，

/**
     * Gauge that keeps latency statistics per latency source and reports, for each source,
     * max / mean / min / p50 / p95 / p99.
     *
     * The gauge uses a HashMap internally to avoid classloading issues when accessing
     * the values using JMX.
     */
    protected static class LatencyGauge implements Gauge<Map<String, HashMap<String, Double>>> {
    
        // Key: LatencySourceDescriptor, built from the marker's vertex ID and subtask index
        //      (see LatencySourceDescriptor.of).
        // Value: DescriptiveStatistics collecting the latencies observed for that source.
        private final Map<LatencySourceDescriptor, DescriptiveStatistics> latencyStats = new HashMap<>();
        // Passed to every DescriptiveStatistics constructor; presumably the window size — TODO confirm.
        private final int historySize;

        LatencyGauge(int historySize) {
            this.historySize = historySize;
        }

        /**
         * Records the latency of a marker: the current wall-clock time minus the marker's marked time.
         *
         * @param marker the latency marker that arrived
         * @param isSink passed negated as the second argument of LatencySourceDescriptor.of
         */
        public void reportLatency(LatencyMarker marker, boolean isSink) {
            LatencySourceDescriptor sourceDescriptor = LatencySourceDescriptor.of(marker, !isSink);
            DescriptiveStatistics sourceStats = latencyStats.get(sourceDescriptor);
            if (sourceStats == null) { // first marker from this source: create its statistics
                // 512 element window (4 kb)
                sourceStats = new DescriptiveStatistics(this.historySize);
                latencyStats.put(sourceDescriptor, sourceStats);
            }
            long now = System.currentTimeMillis();
            sourceStats.addValue(now - marker.getMarkedTime()); // latency = now - time recorded in the marker
        }

        /**
         * Builds a snapshot map from each source descriptor's string form to its statistics.
         * Retries from scratch whenever a ConcurrentModificationException is thrown.
         */
        @Override
        public Map<String, HashMap<String, Double>> getValue() {
            while (true) {
                try {
                    Map<String, HashMap<String, Double>> ret = new HashMap<>();
                    for (Map.Entry<LatencySourceDescriptor, DescriptiveStatistics> source : latencyStats.entrySet()) {
                        HashMap<String, Double> sourceStatistics = new HashMap<>(6);
                        sourceStatistics.put("max", source.getValue().getMax());
                        sourceStatistics.put("mean", source.getValue().getMean());
                        sourceStatistics.put("min", source.getValue().getMin());
                        sourceStatistics.put("p50", source.getValue().getPercentile(50));
                        sourceStatistics.put("p95", source.getValue().getPercentile(95));
                        sourceStatistics.put("p99", source.getValue().getPercentile(99));
                        ret.put(source.getKey().toString(), sourceStatistics);
                    }
                    return ret;
                    // Concurrent access onto the "latencyStats" map could cause
                    // ConcurrentModificationExceptions. To avoid unnecessary blocking
                    // of the reportLatency() method, we retry this operation until
                    // it succeeds.
                } catch(ConcurrentModificationException ignore) {
                    LOG.debug("Unable to report latency statistics", ignore);
                }
            }
        }
    }

这个Gauge.getValue返回的是个map，太奇葩

latencyStats里面有多少entry，取决于有多少source，以及每个source有几个并发

因为他要记录，每个source operator的某个subtask，到当前operator的该subtask的延迟

        /**
         * Builds the descriptor for the source of a latency marker.
         *
         * @param marker the marker providing the vertex ID and subtask index
         * @param ignoreSubtaskIndex if true, the subtask index is replaced by -1
         * @return a descriptor made of the vertex ID and either the subtask index or -1
         */
        public static LatencySourceDescriptor of(LatencyMarker marker, boolean ignoreSubtaskIndex) {
            if (!ignoreSubtaskIndex) {
                return new LatencySourceDescriptor(marker.getVertexID(), marker.getSubtaskIndex());
            }
            return new LatencySourceDescriptor(marker.getVertexID(), -1);
        }

LatencySourceDescriptor构造函数，由vertexid，和subtaskIndex组成

如果忽略subtaskindex，置为-1

流程

StreamSource

定义LatencyMarksEmitter

private static class LatencyMarksEmitter<OUT> {
        private final ScheduledFuture<?> latencyMarkTimer;

        /**
         * Schedules a recurring processing-time callback that emits a latency marker on the
         * given output each time it fires.
         *
         * @param processingTimeService service used to schedule the callback
         * @param output output the markers are emitted on
         * @param latencyTrackingInterval passed as the last scheduling argument (presumably the period — TODO confirm)
         * @param vertexID vertex ID written into every marker
         * @param subtaskIndex subtask index written into every marker
         */
        public LatencyMarksEmitter(
                final ProcessingTimeService processingTimeService,
                final Output<StreamRecord<OUT>> output,
                long latencyTrackingInterval,
                final int vertexID,
                final int subtaskIndex) {

            // emit latency markers periodically, driven by the processing-time service
            latencyMarkTimer = processingTimeService.scheduleAtFixedRate(
                new ProcessingTimeCallback() {
                    @Override
                    public void onProcessingTime(long timestamp) throws Exception {
                        try {
                            // ProcessingTimeService callbacks are executed under the checkpointing lock
                            // the callback's timestamp becomes the first constructor argument of the marker
                            output.emitLatencyMarker(new LatencyMarker(timestamp, vertexID, subtaskIndex));
                        } catch (Throwable t) {
                            // we catch the Throwables here so that we don't trigger the processing
                            // timer services async exception handler
                            LOG.warn("Error while emitting latency marker.", t);
                        }
                    }
                },
                0L,
                latencyTrackingInterval);
        }

source.run，当isLatencyTrackingEnabled，schedule latency marker

public void run(final Object lockingObject, final Output<StreamRecord<OUT>> collector) throws Exception {
        final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic();

        LatencyMarksEmitter latencyEmitter = null;
        if(getExecutionConfig().isLatencyTrackingEnabled()) {
            latencyEmitter = new LatencyMarksEmitter<>(
                getProcessingTimeService(),
                collector,
                getExecutionConfig().getLatencyTrackingInterval(),
                getOperatorConfig().getVertexID(),
                getRuntimeContext().getIndexOfThisSubtask());
        }

StreamInputProcessor –> processInput

如果是isLatencyMarker

else if(recordOrMark.isLatencyMarker()) {
    // handle latency marker
    synchronized (lock) {
        streamOperator.processLatencyMarker(recordOrMark.asLatencyMarker());
    }
    continue;
}

对于，chaining， ChainingOutput

private static class ChainingOutput<T> implements Output<StreamRecord<T>> {
    
    protected final OneInputStreamOperator<T, ?> operator;
    protected final Counter numRecordsIn;

    /**
     * Hands the latency marker directly to the operator's {@code processLatencyMarker}.
     *
     * @throws ExceptionInChainedOperatorException wrapping any exception thrown by the operator
     */
    @Override
    public void emitLatencyMarker(LatencyMarker latencyMarker) {
        try {
            operator.processLatencyMarker(latencyMarker);
        }
        catch (Exception e) {
            throw new ExceptionInChainedOperatorException(e);
        }
    }

AbstractStreamOperator

/** Handles an incoming latency marker by delegating to reportOrForwardLatencyMarker. */
public void processLatencyMarker(LatencyMarker latencyMarker) throws Exception {
        reportOrForwardLatencyMarker(latencyMarker);
    }

/**
 * Reports the marker to this operator's latency gauge (with isSink = false) and then
 * emits it on this operator's output.
 */
protected void reportOrForwardLatencyMarker(LatencyMarker marker) {
        // all operators are tracking latencies
        this.latencyGauge.reportLatency(marker, false);

        // everything except sinks forwards latency markers
        this.output.emitLatencyMarker(marker);
    }

调用到latencyGauge.reportLatency，逻辑如上

后续继续emitLatencyMarker

currentLowWatermark, checkpointAlignmentTime

OneInputStreamTask

/**
 * Creates the stream input processor over all input gates of the environment and gives it
 * the task's IO metric group. Does nothing when there are no inputs.
 */
@Override
    public void init() throws Exception {
        if (numberOfInputs <= 0) {
            return;
        }

        final InputGate[] gates = getEnvironment().getAllInputGates();
        inputProcessor = new StreamInputProcessor<IN>(
                gates,
                inSerializer,
                this,
                configuration.getCheckpointMode(),
                getEnvironment().getIOManager(),
                getEnvironment().getTaskManagerInfo().getConfiguration());

        // make sure that stream tasks report their I/O statistics
        inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup());
    }

StreamInputProcessor

    /**
     * Registers two gauges on the given task IO metric group.
     *
     * @param metrics the group the gauges are registered on
     */
    public void setMetricGroup(TaskIOMetricGroup metrics) {
        // currentLowWatermark: reports lastEmittedWatermark at the time the gauge is read
        metrics.gauge("currentLowWatermark", new Gauge<Long>() {
            @Override
            public Long getValue() {
                return lastEmittedWatermark;
            }
        });

        // checkpointAlignmentTime: reports the barrier handler's alignment duration in nanoseconds
        metrics.gauge("checkpointAlignmentTime", new Gauge<Long>() {
            @Override
            public Long getValue() {
                return barrierHandler.getAlignmentDurationNanos();
            }
        });
    }

currentLowWatermark，即lastEmittedWatermark

默认值是，

lastEmittedWatermark = Long.MIN_VALUE;

所以如果没有assignTimestampsAndWatermarks，那么currentLowWatermark会一直是Long.MIN_VALUE，即一个绝对值极大的负数

    public boolean processInput(OneInputStreamOperator<IN, ?> streamOperator, final Object lock) throws Exception {
        while (true) {
            if (currentRecordDeserializer != null) {
              
                if (result.isFullRecord()) {
                    StreamElement recordOrMark = deserializationDelegate.getInstance();

                    if (recordOrMark.isWatermark()) {
                        long watermarkMillis = recordOrMark.asWatermark().getTimestamp();
                        if (watermarkMillis > watermarks[currentChannel]) { // 更新每个channel对应的waterMark
                            watermarks[currentChannel] = watermarkMillis;
                            long newMinWatermark = Long.MAX_VALUE;
                            for (long watermark: watermarks) { // 找出所有channel最小的watermark，以最小的为准
                                newMinWatermark = Math.min(watermark, newMinWatermark); 
                            }
                            if (newMinWatermark > lastEmittedWatermark) {
                                lastEmittedWatermark = newMinWatermark; // 将最小的watermark设为lastEmittedWatermark
                                synchronized (lock) {
                                    streamOperator.processWatermark(new Watermark(lastEmittedWatermark));
                                }
                            }
                        }
                        continue;
                    }

checkpointAlignmentTime

barrierHandler.getAlignmentDurationNanos

    /**
     * Returns the alignment duration in nanoseconds: while a start timestamp is set (greater
     * than zero), the time elapsed since that timestamp; otherwise the latest recorded duration.
     */
    @Override
    public long getAlignmentDurationNanos() {
        final long alignmentStart = this.startOfAlignmentTimestamp;
        return alignmentStart > 0
            ? System.nanoTime() - alignmentStart
            : latestAlignmentDurationNanos;
    }

startOfAlignmentTimestamp是在这次checkpoint的barrier对齐（alignment）开始的时候打的时间戳，即在beginNewAlignment中设置

    /**
     * Starts the alignment for a new checkpoint: records the checkpoint id, registers the
     * barrier of the given channel and stamps the start of the alignment with System.nanoTime().
     *
     * @param checkpointId id of the checkpoint whose barrier arrived
     * @param channelIndex index of the channel the barrier arrived on
     */
    private void beginNewAlignment(long checkpointId, int channelIndex) throws IOException {
        currentCheckpointId = checkpointId;
        onBarrier(channelIndex);

        // start of the alignment; read by getAlignmentDurationNanos()
        startOfAlignmentTimestamp = System.nanoTime();
    }

beginNewAlignment在

processBarrier中被调用，

        if (numBarriersReceived > 0) {
            // this is only true if some alignment is already progress and was not canceled

            if (barrierId == currentCheckpointId) {
                // regular case
                onBarrier(channelIndex);
            }
            else if (barrierId > currentCheckpointId) { // barrier of a newer checkpoint: the current one is obsolete
                // we did not complete the current checkpoint, another started before
                LOG.warn("Received checkpoint barrier for checkpoint {} before completing current checkpoint {}. " +
                        "Skipping current checkpoint.", barrierId, currentCheckpointId);

                // let the task know we are not completing this
                notifyAbort(currentCheckpointId, new CheckpointDeclineSubsumedException(barrierId));

                // abort the current checkpoint
                releaseBlocksAndResetBarriers();

                // begin a the new checkpoint
                beginNewAlignment(barrierId, channelIndex); // starts the alignment for the new checkpoint
            }
            else {
                // ignore trailing barrier from an earlier checkpoint (obsolete now)
                return;
            }
        }
        else if (barrierId > currentCheckpointId) { // no alignment in progress and a newer checkpoint id
            // first barrier of a new checkpoint
            beginNewAlignment(barrierId, channelIndex); // starts the alignment for the new checkpoint
        }

所以checkpointAlignmentTime的意思是，当前的checkpoint已经等待多久，因为要等到所有input channel的barrier，checkpoint才会触发

单位是纳秒，所以billion级别代表秒

如果比较大，说明各个并发之间的延迟差异较大，或延迟较高

posted on 2017-02-15 15:27 fxjwind 阅读(1934) 评论(1) 编辑收藏举报

刷新页面返回顶部

fxjwind