Android 12(S) MultiMedia(十四)ESQueue

之前看到在ATSParser::Program::Stream中会创建一个ESQueue,用于存储解析出来的ES data,这个ESQueue到底是用来做什么的呢?这节就来研究研究。



ESQueue的全名是ElementaryStreamQueue,构造函数传入两个参数mode和flags:mode指定了Stream的类型(H264、AC3等),flags用于标记是否加密、加扰等信息。

// Constructs the queue: stores the stream mode, resets the access-unit index,
// logs the configuration, and allocates a sample decryptor only when
// isSampleEncrypted() reports true (otherwise mSampleDecryptor is NULL).
//
// NOTE(review): this is an abridged excerpt. The #else/#endif lines that
// select between the two decryptor classes, any other member initializers,
// and the closing brace are not shown here.
ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
    : mMode(mode),
      mAUIndex(0) {

    ALOGV("ElementaryStreamQueue(%p) mode %x  flags %x  isScrambled %d  isSampleEncrypted %d",
            this, mode, flags, isScrambled(), isSampleEncrypted());

    // Create the decryptor anyway since we don't know the use-case unless key is provided
    // Won't decrypt if key info not available (e.g., scanner/extractor just parsing ts files)
    mSampleDecryptor = isSampleEncrypted() ?
#ifdef __ANDROID_APEX__
        // Chosen when __ANDROID_APEX__ is defined.
        new SampleDecryptor
        // Presumably the #else alternative; the directive itself is missing
        // from this excerpt -- confirm against the full source.
        new HlsSampleDecryptor
        : NULL;




// Call-site excerpt: a payload chunk is handed to the queue together with its
// timestamp, payload offset and PES scrambling-control value.
status_t err = mQueue->appendData(data, size, timeUs, payloadOffset, PES_scrambling_control);


// Appends `size` bytes of elementary-stream data to mBuffer and fills in a
// RangeInfo describing the chunk (length, timestamp, PES offset, scrambling
// control).
//
// Returns ERROR_MALFORMED when called after EOS was reached or, for
// H264/MPEG_VIDEO on an empty buffer, when no 00 00 01 start code is found in
// the data; returns OK otherwise.
//
// NOTE(review): abridged excerpt -- closing braces, the #else/#endif lines of
// the `#if 0` sections, the remaining switch cases and some statements are
// not shown.
status_t ElementaryStreamQueue::appendData(
        const void *data, size_t size, int64_t timeUs,
        int32_t payloadOffset, uint32_t pesScramblingControl) {

    // No data is accepted once end-of-stream has been signalled.
    if (mEOSReached) {
        ALOGE("appending data after EOS");
        return ERROR_MALFORMED;

    // Important check: this branch is only entered for non-scrambled streams
    // when mBuffer is NULL or currently holds no data.
    if (!isScrambled() && (mBuffer == NULL || mBuffer->size() == 0)) {
        switch (mMode) {
            case H264:
            case MPEG_VIDEO:
#if 0
                if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
                    return ERROR_MALFORMED;
                uint8_t *ptr = (uint8_t *)data;
                // Scan for the first 00 00 01 start code and remember its offset.
                ssize_t startOffset = -1;
                for (size_t i = 0; i + 2 < size; ++i) {
                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
                        startOffset = i;

                // No start code anywhere in this chunk: reject it.
                if (startOffset < 0) {
                    return ERROR_MALFORMED;

                if (mFormat == NULL && startOffset > 0) {
                    ALOGI("found something resembling an H.264/MPEG syncword "
                          "at offset %zd",

                // Drop the bytes that precede the start code.
                data = &ptr[startOffset];
                size -= startOffset;

            // ......
    // Grow the buffer when the current capacity cannot hold the new data.
    size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
    if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
        // Round the requested size up to a multiple of 65536 bytes.
        neededSize = (neededSize + 65535) & ~65535;

        ALOGV("resizing buffer to size %zu", neededSize);

        sp<ABuffer> buffer = new ABuffer(neededSize);
        if (mBuffer != NULL) {
            // Carry the existing contents over into the larger buffer.
            memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
            buffer->setRange(0, mBuffer->size());
        } else {
            buffer->setRange(0, 0);

        mBuffer = buffer;
    // Append the new data at the end of mBuffer and extend its valid range.
    memcpy(mBuffer->data() + mBuffer->size(), data, size);
    mBuffer->setRange(0, mBuffer->size() + size);
    // Describe the appended chunk: length, timestamp, PES offset and
    // scrambling control.
    // NOTE(review): the statement that stores `info` in the queue is not
    // shown in this excerpt (presumably a push onto mRangeInfos -- confirm).
    RangeInfo info;
    info.mLength = size;
    info.mTimestampUs = timeUs;
    info.mPesOffset = payloadOffset;
    info.mPesScramblingControl = pesScramblingControl;

#if 0
    if (mMode == AAC) {
        ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
        hexdump(data, size);

    return OK;

这个方法的主要内容就是将ES data追加到mBuffer当中。对于H264/MPEG_VIDEO,当码流未加扰且mBuffer为NULL或者数据为空时,会先在数据中查找起始码0x000001:找不到则返回ERROR_MALFORMED,找到则丢弃起始码之前的字节,从第一个NALU的起始位置开始保存。每次追加数据的同时都会生成一个RangeInfo,记录这段数据的长度、时间戳、偏移量等信息。




// Returns the next complete access unit, or NULL when none is available.
// Scrambled streams go through dequeueScrambledAccessUnit(); H.264 with
// kFlag_AlignedData set returns one appended chunk per call; everything else
// is dispatched on mMode to a codec-specific dequeue routine.
//
// NOTE(review): abridged excerpt -- closing braces, most switch cases and a
// few statements are not shown.
sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
    if (isScrambled()) {
        return dequeueScrambledAccessUnit();
    // Per the surrounding article, mFlags derives from the flags passed when
    // the ATSParser was created (not visible in this excerpt).
    if ((mFlags & kFlag_AlignedData) && mMode == H264) {
        if (mRangeInfos.empty()) {
            return NULL;

        RangeInfo info = *mRangeInfos.begin();

        // Copy the first recorded chunk out of mBuffer and tag it with that
        // chunk's timestamp.
        sp<ABuffer> accessUnit = new ABuffer(info.mLength);
        memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
        accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);

        // NOTE(review): the call these two arguments belong to is missing
        // from the excerpt (presumably a memmove that shifts the remaining
        // bytes to the front of mBuffer -- confirm).
                mBuffer->data() + info.mLength,
                mBuffer->size() - info.mLength);

        mBuffer->setRange(0, mBuffer->size() - info.mLength);

        // Build the format metadata from the first access unit seen.
        if (mFormat == NULL) {
            mFormat = new MetaData;
            if (!MakeAVCCodecSpecificData(*mFormat, accessUnit->data(), accessUnit->size())) {

        return accessUnit;

    switch (mMode) {
        case H264:
            // H.264 streams are handled by dequeueAccessUnitH264().
            return dequeueAccessUnitH264();
    // ......
            if (mMode != MPEG_AUDIO) {
                ALOGE("Unknown mode");
                return NULL;
            return dequeueAccessUnitMPEGAudio();



VCL :video coding layer 视频编码层。这层我的理解是做编码实现

SODB :string of data bits 原始数据比特流。由VCL产生的编码后的数据流

NAL :network abstraction layer 网络抽象层。这层我理解为将编码后的结构做封装便于传输,封装后的基本单位就是NAL单元(一帧数据可以由一个或多个NAL单元组成)

RBSP :raw byte sequence payload 原始字节序列载荷。由SODB加上尾部填充比特得到,是一个通用封装格式,主要就是将SODB的比特数补充为8的倍数,补充方式为先补1,然后补0直到达到8的倍数

NALU :组成NAL的单元。将RBSP针对不同的传输网络进行重新封装之后的单元(nal_unit(RBSP)加上NAL header(1byte)) header定义了当前NALU的类型,NALU类型可以是SPS、PPS、SEI、SLICE等等


sequence:一段h264码流由多个sequence组成,每个sequence由固定的结构单元组成:1SPS + 1PPS + 1SEI +(I + P + B)。sequence的时长取决于编码器的GOP设置(常见的配置大约是一秒)。序列中的每个单元前都有0x000001(或0x00000001)作为分隔符,即每个NALU之前都有起始码。

SPS :sequence parameter set 序列参数集。保存了一组编码视频序列的全局参数。

PPS :picture parameter set 图像参数集。作用于编码视频序列中的一个或多个独立图像

SEI :supplemental enhancement information 附加增强信息。包括画面定时等信息

参考:NAL单元简介 NAL单元详解 NAL语法结构


看到这些定义大概就知道了ts packet中存储的ES流应该就是一个个NALU单元,若干个NALU单元组成一个access unit,也就是一帧数据。


// Assembles one H.264 access unit from the NAL units buffered in mBuffer.
//
// NAL units are walked with getNextNALUnit() and their positions collected
// until a boundary is detected (a slice with first_mb_in_slice == 0 after an
// earlier slice, or an access unit delimiter / SPS after a slice). The
// collected NAL units are then copied into a new ABuffer, each prefixed with
// a 00 00 00 01 start code. The consumed bytes are dropped from mBuffer and
// the unit is tagged with "timeUs" (plus "isSync" when an IDR slice was seen).
//
// Returns the access unit, or NULL when no complete unit is available yet or
// an error occurred.
//
// NOTE(review): abridged excerpt -- closing braces and several statements
// (e.g. the SEI counter update, the push onto `nals`, memmove calls and some
// call arguments) are not shown.
sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
    const uint8_t *data = mBuffer->data();

    size_t size = mBuffer->size();
    Vector<NALPosition> nals;

    size_t totalSize = 0;
    size_t seiCount = 0;

    status_t err;
    const uint8_t *nalStart;
    size_t nalSize;
    bool foundSlice = false;
    bool foundIDR = false;

    ALOGV("dequeueAccessUnit_H264[%d] %p/%zu", mAUIndex, data, size);
    // Fetch the extent of the next NAL unit (header byte plus payload).
    while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
        if (nalSize == 0) continue;
        // 1. The NAL unit type is the low 5 bits of the first byte.
        unsigned nalType = nalStart[0] & 0x1f;
        bool flush = false;
        // 2. Type 1: coded slice of a non-IDR picture; type 5: coded slice of
        //    an IDR picture.
        if (nalType == 1 || nalType == 5) {
            if (nalType == 5) {
                foundIDR = true;
            if (foundSlice) {
                //TODO: Shouldn't this have been called with nalSize-1?
                ABitReader br(nalStart + 1, nalSize);
                unsigned first_mb_in_slice = parseUE(&br);
                // A new frame was found: flush what has been collected so far.
                if (first_mb_in_slice == 0) {
                    // This slice starts a new frame.

                    flush = true;

            foundSlice = true;
        // 3. Type 9: access unit delimiter; type 7: SPS.
        } else if ((nalType == 9 || nalType == 7) && foundSlice) {
            // Access unit delimiter and SPS will be associated with the
            // next frame.
            // Seeing one of them after a slice means the current frame is
            // complete, so flush.
            flush = true;
        // 4. Type 6: SEI.
        } else if (nalType == 6 && nalSize > 0) {
            // found non-zero sized SEI

        if (flush) {
            // The access unit will contain all nal units up to, but excluding
            // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.

            size_t auSize = 4 * nals.size() + totalSize;
            sp<ABuffer> accessUnit = new ABuffer(auSize);
            sp<ABuffer> sei;

            // The positions of SEI NAL units inside the access unit are
            // published through the "sei" meta buffer.
            if (seiCount > 0) {
                sei = new ABuffer(seiCount * sizeof(NALPosition));
                accessUnit->meta()->setBuffer("sei", sei);

            AString out;

            size_t dstOffset = 0;
            size_t seiIndex = 0;
            size_t shrunkBytes = 0;
            for (size_t i = 0; i < nals.size(); ++i) {
                const NALPosition &pos = nals.itemAt(i);

                unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;

                if (nalType == 6 && pos.nalSize > 0) {
                    if (seiIndex >= sei->size() / sizeof(NALPosition)) {
                        ALOGE("Wrong seiIndex");
                        return NULL;
                    // The offset is relative to the access unit, just past the
                    // 4-byte start code.
                    NALPosition &seiPos = ((NALPosition *)sei->data())[seiIndex++];
                    seiPos.nalOffset = dstOffset + 4;
                    seiPos.nalSize = pos.nalSize;

                char tmp[128];
                sprintf(tmp, "0x%02x", nalType);
                if (i > 0) {
                    out.append(", ");
                // Write the 4-byte start code.
                memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);

                // Slice NAL units go through the sample decryptor when one
                // exists.
                if (mSampleDecryptor != NULL && (nalType == 1 || nalType == 5)) {
                    uint8_t *nalData = mBuffer->data() + pos.nalOffset;
                    size_t newSize = mSampleDecryptor->processNal(nalData, pos.nalSize);
                    // Note: the data can shrink due to unescaping, but it can never grow
                    if (newSize > pos.nalSize) {
                        // don't log unless verbose, since this can get called a lot if
                        // the caller is trying to resynchronize
                        ALOGV("expected sample size < %u, got %zu", pos.nalSize, newSize);
                        return NULL;
                    memcpy(accessUnit->data() + dstOffset + 4,
                    dstOffset += newSize + 4;

                    size_t thisShrunkBytes = pos.nalSize - newSize;
                    //ALOGV("dequeueAccessUnitH264[%d]: nalType: %d -> %zu (%zu)",
                    //        nalType, (int)pos.nalSize, newSize, thisShrunkBytes);

                    shrunkBytes += thisShrunkBytes;
                else {
                    // Copy the NAL unit data unchanged.
                    memcpy(accessUnit->data() + dstOffset + 4,
                            mBuffer->data() + pos.nalOffset,

                    dstOffset += pos.nalSize + 4;
                    //ALOGV("dequeueAccessUnitH264 [%d] %d @%d",
                    //        nalType, (int)pos.nalSize, (int)pos.nalOffset);

            ALOGV("accessUnit contains nal types %s", out.c_str());

            // Everything up to the end of the last collected NAL unit has been
            // consumed.
            const NALPosition &pos = nals.itemAt(nals.size() - 1);
            size_t nextScan = pos.nalOffset + pos.nalSize;

            // NOTE(review): the call these two arguments belong to is missing
            // from the excerpt (presumably a memmove that shifts the
            // unconsumed bytes to the front of mBuffer -- confirm).
                    mBuffer->data() + nextScan,
                    mBuffer->size() - nextScan);

            mBuffer->setRange(0, mBuffer->size() - nextScan);
            // Fetch the timestamp for the bytes that were consumed.
            int64_t timeUs = fetchTimestamp(nextScan);
            if (timeUs < 0LL) {
                ALOGE("Negative timeUs");
                return NULL;
            // Attach the timestamp and, when an IDR slice was seen, the
            // sync-frame flag.
            accessUnit->meta()->setInt64("timeUs", timeUs);
            if (foundIDR) {
                accessUnit->meta()->setInt32("isSync", 1);
            // Create the format metadata on first use and extract the
            // codec-specific data from this access unit.
            if (mFormat == NULL) {
                mFormat = new MetaData;
                if (!MakeAVCCodecSpecificData(*mFormat,
                        accessUnit->size())) {

            // Decryption may have shrunk slice data; trim the access unit to
            // the bytes actually written.
            if (mSampleDecryptor != NULL && shrunkBytes > 0) {
                size_t adjustedSize = accessUnit->size() - shrunkBytes;
                ALOGV("dequeueAccessUnitH264[%d]: AU size adjusted %zu -> %zu",
                        mAUIndex, accessUnit->size(), adjustedSize);
                accessUnit->setRange(0, adjustedSize);

            ALOGV("dequeueAccessUnitH264[%d]: AU %p(%zu) dstOffset:%zu, nals:%zu, totalSize:%zu ",
                    mAUIndex, accessUnit->data(), accessUnit->size(),
                    dstOffset, nals.size(), totalSize);
            // Return the assembled access unit.
            return accessUnit;

        NALPosition pos;
        pos.nalOffset = nalStart - mBuffer->data();
        pos.nalSize = nalSize;
        // Record this NAL unit's position so it can be copied out at flush
        // time.
        // NOTE(review): the statement that adds `pos` to `nals` is missing
        // from this excerpt.

        totalSize += nalSize;
    // Any status other than -EAGAIN from getNextNALUnit() is logged as
    // unexpected.
    if (err != (status_t)-EAGAIN) {
        ALOGE("Unexpeted err");
        return NULL;

    return NULL;




// Excerpt: the packet source is created from the metadata `meta`.
mSource = new AnotherPacketSource(meta);

另外看看seek points,只有带isSync标记的access unit(即包含IDR的I帧)会被用来初始化seek point

        // Initialise the sync event (seek point) once: only when the PES start
        // offset is known, the caller supplied an event, none has been found
        // yet and the queue already has a format.
        // NOTE(review): abridged excerpt -- closing braces are not shown.
        if (pesStartOffset >= 0 && (event != NULL) && !found && mQueue->getFormat() != NULL) {
            int32_t sync = 0;
            // Only access units flagged "isSync" qualify.
            if (accessUnit->meta()->findInt32("isSync", &sync) && sync) {
                int64_t timeUs;
                if (accessUnit->meta()->findInt64("timeUs", &timeUs)) {
                    found = true;
                    event->init(pesStartOffset, mSource, timeUs, getSourceType());
