使用 GStreamer appsrc 等插件实现视频音频混流，录制和推流

目前在做的在线直播教室，需要将老师分享的屏幕和老师的声音、学生的声音录制为一个视频文件，以便学生上课后还可以再看回放。

直播服务我们采用的是腾讯的视频服务，有现成的 SDK 可以用。但 SDK 自带的录制接口满足不了我们的需求，考察了 ffmpeg 和 GStreamer 后，决定在项目中使用 GStreamer 来实现。

在开始编写代码以前，先用命令行进行测试，命令行如下：

# Test pipeline: flvmux output is split by tee into a local FLV file and an RTMP push.
# Video branch: videotestsrc -> videorate -> x264enc -> mux.video
# Audio branch: two audiotestsrc inputs are mixed by adder, then audioconvert -> voaacenc -> mux.audio
gst-launch-1.0.exe -v --gst-debug-level=4 flvmux name=mux ! tee name=t ! queue ! filesink name=file location=test.flv \
t. ! queue ! rtmpsink location="rtmp://live.abc.com/live/........" \
adder name=mix ! queue ! audiorate ! audioconvert ! voaacenc ! mux.audio \
videotestsrc name=screen_src ! queue ! videorate ! x264enc ! mux.video \
audiotestsrc name=send_audio_src ! queue ! audiorate ! mix. \
audiotestsrc wave=5 name=receive_audio_src ! queue ! audiorate ! mix.

命令看起来有点复杂，但其实逻辑挺简单的，看下面这个逻辑图就比较容易理解了（点击查看大图）

上面的命令中的推流地址需要替换为你的推流地址，如果没有，可以先把下面这部分内容去掉不推流

t. ! queue ! rtmpsink location="rtmp://live.abc.com/live/........" \

另外这个命令是在 Windows 中的 MINGW64 的 bash 环境里面运行的，如果在 Windows 的 cmd 环境中运行，把每行最后的 \ 和换行去掉就可以了。

上面命令中用到了几个关键的插件，分别解释一下：

adder：音频混流，将两路音频混为一路

voaacenc：音频编码，将原始的音频流编码为 aac 格式

x264enc：视频编码，将原始的视频流编码为 h264 格式

flvmux：flv 组装，将视频和音频组装在一起

tee：分流器，将一路输入变为两路输出，以分别进行后续的处理。一路保存为文件，一路进行推流

filesink：文件存储，将输入数据存储到指定的文件中

rtmpsink：推流，将输入数据推流到指定的视频服务器

接下来进行代码实现，我们的项目是 QT C++ 项目，目前仅在 Windows 平台使用

将 GStreamer 管道的初始化等放在 GStreamProcess 类中，代码如下：

GStreamProcess.h

#pragma once

#include "stdafx.h"

#include <gst/gst.h>
#include <gst/app/gstappsrc.h>
#include <gst/base/gstbaseparse.h>

typedef struct _AppSrcOption AppSrcOption;

struct _AppSrcOption
{
	_AppSrcOption()
		: pipeline(nullptr)
		, shareScreenAppsrc(nullptr)
		, sendAudioAppsrc(nullptr)
		, bus(nullptr)
		, gloop(nullptr)
		, width(0)
		, height(0)
	{}

	GstElement *pipeline;
	GstElement *shareScreenAppsrc;
	GstElement *sendAudioAppsrc;
	GstElement *receiveAudioAppsrc;
	GstElement *rtmp;
	GstBus *bus;
	GMainLoop *gloop;

	QString recordFileName;

	iLiveSucCallback sucCallback;
	iLiveErrCallback errCallback;
	void* callbackData;

	uint width;
	uint height;

	QString pushStreamUrl;
};

//int gstreamerInit(AppSrcOption *app, int argc, char *argv[]);


class GStreamProcess : public QThread
{
	Q_OBJECT

public:
	AppSrcOption* app;

protected:		
	void run() Q_DECL_OVERRIDE;

signals:
	void resultReady(const QString &s);
};

GStreamProcess.cpp

#include "stdafx.h"

GST_DEBUG_CATEGORY(appsrc_pipeline_debug);
#define GST_CAT_DEFAULT appsrc_pipeline_debug

/*
 * Bus watch callback for the recording pipeline.
 *
 * ERROR: logs the error, reports it through app->errCallback (code -90001)
 *        and quits the main loop.
 * EOS:   quits the main loop.
 * Any other message type is ignored.
 *
 * Always returns TRUE so the watch stays installed.
 */
static gboolean
bus_message(GstBus * bus, GstMessage * message, AppSrcOption * app)
{
	GST_DEBUG("got message %s",
		gst_message_type_get_name(GST_MESSAGE_TYPE(message)));

	switch (GST_MESSAGE_TYPE(message)) {
	case GST_MESSAGE_ERROR: {
		GError *err = NULL;
		gchar *dbg_info = NULL;

		gst_message_parse_error(message, &err, &dbg_info);

		/* A message is not required to carry a source object, so do not
		 * dereference it blindly just to print a name. */
		GstObject *source = GST_MESSAGE_SRC(message);
		const gchar *elename = source ? GST_OBJECT_NAME(source) : "(unknown)";

		g_printerr("ERROR from element %s: %s\n",
			elename, err->message);
		g_printerr("Debugging info: %s\n", (dbg_info) ? dbg_info : "none");

		/* The callback is optional; without this check an unset callback
		 * turns a pipeline error into a crash. */
		if (app->errCallback)
			app->errCallback(-90001, err->message, app->callbackData);

		g_error_free(err);
		g_free(dbg_info);
		g_main_loop_quit(app->gloop);
		break;
	}
	case GST_MESSAGE_EOS: {
		g_main_loop_quit(app->gloop);
		break;
	}
	default:
		break;
	}

	return TRUE;
}

void GStreamProcess::run()
{
	GError *error = NULL;

	int argc = 1;

	char *mock[1] = {"empty"};
	char **argv[1];
	*argv = mock;

	gst_init(&argc, argv);

	GST_DEBUG_CATEGORY_INIT(appsrc_pipeline_debug, "appsrc-pipeline", 0,
		"appsrc pipeline example");

	app->gloop = g_main_loop_new(NULL, TRUE);

	GstElement *pipeline = gst_parse_launch("flvmux name=mux ! queue ! tee name=t ! queue ! filesink name=file t. ! queue ! rtmpsink name=rtmp adder name=mix ! queue ! audiorate ! audioconvert ! voaacenc ! mux.audio appsrc name=screen_src ! queue ! videorate ! x264enc ! mux.video appsrc name=send_audio_src ! queue ! audiorate ! mix. appsrc wave=5 name=receive_audio_src ! queue ! audiorate ! mix.", NULL);
	g_assert(pipeline);

	app->bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
	g_assert(app->bus);

	/* add watch for messages */
	gst_bus_add_watch(app->bus, (GstBusFunc)bus_message, app);


	/* 设置 screen src 属性 */

	app->shareScreenAppsrc = gst_bin_get_by_name(GST_BIN(pipeline), "screen_src");
	g_assert(app->shareScreenAppsrc);

	GstCaps *caps = gst_caps_new_simple("video/x-raw",
		"format", G_TYPE_STRING, "I420",
		"width", G_TYPE_INT, app->width,
		"height", G_TYPE_INT, app->height,
		"framerate", GST_TYPE_FRACTION, 15, 1,
		NULL);

	gst_app_src_set_caps(GST_APP_SRC(app->shareScreenAppsrc), caps);

	g_object_set(app->shareScreenAppsrc, "format", GST_FORMAT_TIME, NULL);
	g_object_set(app->shareScreenAppsrc, "is-live", TRUE, NULL);


	/* 设置 send audio src 属性 */

	app->sendAudioAppsrc = gst_bin_get_by_name(GST_BIN(pipeline), "send_audio_src");
	g_assert(app->sendAudioAppsrc);

	caps = gst_caps_new_simple("audio/x-raw",
		"format", G_TYPE_STRING, "S16LE",
		"layout", G_TYPE_STRING, "interleaved",
		"channels", G_TYPE_INT, 2,
		"rate", G_TYPE_INT, 48000,
		NULL);

	gst_app_src_set_caps(GST_APP_SRC(app->sendAudioAppsrc), caps);
	g_object_set(app->sendAudioAppsrc, "format", GST_FORMAT_TIME, NULL);
	g_object_set(app->sendAudioAppsrc, "is-live", TRUE, NULL);


	/* 设置 receive audio src 属性 */

	app->receiveAudioAppsrc = gst_bin_get_by_name(GST_BIN(pipeline), "receive_audio_src");
	g_assert(app->receiveAudioAppsrc);

	caps = gst_caps_new_simple("audio/x-raw",
		"format", G_TYPE_STRING, "S16LE",
		"layout", G_TYPE_STRING, "interleaved",
		"channels", G_TYPE_INT, 2,
		"rate", G_TYPE_INT, 48000,
		NULL);

	gst_app_src_set_caps(GST_APP_SRC(app->receiveAudioAppsrc), caps);
	g_object_set(app->receiveAudioAppsrc, "format", GST_FORMAT_TIME, NULL);
	g_object_set(app->receiveAudioAppsrc, "is-live", TRUE, NULL);


	/* 设置 filesink 属性 */

	GstElement *filesink = gst_bin_get_by_name(GST_BIN(pipeline), "file");
	g_assert(filesink);

	g_object_set(G_OBJECT(filesink), "location", app->recordFileName.toStdString().c_str(), NULL);


	/* 设置 rtmp 属性 */

	GstElement *rtmp = gst_bin_get_by_name(GST_BIN(pipeline), "rtmp");
	g_assert(rtmp);

	g_object_set(G_OBJECT(rtmp), "location", app->pushStreamUrl.toStdString().c_str(), NULL);

	/* go to playing */
	gst_element_set_state(pipeline, GST_STATE_PLAYING);

	//GST_DEBUG_BIN_TO_DOT_FILE_WITH_TS(GST_BIN(pipeline), GST_DEBUG_GRAPH_SHOW_ALL, "pipeline_dot");

	app->pipeline = pipeline;

	app->sucCallback(app->callbackData);

	g_main_loop_run(app->gloop);

	GST_DEBUG("stopping");

	gst_element_set_state(app->pipeline, GST_STATE_NULL);

	gst_object_unref(app->bus);
	g_main_loop_unref(app->gloop);
}

上面代码中，比较关键的地方是 appsrc 的 format 属性需要设置为 GST_FORMAT_TIME，如果不设置的话，视频和音频会无法同步，就是下面这几行代码：

...
g_object_set(app->shareScreenAppsrc, "format", GST_FORMAT_TIME, NULL);
...
g_object_set(app->sendAudioAppsrc, "format", GST_FORMAT_TIME, NULL);
...
g_object_set(app->receiveAudioAppsrc, "format", GST_FORMAT_TIME, NULL);

然后在需要启动录制的地方开启线程，启动 GStreamer 处理线程（因为项目比较复杂，这里只截取部分）

...
// Options shared with the GStreamer worker thread: output file and RTMP push URL.
// NOTE(review): width/height are not set in this excerpt, but GStreamProcess::run()
// uses them for the video caps - confirm they are assigned elsewhere.
m_pAppSrcOption = new AppSrcOption();
m_pAppSrcOption->recordFileName = filePath;
m_pAppSrcOption->pushStreamUrl = m_pushStreamUrl;

// callbackData is handed back unchanged to both callbacks.
m_pAppSrcOption->callbackData = this;
m_pAppSrcOption->sucCallback = OnLocalRecordSuc;
m_pAppSrcOption->errCallback = OnLocalRecordErr;

m_pLocalRecordProcessThread = new GStreamProcess();
m_pLocalRecordProcessThread->app = m_pAppSrcOption;

// When the thread finishes: schedule the thread object for deletion and notify the form.
connect(m_pLocalRecordProcessThread, &GStreamProcess::finished, m_pLocalRecordProcessThread, &QObject::deleteLater);
connect(m_pLocalRecordProcessThread, &GStreamProcess::finished, this, &MainForm::OnLocalRecordClose);

m_pLocalRecordProcessThread->start();

// Timer that drives OnFillBlankAudioTimer(), started with an interval argument of 2000.
m_pFillBlankAudioTimer->start(2000);
...

接下来注入视频帧数据。这个项目在分享屏幕时，每一帧的视频数据会回调指定方法。在回调方法中，我们将数据传给管道中的 shareScreenAppsrc

void MainForm::localVideoHook(const LiveVideoFrame* video_frame)
{
	if (m_pAppSrcOption && m_pAppSrcOption->pipeline)
	{
		GstBuffer *buffer;
		guint8 *ptr;
		ptr = (guint8 *)g_malloc(video_frame->dataSize * sizeof(uint8));
		if (NULL == ptr)
		{
			qDebug("OnLocalVideo::malloc failed!");
		}
		else
		{
			memcpy(ptr, video_frame->data, video_frame->dataSize);
			buffer = gst_buffer_new_wrapped((void*)ptr, video_frame->dataSize);

			//设置时间戳
			GST_BUFFER_PTS(buffer) = gst_clock_get_time(m_pAppSrcOption->pipeline->clock) - m_pAppSrcOption->pipeline->base_time;

			GstFlowReturn ret;
			//注入视频帧数据
			g_signal_emit_by_name(m_pAppSrcOption->shareScreenAppsrc, "push-buffer", buffer, &ret);

			gst_buffer_unref(buffer);
		}
	}
}

上面代码中，设置时间戳的代码非常关键，如果没有时间戳，会导致管道中的 videorate 由于缺少时间戳信息而失败。
这里的时间戳取的是管道的运行时间（running time）：管道时钟的当前时间 - 管道的 base_time（即管道启动时的时钟时间）。

然后以类似的方式注入音频帧数据，音频帧有两路，一路为老师的声音，一路为学生的声音。
老师的声音：

void MainForm::sendAudioHook(const iLiveAudioFrame* audio_frame)
{

	if (m_pAppSrcOption && m_pAppSrcOption->pipeline)
	{
		GstBuffer *buffer;

		guint8 *ptr;
		ptr = (guint8 *)g_malloc(audio_frame->dataSize * sizeof(uint8));
		if (NULL == ptr)
		{
			qDebug("OnSendAudioCallback::malloc failed!");
		}
		else
		{
			memcpy(ptr, audio_frame->data, audio_frame->dataSize);
			buffer = gst_buffer_new_wrapped((void*)ptr, audio_frame->dataSize);

			GstClockTime pts = gst_clock_get_time(m_pAppSrcOption->pipeline->clock) - m_pAppSrcOption->pipeline->base_time;
			GST_BUFFER_PTS(buffer) = pts;
			m_lastWriteSendAudioTime = pts;

			GST_DEBUG("feed buffer");

			GstFlowReturn ret;
			g_signal_emit_by_name(m_pAppSrcOption->sendAudioAppsrc, "push-buffer", buffer, &ret);

			gst_buffer_unref(buffer);
		}
	}
}

学生的声音：

void MainForm::receiveAudioHook(const iLiveAudioFrame* audio_frame)
{

	if (m_pAppSrcOption && m_pAppSrcOption->pipeline)
	{
		GstBuffer *buffer;

		guint8 *ptr;
		ptr = (guint8 *)g_malloc(audio_frame->dataSize * sizeof(uint8));
		if (NULL == ptr)
		{
			qDebug("receiveAudioHook::malloc failed!");
		}
		else
		{
			memcpy(ptr, audio_frame->data, audio_frame->dataSize);
			buffer = gst_buffer_new_wrapped((void*)ptr, audio_frame->dataSize);

			GstClockTime pts = gst_clock_get_time(m_pAppSrcOption->pipeline->clock) - m_pAppSrcOption->pipeline->base_time;
			GST_BUFFER_PTS(buffer) = pts;
			m_lastWriteReceiveAudioTime = pts;

			GST_DEBUG("feed buffer");

			GstFlowReturn ret;
			g_signal_emit_by_name(m_pAppSrcOption->receiveAudioAppsrc, "push-buffer", buffer, &ret);

			gst_buffer_unref(buffer);
		}
	}
}

在项目中还有一个特殊的地方，因为声音并不是一直有数据的，如果学生没有连麦或老师没有开启麦克风，是没有音频帧数据回调的，这会导致音频混流时因为缺少数据而一直等待，造成阻塞。

这里采取了一个变通的方法，就是设置一个定时器，每隔 2 秒注入一帧空白数据。上面代码中的 m_pFillBlankAudioTimer->start(2000); 就是用于启动这个定时器的。下面是注入空白帧的代码：

2018-03-15 updated: 这里每2秒注入一个空白帧会导致录制下来的视频出现声音突突声。后来改为增加了一个变量，在收到音频信号时记录上次插入音频帧的时间戳。每隔2秒打算插入空白帧时，检查时间戳距现在大于 5 秒才插入，否则不必插入。

//填充音频无信号时的空白声音，音频断流的话，会导致 adder 混流 block
void MainForm::OnFillBlankAudioTimer()
{
	if (m_pAppSrcOption && m_pAppSrcOption->pipeline && m_pAppSrcOption->receiveAudioAppsrc)
	{
		GstClockTime pts = gst_clock_get_time(m_pAppSrcOption->pipeline->clock) - m_pAppSrcOption->pipeline->base_time;

		if (GST_TIME_AS_SECONDS(pts - m_lastWriteReceiveAudioTime) > 5)
		{
			GstBuffer *buffer;
			guint size;
			GstFlowReturn ret;

			size = 3840;

			buffer = gst_buffer_new_allocate(NULL, size, NULL);

			//全部填入0x0
			gst_buffer_memset(buffer, 0, 0x0, size);

			GST_BUFFER_PTS(buffer) = pts;

			g_signal_emit_by_name(m_pAppSrcOption->receiveAudioAppsrc, "push-buffer", buffer, &ret);
			gst_buffer_unref(buffer);
		}
	}

	if (m_pAppSrcOption && m_pAppSrcOption->pipeline && m_pAppSrcOption->sendAudioAppsrc)
	{
		GstClockTime pts = gst_clock_get_time(m_pAppSrcOption->pipeline->clock) - m_pAppSrcOption->pipeline->base_time;

		if (GST_TIME_AS_SECONDS(pts - m_lastWriteSendAudioTime) > 5)
		{
			GstBuffer *buffer;
			guint size;
			GstFlowReturn ret;

			size = 3840;

			buffer = gst_buffer_new_allocate(NULL, size, NULL);

			//全部填入0x0
			gst_buffer_memset(buffer, 0, 0x0, size);

			GST_BUFFER_PTS(buffer) = pts;

			g_signal_emit_by_name(m_pAppSrcOption->sendAudioAppsrc, "push-buffer", buffer, &ret);
			gst_buffer_unref(buffer);
		}
	}
}

当需要结束混流和录制时，向管道中的各个 appsrc 发出 end-of-stream 信号（appsrc 的 action signal）。管道在处理完所有数据后，会在总线上发出 EOS 消息，bus_message 收到后退出主循环，管道随之正常结束、关闭退出。

void MainForm::onBtnStopPushStream()
{
	QMessageBox::StandardButton ret = QMessageBox::question(this, FromBits("确认"), FromBits("是否要停止视频录制？（多次录制会产生多个视频文件，会影响回放的体验，应尽量避免多次录制）"));

	if (ret == QMessageBox::Yes)
	{
		stopPushStream();

		if (m_pAppSrcOption && m_pAppSrcOption->pipeline)
		{
			GstFlowReturn ret;
			g_signal_emit_by_name(m_pAppSrcOption->shareScreenAppsrc, "end-of-stream", &ret);
			g_signal_emit_by_name(m_pAppSrcOption->sendAudioAppsrc, "end-of-stream", &ret);
			g_signal_emit_by_name(m_pAppSrcOption->receiveAudioAppsrc, "end-of-stream", &ret);

			m_pFillBlankAudioTimer->stop();
		}

		m_pushStreamStatus = E_ChangingPushStream;
		setStatus(m_status);
	}
}

之前我们在启动线程时，用下面这句代码注册了事件，当线程结束时会调用 OnLocalRecordClose 方法，可以在这个方法中更改 UI 控件的状态和释放资源

...
connect(m_pLocalRecordProcessThread, &GStreamProcess::finished, this, &MainForm::OnLocalRecordClose);
...

因为第一次使用 GStreamer 进行开发，走了很多弯路，踩了很多坑。好在最后还是完成了需要的功能，直播录制和推流的效果还是不错的。

但这个方案还存在一个问题：RTMP 推流一旦失败，会导致整个管道出错停止（本地录制也会随之中断），这个还需要设法解决。

开发过程中参考了很多资料，比较有用的是下面这几个：

appsrc 的 demo 代码

https://gist.github.com/nzjrs/725122/16ceee88aafae389bab207818e0661328921e1ab （需要FQ）

http://blog.csdn.net/u010312436/article/details/53610599

https://gstreamer.freedesktop.org/documentation/application-development/advanced/pipeline-manipulation.html

GStreamer 时钟机制

https://gstreamer.freedesktop.org/documentation/application-development/advanced/clocks.html

GStreamer 写日志和生成管道逻辑图的方法

https://gstreamer.freedesktop.org/documentation/tutorials/basic/debugging-tools.html

https://gstreamer.freedesktop.org/data/doc/gstreamer/head/gstreamer/html/gst-running.html

posted @ 2018-01-20 21:14 hejiangyuan 阅读(7878) 评论(1) 编辑收藏举报

刷新页面返回顶部