ASR项目实战-构建Kaldi
准备工作
安装构建时依赖的基础软件
软件清单如下:
- bzip2
- python3
- automake
- libtool
- cmake
- gcc
- g++
- gfortran
- git
- subversion
不同平台安装软件的方式不同,比如可以使用 `yum` 或者 `apt-get` 等。
下载开源软件
软件清单如下:
- glog,版本 0.4.0
- Libunwind,版本 1.3.1
- Kaldi,master 分支
- OpenFST,版本 1.6.7
- OpenBLAS,版本 0.3.6
按照一定的规则,将下载后的文件放在指定目录,如下是样例
```
opensrc/
    glog/
        glog-0.4.0.zip
    Libunwind/
        libunwind-1.3.1-src.zip
    Kaldi/
        kaldi-master.zip
    OpenFST/
        openfst-1.6.7.tar.gz
    OpenBLAS/
        OpenBLAS-0.3.6.tar.gz
install/
    usr/
        local/
            bin/
            sbin/
            include/
            lib/
            lib64/
script/
    build.sh
```
构建脚本
`build.sh` 的内容,如下为样例:
#!/usr/bin/env bash
#
# build.sh - build Kaldi and its third-party dependencies from local archives.
#
# Layout expected relative to the parent of the directory holding this script:
#   opensrc/<Component>/<archive>   source archives, one directory per component
#   install/usr/local               install prefix shared by every component
#
# Build order: Libunwind -> glog -> OpenBLAS -> OpenFST -> Kaldi.
# The configure/make output of each component is written to
# <script dir>/<Component>.out.
#
# Environment:
#   BUILD_CORE_NUM  number of parallel make jobs (default: 8)

# Stop at the first failing command instead of building on top of a broken step.
set -Eeuo pipefail

BUILD_CORE_NUM=${BUILD_CORE_NUM:-8}

SCRIPT_FILE=$(readlink -f "$0")
# dirname (not dir, which lists directory contents) yields the parent path.
SCRIPT_ROOT=$(dirname "${SCRIPT_FILE}")
BUILD_ROOT=$(dirname "${SCRIPT_ROOT}")
SOURCE_ROOT=${BUILD_ROOT}/opensrc
INSTALL_ROOT=${BUILD_ROOT}/install/usr/local
export INSTALL_ROOT

PATH=${INSTALL_ROOT}/bin:${INSTALL_ROOT}/sbin:${PATH}
export PATH
# Append the previous value only when there is one, so that no empty entry
# (which means "current directory") ends up in the search path.
LD_LIBRARY_PATH=${INSTALL_ROOT}/lib:${INSTALL_ROOT}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
export LD_LIBRARY_PATH

readonly LIBUNWIND_VERSION=1.3.1
readonly GLOG_VERSION=0.4.0
readonly OPENBLAS_VERSION=0.3.6
readonly OPENFST_VERSION=1.6.7
readonly KALDI_VERSION=master

# Log file of the component currently being built; set by start_log.
BUILD_LOG=""

trap 'printf "build.sh: command failed near line %s; see %s\n" \
  "${LINENO}" "${BUILD_LOG:-the console output}" >&2' ERR

# Print a message to stderr and abort the build.
die() {
  printf 'build.sh: %s\n' "$*" >&2
  exit 1
}

# Point BUILD_LOG at a fresh, empty log file for the named component.
# Arguments: $1 - component name, used as the log file's base name
start_log() {
  BUILD_LOG=${SCRIPT_ROOT}/$1.out
  rm -f -- "${BUILD_LOG}"
  touch -- "${BUILD_LOG}"
}

# Unpack, build and install Libunwind into INSTALL_ROOT.
build_libunwind() {
  start_log Libunwind
  cd "${SOURCE_ROOT}/Libunwind"
  rm -rf -- "libunwind-${LIBUNWIND_VERSION}"
  # The -src.zip wraps the release tarball, hence the two-stage unpack.
  unzip -o "libunwind-${LIBUNWIND_VERSION}-src.zip"
  tar -xzvf "libunwind-${LIBUNWIND_VERSION}.tar.gz"
  cd "libunwind-${LIBUNWIND_VERSION}"
  ./autogen.sh
  CFLAGS="-fPIC -I${INSTALL_ROOT}/include" \
    ./configure \
    --prefix="${INSTALL_ROOT}" \
    >>"${BUILD_LOG}" 2>&1
  make -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
  make install >>"${BUILD_LOG}" 2>&1
}

# Unpack, build and install glog into INSTALL_ROOT.
build_glog() {
  start_log glog
  cd "${SOURCE_ROOT}/glog"
  rm -rf -- "glog-${GLOG_VERSION}"
  # Accept either packaging: a .tar.gz, a .zip that wraps the .tar.gz, or a
  # .zip that unpacks straight into the source directory.
  if [[ ! -f "glog-${GLOG_VERSION}.tar.gz" && -f "glog-${GLOG_VERSION}.zip" ]]; then
    unzip -o "glog-${GLOG_VERSION}.zip"
  fi
  if [[ -f "glog-${GLOG_VERSION}.tar.gz" ]]; then
    tar -xzvf "glog-${GLOG_VERSION}.tar.gz"
  fi
  [[ -d "glog-${GLOG_VERSION}" ]] \
    || die "glog-${GLOG_VERSION} sources not found in ${SOURCE_ROOT}/glog"
  cd "glog-${GLOG_VERSION}"
  ./autogen.sh
  CXXFLAGS="-fPIC -I${INSTALL_ROOT}/include" \
    LDFLAGS="-L${INSTALL_ROOT}/lib -L${INSTALL_ROOT}/lib64" \
    ./configure \
    --prefix="${INSTALL_ROOT}" \
    >>"${BUILD_LOG}" 2>&1
  make -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
  make install >>"${BUILD_LOG}" 2>&1
}

# Unpack, build and install OpenBLAS into INSTALL_ROOT.
build_openblas() {
  start_log OpenBLAS
  cd "${SOURCE_ROOT}/OpenBLAS"
  rm -rf -- "OpenBLAS-${OPENBLAS_VERSION}"
  rm -f -- OpenBLAS
  tar -xzvf "OpenBLAS-${OPENBLAS_VERSION}.tar.gz"
  # Version-independent alias for the unpacked source tree.
  ln -s "OpenBLAS-${OPENBLAS_VERSION}" OpenBLAS
  cd "OpenBLAS-${OPENBLAS_VERSION}"
  make -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
  make PREFIX="${INSTALL_ROOT}" install >>"${BUILD_LOG}" 2>&1
}

# Unpack, build and install OpenFST into INSTALL_ROOT, linked against the
# glog and libunwind installed by the earlier steps.
build_openfst() {
  start_log OpenFST
  cd "${SOURCE_ROOT}/OpenFST"
  rm -rf -- "openfst-${OPENFST_VERSION}"
  tar -xzvf "openfst-${OPENFST_VERSION}.tar.gz"
  cd "openfst-${OPENFST_VERSION}"
  CXXFLAGS="-fPIC -I${INSTALL_ROOT}/include" \
    CFLAGS="-fPIC -I${INSTALL_ROOT}/include" \
    LDFLAGS="-L${INSTALL_ROOT}/lib -L${INSTALL_ROOT}/lib64" \
    LIBS="${INSTALL_ROOT}/lib/libglog.a -pthread -lunwind" \
    ./configure \
    --enable-static \
    --enable-shared \
    --enable-far \
    --enable-ngram-fsts \
    --prefix="${INSTALL_ROOT}" \
    >>"${BUILD_LOG}" 2>&1
  make -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
  make install >>"${BUILD_LOG}" 2>&1
}

# Unpack Kaldi and build it in place against the OpenBLAS and OpenFST found
# under INSTALL_ROOT. Nothing is installed; binaries stay in the source tree.
build_kaldi() {
  start_log Kaldi
  cd "${SOURCE_ROOT}/Kaldi"
  # Remove the previously extracted tree, never the archive itself.
  rm -rf -- "kaldi-${KALDI_VERSION}"
  rm -f -- kaldi
  unzip "kaldi-${KALDI_VERSION}.zip"
  cd "kaldi-${KALDI_VERSION}/src"
  CXXFLAGS="-fPIC -I${INSTALL_ROOT}/include" \
    ./configure \
    --static \
    --openblas-root="${INSTALL_ROOT}" \
    --static-math=yes \
    --threaded-math=yes \
    --static-fst=yes \
    --fst-version="${OPENFST_VERSION}" \
    --fst-root="${INSTALL_ROOT}" \
    --use-cuda=no \
    >>"${BUILD_LOG}" 2>&1
  make clean -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
  make depend -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
  make -j"${BUILD_CORE_NUM}" >>"${BUILD_LOG}" 2>&1
}

# Build every component in dependency order.
main() {
  build_libunwind
  build_glog
  build_openblas
  build_openfst
  build_kaldi
}

main "$@"
注意事项
OpenFST自身实现了一套日志系统,与glog集成时会报符号冲突。目前已有用户在官网上提出类似的问题,但尚未得到官方的答复。为了完整利用glog的能力,需要对OpenFST的实现做一定的修改。
修改点主要涉及如下几个文件:
- `src/include/log.h`,删除代码中相关的类和变量的定义。

  ```cpp
  #include <fst/types.h>
  #include <fst/lock.h>

  using std::string;

  // 删除中间出现的代码

  #define ATTRIBUTE_DEPRECATED __attribute__((deprecated))

  #endif
  ```

- `src/include/flags.h`,在头部引入glog的头文件,增加如下代码:

  ```cpp
  #include <fst/types.h>
  #include <fst/lock.h>
  #include "glog/logging.h" // 引入glog的头文件

  using std::string;
  ```

- `src/lib/flags.cc`,删除同名的变量。

  ```cpp
  // DEFINE_int32(v, 0, "verbosity level");
  ```
本文来自博客园,作者:jackieathome,转载请注明原文链接:https://www.cnblogs.com/jackieathome/p/17935655.html
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南