1 test data preparation
1> select representative audio that matches the real application scenario:
   recorded with the test device: contains noise
   read speech (text read aloud): relatively clean
   TTS: synthesized speech
2> get correct labels (reference transcripts) for the test data:
   i. call a third-party ASR API to do the labeling
   ii. use our own ASR model to do the labeling
   then compare the two results and resolve the utterances where they disagree (see the sketch below)
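A minimal sketch of that comparison, assuming both labelings are stored in Kaldi "utt-id transcript" format (the file names api_labels.txt and our_labels.txt are hypothetical):

# list the utterances where the two labelings disagree; these are the ones to review by hand
diff <(sort api_labels.txt) <(sort our_labels.txt) | grep '^[<>]'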
2 get the standard (reference) labels
python get_label_ref.py $test_dir $ref
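get_label_ref.py is expected to write $ref as a Kaldi text archive: one line per utterance in "utt-id transcript" form, where the key should be the wav filename without the .wav extension so that it matches the hypothesis keys built in step 4 (this is an assumption based on how those keys are constructed; the example utterances below are made up):

rec001 turn on the living room light
rec002 what is the weather like today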
3 decoding
for file in $test_dir/*; do
  run.pl $decode_dir/decode.`basename $file`.log \
    online2-wav-nnet3-latgen-faster --online=true --do-endpointing=false --frame-subsampling-factor=3 \
      --config=conf/online.conf --max-active=7000 --beam=15.0 --lattice-beam=6.0 --acoustic-scale=1.0 \
      --word-symbol-table=exp/chain/tdnn/graph/words.txt exp/chain/tdnn/final.mdl exp/chain/tdnn/graph/HCLG.fst \
      "ark:echo utterance-id1 utterance_`basename $file`|" "scp:echo utterance_`basename $file` $file |" ark:/dev/null
done
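Because --word-symbol-table is given, the decoder prints the best-path transcript into each decode log, keyed by the utterance id; those lines are what step 4 extracts. A quick check on one hypothetical file (rec001.wav):

# the transcript line in the log starts with the utterance id, here prefixed with "utterance_"
grep '^utterance_' $decode_dir/decode.rec001.wav.log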
4 process the decoding results
# get the test set's decoding result from the decode logs
for file in $decode_dir/*; do
  name=`basename $file`
  label=`cat $file | grep '^utterance'`
  label_1=`echo ${label/utterance_/}`
  label_2=`echo ${label_1/.wav/}`
  echo $label_2 > $text/${name}
done
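Each file under $text should now contain a single "utt-id transcript" line in the same format as $ref, for example (hypothetical):

rec001 turn on the living room lights

The keys on both sides must match, otherwise compute-wer with --mode=present will simply not score that utterance.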
5 get WER
if you want to get CER, you can set a flag and get the CER result:
if [ "$flag" == 'cer' ] || [ "$flag" == 'CER' ]; then
  echo "do CER estimation:"
  PYTHONIOENCODING=utf8 python3 handle_file.py $text $ref
else
  echo "do WER estimation:"
fi
and the default metric is WER
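For Chinese text, CER is usually more meaningful than WER because word segmentation is ambiguous; the idea is to align hypothesis and reference at the character level and count substitutions, insertions, and deletions (handle_file.py may implement the details differently). A made-up example:

# reference : 今天天气很好  -> 今 天 天 气 很 好  (6 characters)
# hypothesis: 今天天汽好    -> 今 天 天 汽 好    (5 characters)
# errors: 1 substitution (气 -> 汽) + 1 deletion (很) = 2
# CER = 2 / 6 ≈ 33.3%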
# compute WER for each file
for file in ${text}/*; do
  cat $file >> $total_content
  compute-wer --text --mode=present ark:$ref ark:$file > $log_wer/wer_`basename $file`
done
# all files together to compute an overall WER
compute-wer --text --mode=present ark:$ref ark:$total_content
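compute-wer prints a one-line summary; the numbers below are made up, but the format is roughly:

%WER 12.50 [ 25 / 200, 4 ins, 6 del, 15 sub ]

i.e. 25 errors (4 insertions, 6 deletions, 15 substitutions) over 200 reference words. With --mode=present, reference utterances that have no matching hypothesis are skipped rather than causing an error, which is why each per-file run above scores only its own utterance.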