subset_lat_dir.sh

#!/bin/bash

   

# Copyright 2018 Jarvan Wang

# Copyright 2017 Vimal Manohar

# Apache 2.0.

   

cmd=run.pl

nj=40

if [ -f ./path.sh ]; then . ./path.sh; fi

   

. ./utils/parse_options.sh

   

if [ $# -ne 3 ]; then

cat <<EOF

This script creates an lattice directory containing a subset of

utterances contained in <subset-data-dir> from the

original lattice directory containing lattices for utterances in

<full-data-dir>.

   

The number of split jobs in the output lattice directory is

equal to the number of jobs in the original lattice directory,

unless the subset data directory has too few speakers.

   

Usage: $0 [options] <subset-data-dir> <lat-dir> <subset-lat-dir>

e.g.: $0 data/train exp/tri3_lat_sp exp/tri3_lat

Options:

--cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs.

EOF

exit 1

fi

   

subset_data=$1

lat_dir=$2

dir=$3

   

ori_nj=$(cat $lat_dir/num_jobs) || exit 1

   

mkdir -p $dir

cp $lat_dir/{final.mdl,*.mat,*_opts,tree} $dir/ || true

cp -r $lat_dir/phones $dir 2>/dev/null || true

   

$cmd JOB=1:$ori_nj $dir/log/copy_lattices.JOB.log \

lattice-copy "ark:gunzip -c $lat_dir/lat.JOB.gz |" \

ark,scp:$dir/lat_tmp.JOB.ark,$dir/lat_tmp.JOB.scp || exit 1

   

for n in `seq $ori_nj`; do

cat $dir/lat_tmp.$n.scp

done > $dir/lat_tmp.scp

   

#awk '{hash[$1]=$2}END{for(key in hash){printf("%s %s\n",key,hash[key])}}' < $dir/lat_tmp.scp > $dir/lat_tmp_sorted_uniq.scp

mv $dir/lat_tmp.scp $dir/lat_tmp.scp.bak

perl -e 'my %hash;while(<>){chomp;($key,$ark)=split;$hash{$key}=$ark};for $key (sort keys %hash){printf("%s %s\n",$key,$hash{$key})}' $dir/lat_tmp.scp.bak > $dir/lat_tmp.scp

   

utils/split_data.sh $subset_data $nj

$cmd JOB=1:$nj $dir/log/filter_lattices.JOB.log \

lattice-copy \

"scp:utils/filter_scp.pl $subset_data/split${nj}/JOB/utt2spk $dir/lat_tmp.scp |" \

"ark:| gzip -c > $dir/lat.JOB.gz" || exit 1

   

echo $nj > $dir/num_jobs

   

#rm $dir/lat_tmp.*.{ark,scp} $dir/lat_tmp.scp

   

exit 0

   

posted @ 2019-01-17 10:13  JarvanWang  阅读(405)  评论(0编辑  收藏  举报