
  • data/test

# Files in data/*test* and data/*train*

cmvn.scp # For each speaker, the location (ark file and byte offset) of its CMVN (Cepstral Mean and Variance Normalization) statistics in a binary ark file

feats.scp # For each utterance, the location (ark file and byte offset) of its audio features in a binary ark file

spk2utt # maps each speaker name to the list of that speaker's utterances

text # Utterance IDs and their corresponding transcriptions

utt2spk # utterance to its speaker name

wav.scp # Location of every audio file

/splitN # A directory used to split the task into N parts; each part contains its own split copy of the files above

  • data/lang

# Files in data/*lang*, which is the language directory

  • /phones
    • align_lexicon.txt



      HI HI HH_B AY_E


G.fst # Grammar's finite state transducer

L.fst # Lexicon's finite state transducer

L_disambig.fst # Lexicon's finite state transducer with disambiguation symbols

oov.int # Integer ID of the word that out-of-vocabulary words are mapped to

oov.txt # The word (e.g. <UNK>) that out-of-vocabulary words are mapped to

phones.txt # list of phones with their IDs; these are the phones used by the words in the words.txt file

topo # HMM topology of the phones

words.txt # list of words with their IDs; these are the words that appear in the text file



posted @ 2017-09-09 22:41  JarvanWang  阅读(608)  评论(0编辑  收藏  举报