 . ./cmd.sh

 # general configuration
-backend=chainer
-stage=0        # start from 0 if you need to start from data preparation
-gpu=-1         # use 0 when using GPU on slurm/grid engine, otherwise -1
+backend=pytorch
+stage=-1       # start from -1 if you need to start from data download
+gpu=           # will be deprecated, please use ngpu
+ngpu=0         # number of gpus ("0" uses cpu, otherwise use gpu)
 debugmode=1
 dumpdir=dump   # directory to dump full features
 N=0            # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
@@ -21,8 +22,8 @@ do_delta=false # true when using CNN

 # network architecture
 # encoder related
-etype=vggblstmp # encoder architecture type
-elayers=6
+etype=blstmp    # encoder architecture type
+elayers=8
 eunits=320
 eprojs=320
 subsample=1_2_2_1_1 # skip every n frame from input to nth layers
@@ -38,7 +39,7 @@ aconv_filts=100
 mtlalpha=0.5

 # minibatch related
-batchsize=30
+batchsize=50
 maxlen_in=800  # if input length > maxlen_in, batchsize is automatically reduced
 maxlen_out=150 # if output length > maxlen_out, batchsize is automatically reduced

@@ -47,7 +48,7 @@ opt=adadelta
 epochs=15

 # rnnlm related
-lm_weight=1.0
+lm_weight=0.3

 # decoding parameter
 beam_size=20
@@ -57,37 +58,62 @@ minlenratio=0.0
 ctc_weight=0.3
 recog_model=acc.best # set a model to be used for decoding: 'acc.best' or 'loss.best'

-# data
-wsj0=/export/corpora5/LDC/LDC93S6B
-wsj1=/export/corpora5/LDC/LDC94S13B
+# Set this to somewhere where you want to put your data, or where
+# someone else has already put it. You'll want to change this
+# if you're not on the CLSP grid.
+datadir=/export/a15/vpanayotov/data

-# bpe
-nbpe=500
+# base url for downloads.
+data_url=www.openslr.org/resources/12
+
+# bpemode (unigram or bpe)
+nbpe=200
+bpemode=unigram

 # exp tag
 tag="" # tag for managing experiments.

 . utils/parse_options.sh || exit 1;

-. ./path.sh
-. ./cmd.sh
+. ./path.sh
+. ./cmd.sh
+
+# check gpu option usage
+if [ ! -z $gpu ]; then
+    echo "WARNING: --gpu option will be deprecated."
+    echo "WARNING: please use --ngpu option."
+    if [ $gpu -eq -1 ]; then
+        ngpu=0
+    else
+        ngpu=1
+    fi
+fi

 # Set bash to 'debug' mode, it will exit on :
 # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
 set -e
 set -u
 set -o pipefail

-train_set=train_si284
-train_dev=test_dev93
-recog_set="test_dev93 test_eval92"
+train_set=train_960
+train_dev=dev
+recog_set="test_clean test_other dev_clean dev_other"
+
+if [ ${stage} -le -1 ]; then
+    echo "stage -1: Data Download"
+    for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do
+        local/download_and_untar.sh ${datadir} ${data_url} ${part}
+    done
+fi

 if [ ${stage} -le 0 ]; then
     ### Task dependent. You have to do the following data preparation part by yourself.
     ### But you can utilize Kaldi recipes in most cases
     echo "stage 0: Data preparation"
-    local/wsj_data_prep.sh ${wsj0}/??-{?,??}.? ${wsj1}/??-{?,??}.?
-    local/wsj_format_data.sh
+    for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do
+        # use underscore-separated names in data directories.
+        local/data_prep.sh ${datadir}/LibriSpeech/${part} data/$(echo ${part} | sed s/-/_/g)
+    done
 fi

 feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir}
@@ -98,81 +124,97 @@ if [ ${stage} -le 1 ]; then
     echo "stage 1: Feature Generation"
     fbankdir=fbank
     # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame
-    for x in train_si284 test_dev93 test_eval92; do
-        steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 data/${x} exp/make_fbank/${x} ${fbankdir}
+    for x in dev_clean test_clean dev_other test_other train_clean_100 train_clean_360 train_other_500; do
+        steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 32 data/${x} exp/make_fbank/${x} ${fbankdir}
     done

+    utils/combine_data.sh data/${train_set}_org data/train_clean_100 data/train_clean_360 data/train_other_500
+    utils/combine_data.sh data/${train_dev}_org data/dev_clean data/dev_other
+
+    # remove utt having more than 3000 frames
+    # remove utt having more than 400 characters
+    remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_set}_org data/${train_set}
+    remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_dev}_org data/${train_dev}
+
     # compute global CMVN
     compute-cmvn-stats scp:data/${train_set}/feats.scp data/${train_set}/cmvn.ark

     # dump features for training
     if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then
         utils/create_split_dir.pl \
-            /export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
+            /export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
             ${feat_tr_dir}/storage
     fi
     if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then
         utils/create_split_dir.pl \
-            /export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
+            /export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
             ${feat_dt_dir}/storage
     fi
-    dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
+    dump.sh --cmd "$train_cmd" --nj 80 --do_delta $do_delta \
         data/${train_set}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/train ${feat_tr_dir}
-    dump.sh --cmd "$train_cmd" --nj 4 --do_delta $do_delta \
+    dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
         data/${train_dev}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/dev ${feat_dt_dir}
+    for rtask in ${recog_set}; do
+        feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}; mkdir -p ${feat_recog_dir}
+        dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
+            data/${rtask}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/recog/${rtask} \
+            ${feat_recog_dir}
+    done
 fi

-dict=data/bpe/${train_set}_units_${nbpe}.txt
-code=data/bpe/code_${nbpe}.txt
-nlsyms=data/bpe/non_lang_syms.txt
+dict=data/lang_char/${train_set}_units.txt
+bpemodel=data/lang_char/${train_set}${nbpe}
 echo "dictionary: ${dict}"
 if [ ${stage} -le 2 ]; then
     ### Task dependent. You have to check non-linguistic symbols used in the corpus.
     echo "stage 2: Dictionary and Json Data Preparation"
-    mkdir -p data/bpe
+    mkdir -p data/lang_char/
     echo "<unk> 1" > ${dict} # <unk> must be 1, 0 will be used for "blank" in CTC
-    cut -f 2- -d" " data/${train_set}/text | learn_bpe.py -s ${nbpe} > ${code}
-    cut -f 2- -d" " data/${train_set}/text | apply_bpe.py -c ${code} \
-        | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict}
+    cut -f 2- -d" " data/${train_set}/text > data/lang_char/input.txt
+    spm_train --input=data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000
+    spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict}
     wc -l ${dict}

-    echo "make a non-linguistic symbol list"
-    cut -f 2- data/${train_set}/text | tr " " "\n" | sort | uniq | grep "<" > ${nlsyms}
-    cat ${nlsyms}
-
-    echo "make json files"
-    data2json.sh --feat ${feat_tr_dir}/feats.scp --bpecode ${code} \
-        data/${train_set} ${dict} > ${feat_tr_dir}/data_${nbpe}.json
-    data2json.sh --feat ${feat_dt_dir}/feats.scp --bpecode ${code} \
+    # make json labels
+    data2json.sh --feat ${feat_tr_dir}/feats.scp --bpecode ${bpemodel}.model \
+        data/${train_set} ${dict} > ${feat_tr_dir}/data_${nbpe}.json
+    data2json.sh --feat ${feat_dt_dir}/feats.scp --bpecode ${bpemodel}.model \
         data/${train_dev} ${dict} > ${feat_dt_dir}/data_${nbpe}.json
 fi

-# you can skip this and remove --rnnlm option in the recognition (stage 5)
-lmexpdir=exp/train_rnnlm_2layer_bs2048_bpe${nbpe}
+# You can skip this and remove --rnnlm option in the recognition (stage 5)
+lmexpdir=exp/train_rnnlm_2layer_bs256
 mkdir -p ${lmexpdir}
 if [ ${stage} -le 3 ]; then
     echo "stage 3: LM Preparation"
     lmdatadir=data/local/lm_train
     mkdir -p ${lmdatadir}
-    cut -f 2- -d" " data/${train_set}/text | apply_bpe.py -c ${code} | perl -pe 's/\n/ <eos> /g' \
-        > ${lmdatadir}/train_trans_${nbpe}.txt
-    zcat ${wsj1}/13-32.1/wsj1/doc/lng_modl/lm_train/np_data/{87,88,89}/*.z | grep -v "<" | tr [a-z] [A-Z] \
-        | apply_bpe.py -c ${code} | perl -pe 's/\n/ <eos> /g' >> ${lmdatadir}/train_others_${nbpe}.txt
-    cat ${lmdatadir}/train_trans_${nbpe}.txt ${lmdatadir}/train_others_${nbpe}.txt | tr '\n' ' ' > ${lmdatadir}/train_${nbpe}.txt
-    cut -f 2- -d" " data/${train_dev}/text | apply_bpe.py -c ${code} | perl -pe 's/\n/ <eos> /g' \
-        > ${lmdatadir}/valid_${nbpe}.txt
+    spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt | perl -pe 's/\n/ <eos> /g' \
+        > ${lmdatadir}/train.txt
+    cut -f 2- -d" " data/${train_dev}/text | spm_encode --model=${bpemodel}.model --output_format=piece | perl -pe 's/\n/ <eos> /g' \
+        > ${lmdatadir}/valid.txt
+    # use only 1 gpu
+    if [ ${ngpu} -gt 1 ]; then
+        echo "LM training does not support multi-gpu. single gpu will be used."
+        lmngpu=1
+    else
+        lmngpu=${ngpu}
+    fi
     ${cuda_cmd} ${lmexpdir}/train.log \
         lm_train.py \
-        --gpu ${gpu} \
+        --ngpu ${lmngpu} \
+        --backend ${backend} \
         --verbose 1 \
         --outdir ${lmexpdir} \
-        --train-label ${lmdatadir}/train_${nbpe}.txt \
-        --valid-label ${lmdatadir}/valid_${nbpe}.txt \
+        --train-label ${lmdatadir}/train.txt \
+        --valid-label ${lmdatadir}/valid.txt \
+        --epoch 60 \
+        --batchsize 256 \
         --dict ${dict}
 fi

 if [ -z ${tag} ]; then
-    expdir=exp/${train_set}_${etype}_e${elayers}_subsample${subsample}_unit${eunits}_proj${eprojs}_d${dlayers}_unit${dunits}_${atype}_aconvc${aconv_chans}_aconvf${aconv_filts}_mtlalpha${mtlalpha}_${opt}_bs${batchsize}_mli${maxlen_in}_mlo${maxlen_out}_bpe${nbpe}
+    expdir=exp/${train_set}_${etype}_e${elayers}_subsample${subsample}_unit${eunits}_proj${eprojs}_d${dlayers}_unit${dunits}_${atype}_aconvc${aconv_chans}_aconvf${aconv_filts}_mtlalpha${mtlalpha}_${opt}_bs${batchsize}_mli${maxlen_in}_mlo${maxlen_out}_bpe${nbpe}_bpemode${bpemode}
     if ${do_delta}; then
         expdir=${expdir}_delta
     fi
@@ -181,32 +223,21 @@ else
 fi
 mkdir -p ${expdir}

-# switch backend
-if [[ ${backend} == chainer ]]; then
-    train_script=asr_train.py
-    decode_script=asr_recog.py
-else
-    train_script=asr_train_th.py
-    decode_script=asr_recog_th.py
-fi
-
 if [ ${stage} -le 4 ]; then
     echo "stage 4: Network Training"
-
-    ${cuda_cmd} ${expdir}/train.log \
-        ${train_script} \
-        --gpu ${gpu} \
+    ${cuda_cmd} --gpu ${ngpu} ${expdir}/train.log \
+        asr_train.py \
+        --ngpu ${ngpu} \
+        --backend ${backend} \
         --outdir ${expdir}/results \
         --debugmode ${debugmode} \
        --dict ${dict} \
         --debugdir ${expdir} \
         --minibatches ${N} \
         --verbose ${verbose} \
         --resume ${resume} \
-        --train-feat scp:${feat_tr_dir}/feats.scp \
-        --valid-feat scp:${feat_dt_dir}/feats.scp \
-        --train-label ${feat_tr_dir}/data_${nbpe}.json \
-        --valid-label ${feat_dt_dir}/data_${nbpe}.json \
+        --train-json ${feat_tr_dir}/data_${nbpe}.json \
+        --valid-json ${feat_dt_dir}/data_${nbpe}.json \
         --etype ${etype} \
         --elayers ${elayers} \
         --eunits ${eunits} \
@@ -232,29 +263,27 @@ if [ ${stage} -le 5 ]; then
     for rtask in ${recog_set}; do
     (
         decode_dir=decode_${rtask}_beam${beam_size}_e${recog_model}_p${penalty}_len${minlenratio}-${maxlenratio}_ctcw${ctc_weight}_rnnlm${lm_weight}
+        feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}

         # split data
         data=data/${rtask}
         split_data.sh --per-utt ${data} ${nj};
         sdata=${data}/split${nj}utt;

-        # feature extraction
-        feats="ark,s,cs:apply-cmvn --norm-vars=true data/${train_set}/cmvn.ark scp:${sdata}/JOB/feats.scp ark:- |"
-        if ${do_delta}; then
-            feats="$feats add-deltas ark:- ark:- |"
-        fi
-
         # make json labels for recognition
-        data2json.sh --bpecode ${code} ${data} ${dict} > ${data}/data_${nbpe}.json
+        for j in `seq 1 ${nj}`; do
+            data2json.sh --bpecode ${bpemodel}.model --feat ${feat_recog_dir}/feats.scp \
+                ${sdata}/${j} ${dict} > ${sdata}/${j}/data_${nbpe}.json
+        done

         #### use CPU for decoding
-        gpu=-1
+        ngpu=0

         ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \
-            ${decode_script} \
-            --gpu ${gpu} \
-            --recog-feat "$feats" \
-            --recog-label ${data}/data_${nbpe}.json \
+            asr_recog.py \
+            --ngpu ${ngpu} \
+            --backend ${backend} \
+            --recog-json ${sdata}/JOB/data_${nbpe}.json \
             --result-label ${expdir}/${decode_dir}/data.JOB.json \
             --model ${expdir}/results/model.${recog_model} \
             --model-conf ${expdir}/results/model.conf \
@@ -264,11 +293,12 @@ if [ ${stage} -le 5 ]; then
             --minlenratio ${minlenratio} \
             --ctc-weight ${ctc_weight} \
             --rnnlm ${lmexpdir}/rnnlm.model.best \
-            --lm-weight ${lm_weight} &
+            --lm-weight ${lm_weight} \
+            &
         wait

-        score_sclite.sh --bpe true --nlsyms ${nlsyms} ${expdir}/${decode_dir} ${dict}
-
+        score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel}.model --wer true ${expdir}/${decode_dir} ${dict}
+
     ) &
     done
     wait