
Commit deea0f0

Committed on Jun 21, 2018
Merge remote-tracking branch 'upstream/master' into json
2 parents: d5c4d5f + a99dd1a

13 files changed: +151 -114 lines
Diff for: .gitignore

+3 -4

@@ -1,4 +1,4 @@
-*~
+B*~
 *.pyc
 \#*\#
 .\#*
@@ -27,11 +27,10 @@ tools/nkf/
 tools/venv/
 tools/warp-ctc/
 tools/chainer_ctc/
-tools/subword-nmt/
-tools/chainer_ctc*
+tools/sentencepiece/
 tools/nkf*
-tools/subword-nmt*
 tools/chainer_ctc*
 tools/warp-ctc*
+tools/sentencepiece/*
 
 .pytest_cache

Diff for: egs/csj/asr1/run.sh

+2 -2

@@ -137,12 +137,12 @@ if [ ${stage} -le 1 ]; then
 # dump features for training
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
+/export/b{14,15,16,17}/${USER}/espnet-data/egs/csj/asr1/dump/${train_set}/delta${do_delta}/storage \
 ${feat_tr_dir}/storage
 fi
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
+/export/b{14,15,16,17}/${USER}/espnet-data/egs/csj/asr1/dump/${train_dev}/delta${do_delta}/storage \
 ${feat_dt_dir}/storage
 fi
 dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \

Diff for: egs/jsalt18e2e/asr1/run.sh

+2 -2

@@ -29,7 +29,7 @@ do_delta=false # true when using CNN
 # network archtecture
 # encoder related
 etype=blstmp # encoder architecture type
-elayers=8
+elayers=4
 eunits=320
 eprojs=320
 subsample=1_2_2_1_1 # skip every n frame from input to nth layers
@@ -51,7 +51,7 @@ maxlen_out=150 # if output length > maxlen_out, batchsize is automatically reduc
 
 # optimization related
 opt=adadelta
-epochs=15
+epochs=20
 
 # decoding parameter
 beam_size=20

Diff for: egs/wsj/asr1/local/run_bpe.sh renamed to egs/librispeech/asr1/local/run_spm.sh

+115 -85

@@ -7,9 +7,10 @@
 . ./cmd.sh
 
 # general configuration
-backend=chainer
-stage=0 # start from 0 if you need to start from data preparation
-gpu=-1 # use 0 when using GPU on slurm/grid engine, otherwise -1
+backend=pytorch
+stage=-1 # start from -1 if you need to start from data download
+gpu= # will be deprecated, please use ngpu
+ngpu=0 # number of gpus ("0" uses cpu, otherwise use gpu)
 debugmode=1
 dumpdir=dump # directory to dump full features
 N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
@@ -21,8 +22,8 @@ do_delta=false # true when using CNN
 
 # network archtecture
 # encoder related
-etype=vggblstmp # encoder architecture type
-elayers=6
+etype=blstmp # encoder architecture type
+elayers=8
 eunits=320
 eprojs=320
 subsample=1_2_2_1_1 # skip every n frame from input to nth layers
@@ -38,7 +39,7 @@ aconv_filts=100
 mtlalpha=0.5
 
 # minibatch related
-batchsize=30
+batchsize=50
 maxlen_in=800 # if input length > maxlen_in, batchsize is automatically reduced
 maxlen_out=150 # if output length > maxlen_out, batchsize is automatically reduced
 
@@ -47,7 +48,7 @@ opt=adadelta
 epochs=15
 
 # rnnlm related
-lm_weight=1.0
+lm_weight=0.3
 
 # decoding parameter
 beam_size=20
@@ -57,37 +58,62 @@ minlenratio=0.0
 ctc_weight=0.3
 recog_model=acc.best # set a model to be used for decoding: 'acc.best' or 'loss.best'
 
-# data
-wsj0=/export/corpora5/LDC/LDC93S6B
-wsj1=/export/corpora5/LDC/LDC94S13B
+# Set this to somewhere where you want to put your data, or where
+# someone else has already put it. You'll want to change this
+# if you're not on the CLSP grid.
+datadir=/export/a15/vpanayotov/data
 
-# bpe
-nbpe=500
+# base url for downloads.
+data_url=www.openslr.org/resources/12
+
+# bpemode (unigram or bpe)
+nbpe=200
+bpemode=unigram
 
 # exp tag
 tag="" # tag for managing experiments.
 
 . utils/parse_options.sh || exit 1;
 
-. ./path.sh
-. ./cmd.sh
+. ./path.sh
+. ./cmd.sh
+
+# check gpu option usage
+if [ ! -z $gpu ]; then
+echo "WARNING: --gpu option will be deprecated."
+echo "WARNING: please use --ngpu option."
+if [ $gpu -eq -1 ]; then
+ngpu=0
+else
+ngpu=1
+fi
+fi
 
 # Set bash to 'debug' mode, it will exit on :
 # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
 set -e
 set -u
 set -o pipefail
 
-train_set=train_si284
-train_dev=test_dev93
-recog_set="test_dev93 test_eval92"
+train_set=train_960
+train_dev=dev
+recog_set="test_clean test_other dev_clean dev_other"
+
+if [ ${stage} -le -1 ]; then
+echo "stage -1: Data Download"
+for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do
+local/download_and_untar.sh ${datadir} ${data_url} ${part}
+done
+fi
 
 if [ ${stage} -le 0 ]; then
 ### Task dependent. You have to make data the following preparation part by yourself.
 ### But you can utilize Kaldi recipes in most cases
 echo "stage 0: Data preparation"
-local/wsj_data_prep.sh ${wsj0}/??-{?,??}.? ${wsj1}/??-{?,??}.?
-local/wsj_format_data.sh
+for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do
+# use underscore-separated names in data directories.
+local/data_prep.sh ${datadir}/LibriSpeech/${part} data/$(echo ${part} | sed s/-/_/g)
+done
 fi
 
 feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir}
@@ -98,81 +124,97 @@ if [ ${stage} -le 1 ]; then
 echo "stage 1: Feature Generation"
 fbankdir=fbank
 # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame
-for x in train_si284 test_dev93 test_eval92; do
-steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 data/${x} exp/make_fbank/${x} ${fbankdir}
+for x in dev_clean test_clean dev_other test_other train_clean_100 train_clean_360 train_other_500; do
+steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 32 data/${x} exp/make_fbank/${x} ${fbankdir}
 done
 
+utils/combine_data.sh data/${train_set}_org data/train_clean_100 data/train_clean_360 data/train_other_500
+utils/combine_data.sh data/${train_dev}_org data/dev_clean data/dev_other
+
+# remove utt having more than 3000 frames
+# remove utt having more than 400 characters
+remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_set}_org data/${train_set}
+remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_dev}_org data/${train_dev}
+
 # compute global CMVN
 compute-cmvn-stats scp:data/${train_set}/feats.scp data/${train_set}/cmvn.ark
 
 # dump features for training
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
+/export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
 ${feat_tr_dir}/storage
 fi
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
+/export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
 ${feat_dt_dir}/storage
 fi
-dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
+dump.sh --cmd "$train_cmd" --nj 80 --do_delta $do_delta \
 data/${train_set}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/train ${feat_tr_dir}
-dump.sh --cmd "$train_cmd" --nj 4 --do_delta $do_delta \
+dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
 data/${train_dev}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/dev ${feat_dt_dir}
+for rtask in ${recog_set}; do
+feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}; mkdir -p ${feat_recog_dir}
+dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
+data/${rtask}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/recog/${rtask} \
+${feat_recog_dir}
+done
 fi
 
-dict=data/bpe/${train_set}_units_${nbpe}.txt
-code=data/bpe/code_${nbpe}.txt
-nlsyms=data/bpe/non_lang_syms.txt
+dict=data/lang_char/${train_set}_units.txt
+bpemodel=data/lang_char/${train_set}${nbpe}
 echo "dictionary: ${dict}"
 if [ ${stage} -le 2 ]; then
 ### Task dependent. You have to check non-linguistic symbols used in the corpus.
 echo "stage 2: Dictionary and Json Data Preparation"
-mkdir -p data/bpe
+mkdir -p data/lang_char/
 echo "<unk> 1" > ${dict} # <unk> must be 1, 0 will be used for "blank" in CTC
-cut -f 2- -d" " data/${train_set}/text | learn_bpe.py -s ${nbpe} > ${code}
-cut -f 2- -d" " data/${train_set}/text | apply_bpe.py -c ${code} \
-| tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict}
+cut -f 2- -d" " data/${train_set}/text > data/lang_char/input.txt
+spm_train --input=data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000
+spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict}
 wc -l ${dict}
 
-echo "make a non-linguistic symbol list"
-cut -f 2- data/${train_set}/text | tr " " "\n" | sort | uniq | grep "<" > ${nlsyms}
-cat ${nlsyms}
-
-echo "make json files"
-data2json.sh --feat ${feat_tr_dir}/feats.scp --bpecode ${code} \
-data/${train_set} ${dict} > ${feat_tr_dir}/data_${nbpe}.json
-data2json.sh --feat ${feat_dt_dir}/feats.scp --bpecode ${code} \
+# make json labels
+data2json.sh --feat ${feat_tr_dir}/feats.scp --bpecode ${bpemodel}.model \
+data/${train_set} ${dict} > ${feat_tr_dir}/data_${nbpe}.json
+data2json.sh --feat ${feat_dt_dir}/feats.scp --bpecode ${bpemodel}.model \
 data/${train_dev} ${dict} > ${feat_dt_dir}/data_${nbpe}.json
 fi
 
-# you can skip this and remove --rnnlm option in the recognition (stage 5)
-lmexpdir=exp/train_rnnlm_2layer_bs2048_bpe${nbpe}
+# You can skip this and remove --rnnlm option in the recognition (stage 5)
+lmexpdir=exp/train_rnnlm_2layer_bs256
 mkdir -p ${lmexpdir}
 if [ ${stage} -le 3 ]; then
 echo "stage 3: LM Preparation"
 lmdatadir=data/local/lm_train
 mkdir -p ${lmdatadir}
-cut -f 2- -d" " data/${train_set}/text | apply_bpe.py -c ${code} | perl -pe 's/\n/ <eos> /g' \
-> ${lmdatadir}/train_trans_${nbpe}.txt
-zcat ${wsj1}/13-32.1/wsj1/doc/lng_modl/lm_train/np_data/{87,88,89}/*.z | grep -v "<" | tr [a-z] [A-Z] \
-| apply_bpe.py -c ${code} | perl -pe 's/\n/ <eos> /g' >> ${lmdatadir}/train_others_${nbpe}.txt
-cat ${lmdatadir}/train_trans_${nbpe}.txt ${lmdatadir}/train_others_${nbpe}.txt | tr '\n' ' ' > ${lmdatadir}/train_${nbpe}.txt
-cut -f 2- -d" " data/${train_dev}/text | apply_bpe.py -c ${code} | perl -pe 's/\n/ <eos> /g' \
-> ${lmdatadir}/valid_${nbpe}.txt
+spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt | perl -pe 's/\n/ <eos> /g' \
+> ${lmdatadir}/train.txt
+cut -f 2- -d" " data/${train_set}/text | spm_encode --model=${bpemodel}.model --output_format=piece | perl -pe 's/\n/ <eos> /g' \
+> ${lmdatadir}/valid.txt
+# use only 1 gpu
+if [ ${ngpu} -gt 1 ]; then
+echo "LM training does not support multi-gpu. signle gpu will be used."
+lmngpu=1
+else
+lmngpu=${ngpu}
+fi
 ${cuda_cmd} ${lmexpdir}/train.log \
 lm_train.py \
---gpu ${gpu} \
+--ngpu ${lmngpu} \
+--backend ${backend} \
 --verbose 1 \
 --outdir ${lmexpdir} \
---train-label ${lmdatadir}/train_${nbpe}.txt \
---valid-label ${lmdatadir}/valid_${nbpe}.txt \
+--train-label ${lmdatadir}/train.txt \
+--valid-label ${lmdatadir}/valid.txt \
+--epoch 60 \
+--batchsize 256 \
 --dict ${dict}
 fi
 
 if [ -z ${tag} ]; then
-expdir=exp/${train_set}_${etype}_e${elayers}_subsample${subsample}_unit${eunits}_proj${eprojs}_d${dlayers}_unit${dunits}_${atype}_aconvc${aconv_chans}_aconvf${aconv_filts}_mtlalpha${mtlalpha}_${opt}_bs${batchsize}_mli${maxlen_in}_mlo${maxlen_out}_bpe${nbpe}
+expdir=exp/${train_set}_${etype}_e${elayers}_subsample${subsample}_unit${eunits}_proj${eprojs}_d${dlayers}_unit${dunits}_${atype}_aconvc${aconv_chans}_aconvf${aconv_filts}_mtlalpha${mtlalpha}_${opt}_bs${batchsize}_mli${maxlen_in}_mlo${maxlen_out}_bpe${nbpe}_bpemode${bpemode}
 if ${do_delta}; then
 expdir=${expdir}_delta
 fi
@@ -181,32 +223,21 @@ else
 fi
 mkdir -p ${expdir}
 
-# switch backend
-if [[ ${backend} == chainer ]]; then
-train_script=asr_train.py
-decode_script=asr_recog.py
-else
-train_script=asr_train_th.py
-decode_script=asr_recog_th.py
-fi
-
 if [ ${stage} -le 4 ]; then
 echo "stage 4: Network Training"
-
-${cuda_cmd} ${expdir}/train.log \
-${train_script} \
---gpu ${gpu} \
+${cuda_cmd} --gpu ${ngpu} ${expdir}/train.log \
+asr_train.py \
+--ngpu ${ngpu} \
+--backend ${backend} \
 --outdir ${expdir}/results \
 --debugmode ${debugmode} \
 --dict ${dict} \
 --debugdir ${expdir} \
 --minibatches ${N} \
 --verbose ${verbose} \
 --resume ${resume} \
---train-feat scp:${feat_tr_dir}/feats.scp \
---valid-feat scp:${feat_dt_dir}/feats.scp \
---train-label ${feat_tr_dir}/data_${nbpe}.json \
---valid-label ${feat_dt_dir}/data_${nbpe}.json \
+--train-json ${feat_tr_dir}/data_${nbpe}.json \
+--valid-json ${feat_dt_dir}/data_${nbpe}.json \
 --etype ${etype} \
 --elayers ${elayers} \
 --eunits ${eunits} \
@@ -232,29 +263,27 @@ if [ ${stage} -le 5 ]; then
 for rtask in ${recog_set}; do
 (
 decode_dir=decode_${rtask}_beam${beam_size}_e${recog_model}_p${penalty}_len${minlenratio}-${maxlenratio}_ctcw${ctc_weight}_rnnlm${lm_weight}
+feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}
 
 # split data
 data=data/${rtask}
 split_data.sh --per-utt ${data} ${nj};
 sdata=${data}/split${nj}utt;
 
-# feature extraction
-feats="ark,s,cs:apply-cmvn --norm-vars=true data/${train_set}/cmvn.ark scp:${sdata}/JOB/feats.scp ark:- |"
-if ${do_delta}; then
-feats="$feats add-deltas ark:- ark:- |"
-fi
-
 # make json labels for recognition
-data2json.sh --bpecode ${code} ${data} ${dict} > ${data}/data_${nbpe}.json
+for j in `seq 1 ${nj}`; do
+data2json.sh --bpecode ${bpemodel}.model --feat ${feat_recog_dir}/feats.scp \
+${sdata}/${j} ${dict} > ${sdata}/${j}/data_${nbpe}.json
+done
 
 #### use CPU for decoding
-gpu=-1
+ngpu=0
 
 ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \
-${decode_script} \
---gpu ${gpu} \
---recog-feat "$feats" \
---recog-label ${data}/data_${nbpe}.json \
+asr_recog.py \
+--ngpu ${ngpu} \
+--backend ${backend} \
+--recog-json ${sdata}/JOB/data_${nbpe}.json \
 --result-label ${expdir}/${decode_dir}/data.JOB.json \
 --model ${expdir}/results/model.${recog_model} \
 --model-conf ${expdir}/results/model.conf \
@@ -264,11 +293,12 @@ if [ ${stage} -le 5 ]; then
 --minlenratio ${minlenratio} \
 --ctc-weight ${ctc_weight} \
 --rnnlm ${lmexpdir}/rnnlm.model.best \
---lm-weight ${lm_weight} &
+--lm-weight ${lm_weight} \
+&
 wait
 
-score_sclite.sh --bpe true --nlsyms ${nlsyms} ${expdir}/${decode_dir} ${dict}
-
+score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel}.model --wer true ${expdir}/${decode_dir} ${dict}
+
 ) &
 done
 wait
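
Note: the renamed script replaces the subword-nmt tools (learn_bpe.py / apply_bpe.py) with SentencePiece: spm_train learns a unigram or BPE model on the training transcripts, spm_encode turns text into subword pieces for the dictionary, JSON labels and LM data, and the trained model file is reused again at decoding and scoring time. The recipe drives the compiled spm_* binaries built under tools/sentencepiece; purely to illustrate the same round trip, and assuming the separate sentencepiece pip package (not something this commit installs), the Python equivalent looks roughly like this, with made-up file names:

    import sentencepiece as spm

    # Learn a 200-piece unigram model from one-sentence-per-line text
    # (mirrors: spm_train --input=... --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=...).
    spm.SentencePieceTrainer.Train(
        "--input=input.txt --model_prefix=spm_unigram200 --vocab_size=200 --model_type=unigram")

    sp = spm.SentencePieceProcessor()
    sp.Load("spm_unigram200.model")

    # Piece-level encoding, as spm_encode --output_format=piece does; "▁" marks word starts.
    pieces = sp.EncodeAsPieces("THE QUICK BROWN FOX")
    print(" ".join(pieces))

    # And back to plain text, as spm_decode --input_format=piece does.
    print(sp.DecodePieces(pieces))
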

Diff for: egs/librispeech/asr1/path.sh

+1 -1

@@ -8,7 +8,7 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin
 . $KALDI_ROOT/tools/config/common_path.sh
 export LC_ALL=C
 
-export PATH=$SPNET_ROOT/utils/:$SPNET_ROOT/bin/:$PATH
+export PATH=$MAIN_ROOT/tools/sentencepiece/src:$SPNET_ROOT/utils/:$SPNET_ROOT/bin/:$PATH
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
 source $MAIN_ROOT/tools/venv/bin/activate
 export PYTHONPATH=$SPNET_ROOT/lm/:$SPNET_ROOT/asr/:$SPNET_ROOT/nets/:$SPNET_ROOT/utils/:$SPNET_ROOT/bin/:$PYTHONPATH

Diff for: egs/librispeech/asr1/run.sh

+2 -2

@@ -138,12 +138,12 @@ if [ ${stage} -le 1 ]; then
 # dump features for training
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
+/export/b{14,15,16,17}/${USER}/espnet-data/egs/librispeech/asr1/dump/${train_set}/delta${do_delta}/storage \
 ${feat_tr_dir}/storage
 fi
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{14,15,16,17}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
+/export/b{14,15,16,17}/${USER}/espnet-data/egs/librispeech/asr1/dump/${train_dev}/delta${do_delta}/storage \
 ${feat_dt_dir}/storage
 fi
 dump.sh --cmd "$train_cmd" --nj 80 --do_delta $do_delta \

Diff for: egs/wsj/asr1/run.sh

+2 -2

@@ -125,12 +125,12 @@ if [ ${stage} -le 1 ]; then
 # dump features for training
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \
+/export/b{10,11,12,13}/${USER}/espnet-data/egs/wsj/asr1/dump/${train_set}/delta${do_delta}/storage \
 ${feat_tr_dir}/storage
 fi
 if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then
 utils/create_split_dir.pl \
-/export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \
+/export/b{10,11,12,13}/${USER}/espnet-data/egs/wsj/asr1/dump/${train_dev}/delta${do_delta}/storage \
 ${feat_dt_dir}/storage
 fi
 dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \

Diff for: src/asr/asr_chainer.py

+1 -1

@@ -413,7 +413,7 @@ def train(args):
 
     # Save attention weight each epoch
     if args.num_save_attention > 0 and args.mtlalpha != 1.0:
-        data = sorted(valid_json.items()[:args.num_save_attention],
+        data = sorted(list(valid_json.items())[:args.num_save_attention],
                       key=lambda x: int(x[1]['input'][0]['shape'][1]), reverse=True)
         data = converter_kaldi([data], device=gpu_id)
         trainer.extend(PlotAttentionReport(model, data, args.outdir + "/att_ws"), trigger=(1, 'epoch'))
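
Note: the one-line change above is a Python 3 compatibility fix rather than a behaviour change: dict.items() returns a sliceable list in Python 2 but a view object in Python 3, so items()[:n] raises TypeError there. A minimal sketch of the fixed expression, with a made-up two-utterance json that mirrors the 'input'/'shape' layout the code indexes:

    valid_json = {"utt1": {"input": [{"shape": [320, 83]}]},
                  "utt2": {"input": [{"shape": [150, 83]}]}}

    # list() makes the slice legal on both Python 2 and 3; the sort key is unchanged.
    data = sorted(list(valid_json.items())[:1],
                  key=lambda x: int(x[1]['input'][0]['shape'][1]), reverse=True)
    print(data[0][0])
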

Diff for: src/asr/asr_pytorch.py

+6 -1

@@ -69,6 +69,8 @@ def evaluate(self):
 
         for batch in it:
             observation = {}
+            if torch.__version__ != "0.3.1":
+                torch.set_grad_enabled(False)
             with reporter_module.report_scope(observation):
                 # read scp files
                 # x: original json with loaded features
@@ -78,6 +80,9 @@ def evaluate(self):
                 self.model(x)
                 delete_feat(x)
 
+            if torch.__version__ != "0.3.1":
+                torch.set_grad_enabled(True)
+
             summary.add(observation)
 
         self.model.train()
@@ -293,7 +298,7 @@ def train(args):
 
     # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
-        data = sorted(valid_json.items()[:args.num_save_attention],
+        data = sorted(list(valid_json.items())[:args.num_save_attention],
                       key=lambda x: int(x[1]['input'][0]['shape'][1]), reverse=True)
         data = converter_kaldi([data], device=gpu_id)
         trainer.extend(PlotAttentionReport(model, data, args.outdir + "/att_ws"), trigger=(1, 'epoch'))
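
Note: besides the same items() fix as in asr_chainer.py, the evaluator now switches gradient tracking off around the validation forward passes. torch.set_grad_enabled() only exists from PyTorch 0.4.0 onward, hence the torch.__version__ != "0.3.1" guard (0.3.x relies on volatile Variables instead). On 0.4+ the usual spelling is the context manager; a minimal sketch with a stand-in model, not the recipe's E2E network:

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)      # stand-in for the E2E model
    x = torch.randn(3, 4)

    model.eval()
    with torch.no_grad():        # same effect as set_grad_enabled(False) ... set_grad_enabled(True)
        y = model(x)
    print(y.requires_grad)       # False: no autograd graph is kept for validation batches
    model.train()
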

Diff for: src/nets/e2e_asr_attctc_th.py

+1 -1

@@ -742,7 +742,7 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0):
         # initialize attention weight with uniform dist.
         if att_prev is None:
             att_prev = [Variable(enc_hs_pad.data.new(
-                l).zero_() + (1.0 / l)) for l in enc_hs_len]
+                int(l)).zero_() + (1.0 / int(l))) for l in enc_hs_len]
             # if no bias, 0 0-pad goes 0
             att_prev = pad_list(att_prev, 0)
 
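
Note: the int() casts are defensive: depending on how the encoder packs its outputs, the per-utterance lengths in enc_hs_len may arrive as numpy or torch scalars rather than plain Python ints, and Tensor.new(size) wants an int. A self-contained sketch of the fixed expression with placeholder tensors (the real code additionally wraps the result in Variable and pads it with pad_list):

    import torch

    enc_hs_pad = torch.zeros(2, 5, 4)                  # (batch, max_time, dim) placeholder
    enc_hs_len = [torch.tensor(5), torch.tensor(3)]    # lengths as 0-dim tensors, not ints

    # new(int(l)) allocates a length-l vector with the same dtype/device as enc_hs_pad;
    # zero_() + 1/l turns it into the uniform initial attention weight.
    att_prev = [enc_hs_pad.data.new(int(l)).zero_() + (1.0 / int(l)) for l in enc_hs_len]
    print([a.shape for a in att_prev])                 # [torch.Size([5]), torch.Size([3])]
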

Diff for: src/utils/data2json.sh

+1 -1

@@ -37,7 +37,7 @@ fi
 
 # output
 if [ ! -z ${bpecode} ]; then
-paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text | apply_bpe.py -c ${bpecode}) > ${tmpdir}/token.scp
+paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text | spm_encode --model=${bpecode} --output_format=piece) > ${tmpdir}/token.scp
 elif [ ! -z ${nlsyms} ]; then
 text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text > ${tmpdir}/token.scp
 else

Diff for: src/utils/score_sclite.sh

+10 -8

@@ -7,7 +7,8 @@
 
 nlsyms=""
 wer=false
-bpe=false
+bpe=""
+bpemodel=""
 remove_blank=true
 filter=""
 
@@ -24,10 +25,6 @@ dic=$2
 concatjson.py ${dir}/data.*.json > ${dir}/data.json
 json2trn.py ${dir}/data.json ${dic} ${dir}/ref.trn ${dir}/hyp.trn
 
-if $bpe; then
-sed -i.bak1 -r 's/(@@ )|(@@ ?$)//g' ${dir}/ref.trn
-sed -i.bak1 -r 's/(@@ )|(@@ ?$)//g' ${dir}/hyp.trn
-fi
 if $remove_blank; then
 sed -i.bak2 -r 's/<blank> //g' ${dir}/hyp.trn
 fi
@@ -48,10 +45,15 @@ echo "write a CER (or TER) result in ${dir}/result.txt"
 grep -e Avg -e SPKR -m 2 ${dir}/result.txt
 
 if ${wer}; then
-sed -e "s/ //g" -e "s/(/ (/" -e "s/<space>/ /g" ${dir}/ref.trn > ${dir}/ref.wrd.trn
-sed -e "s/ //g" -e "s/(/ (/" -e "s/<space>/ /g" ${dir}/hyp.trn > ${dir}/hyp.wrd.trn
+if [ ! -z $bpe ]; then
+spm_decode --model=${bpemodel} --input_format=piece < ${dir}/ref.trn | sed -e "s/▁/ /g" > ${dir}/ref.wrd.trn
+spm_decode --model=${bpemodel} --input_format=piece < ${dir}/hyp.trn | sed -e "s/▁/ /g" > ${dir}/hyp.wrd.trn
+else
+sed -e "s/ //g" -e "s/(/ (/" -e "s/<space>/ /g" ${dir}/ref.trn > ${dir}/ref.wrd.trn
+sed -e "s/ //g" -e "s/(/ (/" -e "s/<space>/ /g" ${dir}/hyp.trn > ${dir}/hyp.wrd.trn
+fi
 sclite -r ${dir}/ref.wrd.trn trn -h ${dir}/hyp.wrd.trn trn -i rm -o all stdout > ${dir}/result.wrd.txt
-
+
 echo "write a WER result in ${dir}/result.wrd.txt"
 grep -e Avg -e SPKR -m 2 ${dir}/result.wrd.txt
 fi
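
Note: with SentencePiece the word boundary lives inside the pieces themselves as the "▁" (U+2581) marker, so for WER the script now rebuilds word-level transcripts via spm_decode plus a sed that maps any remaining "▁" to spaces, instead of stripping subword-nmt's "@@" continuation markers. The piece-to-word mapping itself is simple string surgery; an illustrative sketch with a made-up piece sequence:

    pieces = ["▁THE", "▁QUI", "CK", "▁BROWN", "▁FOX"]

    # Concatenate the pieces, then turn each "▁" boundary marker into a space:
    # this is the word sequence the ref.wrd.trn / hyp.wrd.trn files need for sclite.
    words = "".join(pieces).replace("▁", " ").strip()
    print(words)   # THE QUICK BROWN FOX
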

Diff for: tools/Makefile

+5 -4

@@ -2,7 +2,7 @@ KALDI =
 
 .PHONY: all clean
 
-all: venv/bin/activate kaldi nkf kaldi-io-for-python venv/lib/python2.7/site-packages/torch warp-ctc chainer_ctc subword-nmt
+all: venv/bin/activate kaldi nkf kaldi-io-for-python venv/lib/python2.7/site-packages/torch warp-ctc chainer_ctc sentencepiece
 
 kaldi-io-for-python:
 git clone https://github.com/vesis84/kaldi-io-for-python.git
@@ -48,10 +48,11 @@ chainer_ctc: venv/bin/activate
 . venv/bin/activate; cd chainer_ctc && chmod +x install_warp-ctc.sh && ./install_warp-ctc.sh ; true
 . venv/bin/activate; cd chainer_ctc && pip install .
 
-subword-nmt:
-git clone https://github.com/rsennrich/subword-nmt.git
+sentencepiece:
+git clone https://github.com/google/sentencepiece.git
+cd sentencepiece && ./autogen.sh && ./configure && $(MAKE)
 
 clean:
-rm -fr kaldi_github kaldi kaldi_python venv nkf kaldi-io-for-python ../src/utils/kaldi_io_py.py warp-ctc chainer_ctc subword-nmt
+rm -fr kaldi_github kaldi kaldi_python venv nkf kaldi-io-for-python ../src/utils/kaldi_io_py.py warp-ctc chainer_ctc sentencepiece
 rm -f miniconda.sh
 find . -iname "*.pyc" -delete
