asr_train.py
About 1 min
asr_train.py
Train an automatic speech recognition (ASR) model on one CPU, or on one or multiple GPUs.
usage: asr_train.py [-h] [--config CONFIG] [--config2 CONFIG2]
[--config3 CONFIG3] [--ngpu NGPU] [--use-ddp]
[--train-dtype {float16,float32,float64,O0,O1,O2,O3}]
[--backend {chainer,pytorch}] --outdir OUTDIR
[--debugmode DEBUGMODE] --dict DICT [--seed SEED]
[--debugdir DEBUGDIR] [--resume [RESUME]]
[--minibatches MINIBATCHES] [--verbose VERBOSE]
[--tensorboard-dir [TENSORBOARD_DIR]]
[--report-interval-iters REPORT_INTERVAL_ITERS]
[--save-interval-iters SAVE_INTERVAL_ITERS]
[--train-json TRAIN_JSON] [--valid-json VALID_JSON]
[--model-module MODEL_MODULE] [--num-encs NUM_ENCS]
[--ctc_type {builtin,gtnctc,cudnnctc}]
[--mtlalpha MTLALPHA] [--lsm-weight LSM_WEIGHT]
[--report-cer] [--report-wer] [--nbest NBEST]
[--beam-size BEAM_SIZE] [--penalty PENALTY]
[--maxlenratio MAXLENRATIO] [--minlenratio MINLENRATIO]
[--ctc-weight CTC_WEIGHT] [--rnnlm RNNLM]
[--rnnlm-conf RNNLM_CONF] [--lm-weight LM_WEIGHT]
[--sym-space SYM_SPACE] [--sym-blank SYM_BLANK]
[--sortagrad [SORTAGRAD]]
[--batch-count {auto,seq,bin,frame}]
[--batch-size BATCH_SIZE] [--batch-bins BATCH_BINS]
[--batch-frames-in BATCH_FRAMES_IN]
[--batch-frames-out BATCH_FRAMES_OUT]
[--batch-frames-inout BATCH_FRAMES_INOUT] [--maxlen-in ML]
[--maxlen-out ML] [--n-iter-processes N_ITER_PROCESSES]
[--preprocess-conf [PREPROCESS_CONF]]
[--opt {adadelta,adam,noam}] [--accum-grad ACCUM_GRAD]
[--eps EPS] [--eps-decay EPS_DECAY]
[--weight-decay WEIGHT_DECAY]
[--criterion {loss,loss_eps_decay_only,acc}]
[--threshold THRESHOLD] [--epochs EPOCHS]
[--early-stop-criterion [EARLY_STOP_CRITERION]]
[--patience [PATIENCE]] [--grad-clip GRAD_CLIP]
[--num-save-attention NUM_SAVE_ATTENTION]
[--num-save-ctc NUM_SAVE_CTC] [--grad-noise GRAD_NOISE]
[--num-spkrs {1,2}]
[--context-residual [CONTEXT_RESIDUAL]]
[--enc-init ENC_INIT] [--enc-init-mods ENC_INIT_MODS]
[--dec-init DEC_INIT] [--dec-init-mods DEC_INIT_MODS]
[--freeze-mods FREEZE_MODS] [--use-frontend USE_FRONTEND]
[--use-wpe USE_WPE]
[--wtype {lstm,blstm,lstmp,blstmp,vgglstmp,vggblstmp,vgglstm,vggblstm,gru,bgru,grup,bgrup,vgggrup,vggbgrup,vgggru,vggbgru}]
[--wlayers WLAYERS] [--wunits WUNITS] [--wprojs WPROJS]
[--wdropout-rate WDROPOUT_RATE] [--wpe-taps WPE_TAPS]
[--wpe-delay WPE_DELAY]
[--use-dnn-mask-for-wpe USE_DNN_MASK_FOR_WPE]
[--use-beamformer USE_BEAMFORMER]
[--btype {lstm,blstm,lstmp,blstmp,vgglstmp,vggblstmp,vgglstm,vggblstm,gru,bgru,grup,bgrup,vgggrup,vggbgrup,vgggru,vggbgru}]
[--blayers BLAYERS] [--bunits BUNITS] [--bprojs BPROJS]
[--badim BADIM] [--bnmask BNMASK]
[--ref-channel REF_CHANNEL]
[--bdropout-rate BDROPOUT_RATE] [--stats-file STATS_FILE]
[--apply-uttmvn APPLY_UTTMVN]
[--uttmvn-norm-means UTTMVN_NORM_MEANS]
[--uttmvn-norm-vars UTTMVN_NORM_VARS]
[--fbank-fs FBANK_FS] [--n-mels N_MELS]
[--fbank-fmin FBANK_FMIN] [--fbank-fmax FBANK_FMAX]