s2t_train_ctc.py

Estimated reading time: about 2 minutes
s2t_train_ctc.py

Base parser
usage: s2t_train_ctc.py [-h] [--config CONFIG] [--print_config]
                        [--log_level {ERROR,WARNING,INFO,DEBUG,NOTSET}]
                        [--drop_last_iter DROP_LAST_ITER] [--dry_run DRY_RUN]
                        [--iterator_type {sequence,category,chunk,task,none}]
                        [--valid_iterator_type {sequence,category,chunk,task,none}]
                        [--output_dir OUTPUT_DIR] [--ngpu NGPU] [--seed SEED]
                        [--num_workers NUM_WORKERS]
                        [--num_att_plot NUM_ATT_PLOT]
                        [--dist_backend DIST_BACKEND]
                        [--dist_init_method DIST_INIT_METHOD]
                        [--dist_world_size DIST_WORLD_SIZE]
                        [--dist_rank DIST_RANK] [--local_rank LOCAL_RANK]
                        [--dist_master_addr DIST_MASTER_ADDR]
                        [--dist_master_port DIST_MASTER_PORT]
                        [--dist_launcher {slurm,mpi,None}]
                        [--multiprocessing_distributed MULTIPROCESSING_DISTRIBUTED]
                        [--unused_parameters UNUSED_PARAMETERS]
                        [--sharded_ddp SHARDED_DDP]
                        [--use_deepspeed USE_DEEPSPEED]
                        [--deepspeed_config DEEPSPEED_CONFIG]
                        [--gradient_as_bucket_view GRADIENT_AS_BUCKET_VIEW]
                        [--ddp_comm_hook {none,fp16_compress_hook,bf16_compress_hook}]
                        [--cudnn_enabled CUDNN_ENABLED]
                        [--cudnn_benchmark CUDNN_BENCHMARK]
                        [--cudnn_deterministic CUDNN_DETERMINISTIC]
                        [--use_tf32 USE_TF32] [--collect_stats COLLECT_STATS]
                        [--write_collected_feats WRITE_COLLECTED_FEATS]
                        [--max_epoch MAX_EPOCH] [--patience PATIENCE]
                        [--val_scheduler_criterion VAL_SCHEDULER_CRITERION VAL_SCHEDULER_CRITERION]
                        [--early_stopping_criterion EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION]
                        [--best_model_criterion BEST_MODEL_CRITERION [BEST_MODEL_CRITERION ...]]
                        [--keep_nbest_models KEEP_NBEST_MODELS [KEEP_NBEST_MODELS ...]]
                        [--nbest_averaging_interval NBEST_AVERAGING_INTERVAL]
                        [--grad_clip GRAD_CLIP]
                        [--grad_clip_type GRAD_CLIP_TYPE]
                        [--grad_noise GRAD_NOISE] [--accum_grad ACCUM_GRAD]
                        [--no_forward_run NO_FORWARD_RUN] [--resume RESUME]
                        [--train_dtype {float16,float32,float64}]
                        [--use_amp USE_AMP] [--log_interval LOG_INTERVAL]
                        [--use_matplotlib USE_MATPLOTLIB]
                        [--use_tensorboard USE_TENSORBOARD]
                        [--create_graph_in_tensorboard CREATE_GRAPH_IN_TENSORBOARD]
                        [--use_wandb USE_WANDB]
                        [--wandb_project WANDB_PROJECT] [--wandb_id WANDB_ID]
                        [--wandb_entity WANDB_ENTITY]
                        [--wandb_name WANDB_NAME]
                        [--wandb_model_log_interval WANDB_MODEL_LOG_INTERVAL]
                        [--wandb_allow_val_change WANDB_ALLOW_VAL_CHANGE]
                        [--detect_anomaly DETECT_ANOMALY]
                        [--use_adapter USE_ADAPTER] [--adapter {lora,houlsby}]
                        [--save_strategy {all,adapter_only,required_grad_only}]
                        [--adapter_conf ADAPTER_CONF]
                        [--pretrain_path PRETRAIN_PATH]
                        [--init_param [INIT_PARAM ...]]
                        [--ignore_init_mismatch IGNORE_INIT_MISMATCH]
                        [--freeze_param [FREEZE_PARAM ...]]
                        [--num_iters_per_epoch NUM_ITERS_PER_EPOCH]
                        [--batch_size BATCH_SIZE]
                        [--valid_batch_size VALID_BATCH_SIZE]
                        [--batch_bins BATCH_BINS]
                        [--valid_batch_bins VALID_BATCH_BINS]
                        [--category_sample_size CATEGORY_SAMPLE_SIZE]
                        [--upsampling_factor UPSAMPLING_FACTOR]
                        [--category_upsampling_factor CATEGORY_UPSAMPLING_FACTOR]
                        [--dataset_upsampling_factor DATASET_UPSAMPLING_FACTOR]
                        [--dataset_scaling_factor DATASET_SCALING_FACTOR]
                        [--max_batch_size MAX_BATCH_SIZE]
                        [--min_batch_size MIN_BATCH_SIZE]
                        [--train_shape_file TRAIN_SHAPE_FILE]
                        [--valid_shape_file VALID_SHAPE_FILE]
                        [--batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset}]
                        [--valid_batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset,None}]
                        [--fold_length FOLD_LENGTH]
                        [--sort_in_batch {descending,ascending}]
                        [--shuffle_within_batch SHUFFLE_WITHIN_BATCH]
                        [--sort_batch {descending,ascending}]
                        [--multiple_iterator MULTIPLE_ITERATOR]
                        [--chunk_length CHUNK_LENGTH]
                        [--chunk_shift_ratio CHUNK_SHIFT_RATIO]
                        [--num_cache_chunks NUM_CACHE_CHUNKS]
                        [--chunk_excluded_key_prefixes CHUNK_EXCLUDED_KEY_PREFIXES [CHUNK_EXCLUDED_KEY_PREFIXES ...]]
                        [--chunk_default_fs CHUNK_DEFAULT_FS]
                        [--chunk_max_abs_length CHUNK_MAX_ABS_LENGTH]
                        [--chunk_discard_short_samples CHUNK_DISCARD_SHORT_SAMPLES]
                        [--train_data_path_and_name_and_type TRAIN_DATA_PATH_AND_NAME_AND_TYPE]
                        [--valid_data_path_and_name_and_type VALID_DATA_PATH_AND_NAME_AND_TYPE]
                        [--multi_task_dataset MULTI_TASK_DATASET]
                        [--allow_variable_data_keys ALLOW_VARIABLE_DATA_KEYS]
                        [--max_cache_size MAX_CACHE_SIZE]
                        [--max_cache_fd MAX_CACHE_FD]
                        [--allow_multi_rates ALLOW_MULTI_RATES]
                        [--valid_max_cache_size VALID_MAX_CACHE_SIZE]
                        [--exclude_weight_decay EXCLUDE_WEIGHT_DECAY]
                        [--exclude_weight_decay_conf EXCLUDE_WEIGHT_DECAY_CONF]
                        [--optim {adam,adamw,sgd,adadelta,adagrad,adamax,asgd,lbfgs,rmsprop,rprop,radam,accagd,adabound,adamod,diffgrad,lamb,novograd,pid,qhm,sgdw,yogi}]
                        [--optim_conf OPTIM_CONF]
                        [--scheduler {reducelronplateau,lambdalr,steplr,multisteplr,exponentiallr,cosineannealinglr,noamlr,warmuplr,piecewiselinearwarmuplr,warmupsteplr,warmupreducelronplateau,cycliclr,onecyclelr,cosineannealingwarmrestarts,cosineannealingwarmuprestarts,tristagelr,exponentialdecaywarmup,None}]
                        [--scheduler_conf SCHEDULER_CONF]
                        [--token_list TOKEN_LIST]
                        [--init {xavier_uniform,xavier_normal,kaiming_uniform,kaiming_normal,None}]
                        [--input_size INPUT_SIZE] [--ctc_conf CTC_CONF]
                        [--use_preprocessor USE_PREPROCESSOR]
                        [--token_type {bpe,char,word,phn,hugging_face,whisper_en,whisper_multilingual}]
                        [--bpemodel BPEMODEL]
                        [--non_linguistic_symbols NON_LINGUISTIC_SYMBOLS]
                        [--cleaner {None,tacotron,jaconv,vietnamese,whisper_en,whisper_basic}]
                        [--g2p {None,g2p_en,g2p_en_no_space,pyopenjtalk,pyopenjtalk_kana,pyopenjtalk_accent,pyopenjtalk_accent_with_pause,pyopenjtalk_prosody,pypinyin_g2p,pypinyin_g2p_phone,pypinyin_g2p_phone_without_prosody,espeak_ng_arabic,espeak_ng_german,espeak_ng_french,espeak_ng_spanish,espeak_ng_russian,espeak_ng_greek,espeak_ng_finnish,espeak_ng_hungarian,espeak_ng_dutch,espeak_ng_english_us_vits,espeak_ng_hindi,espeak_ng_italian,espeak_ng_ukrainian,espeak_ng_polish,g2pk,g2pk_no_space,g2pk_explicit_space,korean_jaso,korean_jaso_no_space,g2p_is}]
                        [--speech_volume_normalize SPEECH_VOLUME_NORMALIZE]
                        [--rir_scp RIR_SCP] [--rir_apply_prob RIR_APPLY_PROB]
                        [--noise_scp NOISE_SCP]
                        [--noise_apply_prob NOISE_APPLY_PROB]
                        [--noise_db_range NOISE_DB_RANGE]
                        [--short_noise_thres SHORT_NOISE_THRES]
                        [--frontend {default,sliding_window,s3prl,fused,whisper}]
                        [--frontend_conf FRONTEND_CONF]
                        [--specaug {specaug,None}]
                        [--specaug_conf SPECAUG_CONF]
                        [--normalize {global_mvn,utterance_mvn,None}]
                        [--normalize_conf NORMALIZE_CONF]
                        [--model {espnet,espnet_ctc}]
                        [--model_conf MODEL_CONF]
                        [--promptencoder {conformer,transformer,branchformer,e_branchformer}]
                        [--promptencoder_conf PROMPTENCODER_CONF]
                        [--encoder {conformer,transformer,transformer_multispkr,contextual_block_transformer,contextual_block_conformer,vgg_rnn,rnn,wav2vec2,hubert,hubert_pretrain,torchaudiohubert,longformer,branchformer,whisper,e_branchformer,e_branchformer_ctc}]
                        [--encoder_conf ENCODER_CONF]
                        [--preprocessor {s2t,s2t_ctc}]
                        [--preprocessor_conf PREPROCESSOR_CONF]
s2t_train_ctc.py

s2t_train_ctc.py

Named Arguments

Common configuration