s2t_train_ctc.py
Estimated reading time: about 2 minutes
s2t_train_ctc.py
base parser
usage: s2t_train_ctc.py [-h] [--config CONFIG] [--print_config]
[--log_level {ERROR,WARNING,INFO,DEBUG,NOTSET}]
[--drop_last_iter DROP_LAST_ITER] [--dry_run DRY_RUN]
[--iterator_type {sequence,category,chunk,task,none}]
[--valid_iterator_type {sequence,category,chunk,task,none}]
[--output_dir OUTPUT_DIR] [--ngpu NGPU] [--seed SEED]
[--num_workers NUM_WORKERS]
[--num_att_plot NUM_ATT_PLOT]
[--dist_backend DIST_BACKEND]
[--dist_init_method DIST_INIT_METHOD]
[--dist_world_size DIST_WORLD_SIZE]
[--dist_rank DIST_RANK] [--local_rank LOCAL_RANK]
[--dist_master_addr DIST_MASTER_ADDR]
[--dist_master_port DIST_MASTER_PORT]
[--dist_launcher {slurm,mpi,None}]
[--multiprocessing_distributed MULTIPROCESSING_DISTRIBUTED]
[--unused_parameters UNUSED_PARAMETERS]
[--sharded_ddp SHARDED_DDP]
[--use_deepspeed USE_DEEPSPEED]
[--deepspeed_config DEEPSPEED_CONFIG]
[--gradient_as_bucket_view GRADIENT_AS_BUCKET_VIEW]
[--ddp_comm_hook {none,fp16_compress_hook,bf16_compress_hook}]
[--cudnn_enabled CUDNN_ENABLED]
[--cudnn_benchmark CUDNN_BENCHMARK]
[--cudnn_deterministic CUDNN_DETERMINISTIC]
[--use_tf32 USE_TF32] [--collect_stats COLLECT_STATS]
[--write_collected_feats WRITE_COLLECTED_FEATS]
[--max_epoch MAX_EPOCH] [--patience PATIENCE]
[--val_scheduler_criterion VAL_SCHEDULER_CRITERION VAL_SCHEDULER_CRITERION]
[--early_stopping_criterion EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION]
[--best_model_criterion BEST_MODEL_CRITERION [BEST_MODEL_CRITERION ...]]
[--keep_nbest_models KEEP_NBEST_MODELS [KEEP_NBEST_MODELS ...]]
[--nbest_averaging_interval NBEST_AVERAGING_INTERVAL]
[--grad_clip GRAD_CLIP]
[--grad_clip_type GRAD_CLIP_TYPE]
[--grad_noise GRAD_NOISE] [--accum_grad ACCUM_GRAD]
[--no_forward_run NO_FORWARD_RUN] [--resume RESUME]
[--train_dtype {float16,float32,float64}]
[--use_amp USE_AMP] [--log_interval LOG_INTERVAL]
[--use_matplotlib USE_MATPLOTLIB]
[--use_tensorboard USE_TENSORBOARD]
[--create_graph_in_tensorboard CREATE_GRAPH_IN_TENSORBOARD]
[--use_wandb USE_WANDB]
[--wandb_project WANDB_PROJECT] [--wandb_id WANDB_ID]
[--wandb_entity WANDB_ENTITY]
[--wandb_name WANDB_NAME]
[--wandb_model_log_interval WANDB_MODEL_LOG_INTERVAL]
[--wandb_allow_val_change WANDB_ALLOW_VAL_CHANGE]
[--detect_anomaly DETECT_ANOMALY]
[--use_adapter USE_ADAPTER] [--adapter {lora,houlsby}]
[--save_strategy {all,adapter_only,required_grad_only}]
[--adapter_conf ADAPTER_CONF]
[--pretrain_path PRETRAIN_PATH]
[--init_param [INIT_PARAM ...]]
[--ignore_init_mismatch IGNORE_INIT_MISMATCH]
[--freeze_param [FREEZE_PARAM ...]]
[--num_iters_per_epoch NUM_ITERS_PER_EPOCH]
[--batch_size BATCH_SIZE]
[--valid_batch_size VALID_BATCH_SIZE]
[--batch_bins BATCH_BINS]
[--valid_batch_bins VALID_BATCH_BINS]
[--category_sample_size CATEGORY_SAMPLE_SIZE]
[--upsampling_factor UPSAMPLING_FACTOR]
[--category_upsampling_factor CATEGORY_UPSAMPLING_FACTOR]
[--dataset_upsampling_factor DATASET_UPSAMPLING_FACTOR]
[--dataset_scaling_factor DATASET_SCALING_FACTOR]
[--max_batch_size MAX_BATCH_SIZE]
[--min_batch_size MIN_BATCH_SIZE]
[--train_shape_file TRAIN_SHAPE_FILE]
[--valid_shape_file VALID_SHAPE_FILE]
[--batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset}]
[--valid_batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset,None}]
[--fold_length FOLD_LENGTH]
[--sort_in_batch {descending,ascending}]
[--shuffle_within_batch SHUFFLE_WITHIN_BATCH]
[--sort_batch {descending,ascending}]
[--multiple_iterator MULTIPLE_ITERATOR]
[--chunk_length CHUNK_LENGTH]
[--chunk_shift_ratio CHUNK_SHIFT_RATIO]
[--num_cache_chunks NUM_CACHE_CHUNKS]
[--chunk_excluded_key_prefixes CHUNK_EXCLUDED_KEY_PREFIXES [CHUNK_EXCLUDED_KEY_PREFIXES ...]]
[--chunk_default_fs CHUNK_DEFAULT_FS]
[--chunk_max_abs_length CHUNK_MAX_ABS_LENGTH]
[--chunk_discard_short_samples CHUNK_DISCARD_SHORT_SAMPLES]
[--train_data_path_and_name_and_type TRAIN_DATA_PATH_AND_NAME_AND_TYPE]
[--valid_data_path_and_name_and_type VALID_DATA_PATH_AND_NAME_AND_TYPE]
[--multi_task_dataset MULTI_TASK_DATASET]
[--allow_variable_data_keys ALLOW_VARIABLE_DATA_KEYS]
[--max_cache_size MAX_CACHE_SIZE]
[--max_cache_fd MAX_CACHE_FD]
[--allow_multi_rates ALLOW_MULTI_RATES]
[--valid_max_cache_size VALID_MAX_CACHE_SIZE]
[--exclude_weight_decay EXCLUDE_WEIGHT_DECAY]
[--exclude_weight_decay_conf EXCLUDE_WEIGHT_DECAY_CONF]
[--optim {adam,adamw,sgd,adadelta,adagrad,adamax,asgd,lbfgs,rmsprop,rprop,radam,accagd,adabound,adamod,diffgrad,lamb,novograd,pid,qhm,sgdw,yogi}]
[--optim_conf OPTIM_CONF]
[--scheduler {reducelronplateau,lambdalr,steplr,multisteplr,exponentiallr,cosineannealinglr,noamlr,warmuplr,piecewiselinearwarmuplr,warmupsteplr,warmupreducelronplateau,cycliclr,onecyclelr,cosineannealingwarmrestarts,cosineannealingwarmuprestarts,tristagelr,exponentialdecaywarmup,None}]
[--scheduler_conf SCHEDULER_CONF]
[--token_list TOKEN_LIST]
[--init {xavier_uniform,xavier_normal,kaiming_uniform,kaiming_normal,None}]
[--input_size INPUT_SIZE] [--ctc_conf CTC_CONF]
[--use_preprocessor USE_PREPROCESSOR]
[--token_type {bpe,char,word,phn,hugging_face,whisper_en,whisper_multilingual}]
[--bpemodel BPEMODEL]
[--non_linguistic_symbols NON_LINGUISTIC_SYMBOLS]
[--cleaner {None,tacotron,jaconv,vietnamese,whisper_en,whisper_basic}]
[--g2p {None,g2p_en,g2p_en_no_space,pyopenjtalk,pyopenjtalk_kana,pyopenjtalk_accent,pyopenjtalk_accent_with_pause,pyopenjtalk_prosody,pypinyin_g2p,pypinyin_g2p_phone,pypinyin_g2p_phone_without_prosody,espeak_ng_arabic,espeak_ng_german,espeak_ng_french,espeak_ng_spanish,espeak_ng_russian,espeak_ng_greek,espeak_ng_finnish,espeak_ng_hungarian,espeak_ng_dutch,espeak_ng_english_us_vits,espeak_ng_hindi,espeak_ng_italian,espeak_ng_ukrainian,espeak_ng_polish,g2pk,g2pk_no_space,g2pk_explicit_space,korean_jaso,korean_jaso_no_space,g2p_is}]
[--speech_volume_normalize SPEECH_VOLUME_NORMALIZE]
[--rir_scp RIR_SCP] [--rir_apply_prob RIR_APPLY_PROB]
[--noise_scp NOISE_SCP]
[--noise_apply_prob NOISE_APPLY_PROB]
[--noise_db_range NOISE_DB_RANGE]
[--short_noise_thres SHORT_NOISE_THRES]
[--frontend {default,sliding_window,s3prl,fused,whisper}]
[--frontend_conf FRONTEND_CONF]
[--specaug {specaug,None}]
[--specaug_conf SPECAUG_CONF]
[--normalize {global_mvn,utterance_mvn,None}]
[--normalize_conf NORMALIZE_CONF]
[--model {espnet,espnet_ctc}]
[--model_conf MODEL_CONF]
[--promptencoder {conformer,transformer,branchformer,e_branchformer}]
[--promptencoder_conf PROMPTENCODER_CONF]
[--encoder {conformer,transformer,transformer_multispkr,contextual_block_transformer,contextual_block_conformer,vgg_rnn,rnn,wav2vec2,hubert,hubert_pretrain,torchaudiohubert,longformer,branchformer,whisper,e_branchformer,e_branchformer_ctc}]
[--encoder_conf ENCODER_CONF]
[--preprocessor {s2t,s2t_ctc}]
[--preprocessor_conf PREPROCESSOR_CONF]