# espnet2.train.preprocessor.CommonPreprocessor
class espnet2.train.preprocessor.CommonPreprocessor(train: bool, use_lang_prompt: bool = False, use_nlp_prompt: bool = False, token_type: str | None = None, token_list: Path | str | Iterable[str] | None = None, bpemodel: Path | str | Iterable[str] | None = None, text_cleaner: Collection[str] | None = None, g2p_type: str | None = None, unk_symbol: str = '<unk>', space_symbol: str = '<space>', non_linguistic_symbols: Path | str | Iterable[str] | None = None, delimiter: str | None = None, force_single_channel: bool = False, rir_scp: str | None = None, rir_apply_prob: float = 1.0, noise_scp: str | None = None, noise_apply_prob: float = 1.0, noise_db_range: str = '3_10', short_noise_thres: float = 0.5, aux_task_names: Collection[str] | None = None, speech_volume_normalize: float | None = None, speech_name: str = 'speech', text_name: str = 'text', fs: int = 0, nonsplit_symbol: Iterable[str] | None = None, data_aug_effects: List | None = None, data_aug_num: List[int] = [1, 1], data_aug_prob: float = 0.0, min_sample_size: int = -1, audio_pad_value: float | int = 0.0, whisper_language: str | None = None, whisper_task: str | None = None)
Bases: AbsPreprocessor