espnet2.train.preprocessor.MutliTokenizerCommonPreprocessor
espnet2.train.preprocessor.MutliTokenizerCommonPreprocessor
class espnet2.train.preprocessor.MutliTokenizerCommonPreprocessor(
    train: bool,
    token_type: List[str] = [None],
    token_list: List[Path | str | Iterable[str]] = [None],
    bpemodel: List[Path | str | Iterable[str]] = [None],
    text_cleaner: Collection[str] | None = None,
    g2p_type: List[str] | str | None = None,
    unk_symbol: str = '<unk>',
    space_symbol: str = '<space>',
    non_linguistic_symbols: Path | str | Iterable[str] | None = None,
    delimiter: str | None = None,
    rir_scp: str | None = None,
    rir_apply_prob: float = 1.0,
    noise_scp: str | None = None,
    noise_apply_prob: float = 1.0,
    noise_db_range: str = '3_10',
    short_noise_thres: float = 0.5,
    speech_volume_normalize: float | None = None,
    speech_name: str = 'speech',
    text_name: List[str] = ['text'],
    tokenizer_encode_conf: List[Dict] = [{}, {}],
    fs: int = 0,
    data_aug_effects: List | None = None,
    data_aug_num: List[int] = [1, 1],
    data_aug_prob: float = 0.0,
    whisper_language: List[str] | None = None,
    whisper_task: str | None = None,
)
Bases: CommonPreprocessor