espnet2.bin package

espnet2.bin.tokenize_text

espnet2.bin.tokenize_text.field2slice(field: Optional[str]) → slice[source]

Convert a field string to a slice.

Note that the field string uses 1-based integers.

Examples

>>> field2slice("1-")
slice(0, None, None)
>>> field2slice("1-3")
slice(0, 3, None)
>>> field2slice("-3")
slice(None, 3, None)
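The returned slice can be applied directly to the whitespace-separated columns of a line; a minimal sketch (the surrounding column selection inside tokenize() is assumed):

>>> from espnet2.bin.tokenize_text import field2slice
>>> columns = "utt1 HELLO WORLD".split()
>>> # "2-" keeps the 2nd column onwards (1-based), dropping the utterance id
>>> columns[field2slice("2-")]
['HELLO', 'WORLD']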
espnet2.bin.tokenize_text.get_parser() → argparse.ArgumentParser[source]
espnet2.bin.tokenize_text.main(cmd=None)[source]
espnet2.bin.tokenize_text.tokenize(input: str, output: str, field: Optional[str], delimiter: Optional[str], token_type: str, space_symbol: str, non_linguistic_symbols: Optional[str], bpemodel: Optional[str], log_level: str, write_vocabulary: bool, vocabulary_size: int, remove_non_linguistic_symbols: bool, cutoff: int, add_symbol: List[str], cleaner: Optional[str], g2p: Optional[str])[source]
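A hedged sketch of calling tokenize() directly; the paths and option values below are placeholders, only the keyword names are taken from the signature above:

>>> from espnet2.bin.tokenize_text import tokenize
>>> tokenize(
...     input="text", output="tokenized.txt",        # placeholder paths
...     field="2-", delimiter=None,                  # tokenize from the 2nd column on
...     token_type="char", space_symbol="<space>",
...     non_linguistic_symbols=None, bpemodel=None,
...     log_level="INFO",
...     write_vocabulary=False, vocabulary_size=0,
...     remove_non_linguistic_symbols=False,
...     cutoff=0, add_symbol=[],
...     cleaner=None, g2p=None,
... )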

espnet2.bin.asr_inference

class espnet2.bin.asr_inference.Speech2Text(asr_train_config: Union[pathlib.Path, str], asr_model_file: Union[pathlib.Path, str] = None, lm_train_config: Union[pathlib.Path, str] = None, lm_file: Union[pathlib.Path, str] = None, token_type: str = None, bpemodel: str = None, device: str = 'cpu', maxlenratio: float = 0.0, minlenratio: float = 0.0, batch_size: int = 1, dtype: str = 'float32', beam_size: int = 20, ctc_weight: float = 0.5, lm_weight: float = 1.0, penalty: float = 0.0, nbest: int = 1)[source]

Bases: object

Speech2Text class

Examples

>>> import soundfile
>>> speech2text = Speech2Text("asr_config.yml", "asr.pth")
>>> audio, rate = soundfile.read("speech.wav")
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
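A slightly fuller sketch of the same API that unpacks the n-best hypotheses; the paths are placeholders and the keyword names come from the constructor signature above:

>>> from espnet2.bin.asr_inference import Speech2Text
>>> import soundfile
>>> speech2text = Speech2Text(
...     asr_train_config="asr_config.yml",
...     asr_model_file="asr.pth",
...     beam_size=20,
...     ctc_weight=0.5,
...     nbest=3,
... )
>>> audio, rate = soundfile.read("speech.wav")
>>> for text, token, token_int, hyp in speech2text(audio):
...     print(text)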
espnet2.bin.asr_inference.get_parser()[source]
espnet2.bin.asr_inference.inference(output_dir: str, maxlenratio: float, minlenratio: float, batch_size: int, dtype: str, beam_size: int, ngpu: int, seed: int, ctc_weight: float, lm_weight: float, penalty: float, nbest: int, num_workers: int, log_level: Union[int, str], data_path_and_name_and_type: Sequence[Tuple[str, str, str]], key_file: Optional[str], asr_train_config: str, asr_model_file: str, lm_train_config: Optional[str], lm_file: Optional[str], word_lm_train_config: Optional[str], word_lm_file: Optional[str], token_type: Optional[str], bpemodel: Optional[str], allow_variable_data_keys: bool)[source]
espnet2.bin.asr_inference.main(cmd=None)[source]

espnet2.bin.launch

espnet2.bin.launch.get_parser()[source]
espnet2.bin.launch.main(cmd=None)[source]

espnet2.bin.pack

class espnet2.bin.pack.ASRPackedContents[source]

Bases: espnet2.bin.pack.PackedContents

files = ['asr_model_file', 'lm_file']
yaml_files = ['asr_train_config', 'lm_train_config']
class espnet2.bin.pack.EnhPackedContents[source]

Bases: espnet2.bin.pack.PackedContents

files = ['model_file']
yaml_files = ['train_config']
class espnet2.bin.pack.PackedContents[source]

Bases: object

files = []
yaml_files = []
class espnet2.bin.pack.TTSPackedContents[source]

Bases: espnet2.bin.pack.PackedContents

files = ['model_file']
yaml_files = ['train_config']
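The classes above only declare which model files and YAML configs go into a pack; a minimal sketch of a hypothetical subclass and of registering it on a parser via add_arguments() (the class name and keys are illustrative only):

>>> import argparse
>>> from espnet2.bin.pack import PackedContents, add_arguments
>>> class MyPackedContents(PackedContents):   # hypothetical subclass
...     files = ["model_file"]
...     yaml_files = ["train_config"]
>>> parser = argparse.ArgumentParser()
>>> add_arguments(parser, MyPackedContents)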
espnet2.bin.pack.add_arguments(parser: argparse.ArgumentParser, contents: Type[espnet2.bin.pack.PackedContents])[source]
espnet2.bin.pack.get_parser() → argparse.ArgumentParser[source]
espnet2.bin.pack.main(cmd=None)[source]

espnet2.bin.split_scps

espnet2.bin.split_scps.get_parser() → argparse.ArgumentParser[source]
espnet2.bin.split_scps.main(cmd=None)[source]
espnet2.bin.split_scps.split_scps(scps: List[str], num_splits: int, names: Optional[List[str]], output_dir: str, log_level: str)[source]
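A hedged sketch of a direct call with placeholder paths; the keyword names follow the signature above:

>>> from espnet2.bin.split_scps import split_scps
>>> split_scps(
...     scps=["data/train/wav.scp", "data/train/text"],  # input scp files (placeholders)
...     num_splits=4,
...     names=None,              # optional explicit names for the outputs
...     output_dir="exp/splits",
...     log_level="INFO",
... )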

espnet2.bin.asr_train

espnet2.bin.asr_train.get_parser()[source]
espnet2.bin.asr_train.main(cmd=None)[source]

ASR training.

Example

% python asr_train.py asr --print_config --optim adadelta \
        > conf/train_asr.yaml
% python asr_train.py --config conf/train_asr.yaml
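The same invocation can also be issued from Python through main(), which is assumed to forward the argument list to the parser (a sketch, with the config path as a placeholder):

>>> from espnet2.bin.asr_train import main
>>> main(cmd=["--config", "conf/train_asr.yaml"])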

espnet2.bin.lm_calc_perplexity

espnet2.bin.lm_calc_perplexity.calc_perplexity(output_dir: str, batch_size: int, dtype: str, ngpu: int, seed: int, num_workers: int, log_level: Union[int, str], data_path_and_name_and_type: Sequence[Tuple[str, str, str]], key_file: Optional[str], train_config: Optional[str], model_file: Optional[str], log_base: Optional[float], allow_variable_data_keys: bool)[source]
espnet2.bin.lm_calc_perplexity.get_parser()[source]
espnet2.bin.lm_calc_perplexity.main(cmd=None)[source]

espnet2.bin.tts_train

espnet2.bin.tts_train.get_parser()[source]
espnet2.bin.tts_train.main(cmd=None)[source]

TTS training.

Example

% python tts_train.py asr --print_config --optim adadelta
% python tts_train.py --config conf/train_tts.yaml

espnet2.bin.lm_train

espnet2.bin.lm_train.get_parser()[source]
espnet2.bin.lm_train.main(cmd=None)[source]

LM training.

Example

% python lm_train.py asr --print_config --optim adadelta
% python lm_train.py --config conf/train_lm.yaml

espnet2.bin.enh_train

espnet2.bin.enh_train.get_parser()[source]
espnet2.bin.enh_train.main(cmd=None)[source]

Enhancement frontend training.

Example

% python enh_train.py asr --print_config --optim adadelta \
        > conf/train_enh.yaml
% python enh_train.py --config conf/train_enh.yaml

espnet2.bin.aggregate_stats_dirs

espnet2.bin.aggregate_stats_dirs.aggregate_stats_dirs(input_dir: Iterable[Union[str, pathlib.Path]], output_dir: Union[str, pathlib.Path], log_level: str, skip_sum_stats: bool)[source]
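A hedged sketch of a direct call with placeholder directories; the keyword names follow the signature above:

>>> from espnet2.bin.aggregate_stats_dirs import aggregate_stats_dirs
>>> aggregate_stats_dirs(
...     input_dir=["exp/stats.1", "exp/stats.2"],  # per-split stats dirs (placeholders)
...     output_dir="exp/stats",
...     log_level="INFO",
...     skip_sum_stats=False,
... )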
espnet2.bin.aggregate_stats_dirs.get_parser() → argparse.ArgumentParser[source]
espnet2.bin.aggregate_stats_dirs.main(cmd=None)[source]

espnet2.bin.enh_scoring

espnet2.bin.enh_scoring.get_parser()[source]
espnet2.bin.enh_scoring.main(cmd=None)[source]
espnet2.bin.enh_scoring.scoring(output_dir: str, dtype: str, log_level: Union[int, str], key_file: str, ref_scp: List[str], inf_scp: List[str], ref_channel: int)[source]
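A hedged sketch of calling scoring() directly; the scp paths are placeholders and only the keyword names come from the signature above:

>>> from espnet2.bin.enh_scoring import scoring
>>> scoring(
...     output_dir="exp/enh_scoring",
...     dtype="float32",
...     log_level="INFO",
...     key_file="dump/test/wav.scp",       # placeholder key file
...     ref_scp=["dump/test/spk1.scp"],     # reference signals
...     inf_scp=["exp/enhanced/spk1.scp"],  # enhanced (inferred) signals
...     ref_channel=0,
... )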

espnet2.bin.enh_inference

espnet2.bin.enh_inference.get_parser()[source]
espnet2.bin.enh_inference.humanfriendly_or_none(value: str)[source]
espnet2.bin.enh_inference.inference(output_dir: str, batch_size: int, dtype: str, fs: int, ngpu: int, seed: int, num_workers: int, log_level: Union[int, str], data_path_and_name_and_type: Sequence[Tuple[str, str, str]], key_file: Optional[str], enh_train_config: str, enh_model_file: str, allow_variable_data_keys: bool, normalize_output_wav: bool)[source]
espnet2.bin.enh_inference.main(cmd=None)[source]

espnet2.bin.__init__

espnet2.bin.tts_inference

TTS model decoding.

class espnet2.bin.tts_inference.Text2Speech(train_config: Union[pathlib.Path, str, None], model_file: Union[pathlib.Path, str, None] = None, threshold: float = 0.5, minlenratio: float = 0.0, maxlenratio: float = 10.0, use_teacher_forcing: bool = False, use_att_constraint: bool = False, backward_window: int = 1, forward_window: int = 3, speed_control_alpha: float = 1.0, vocoder_conf: dict = None, dtype: str = 'float32', device: str = 'cpu')[source]

Bases: object

Text2Speech class

Examples

>>> import soundfile
>>> text2speech = Text2Speech("config.yml", "model.pth")
>>> wav = text2speech("Hello World")[0]
>>> soundfile.write("out.wav", wav.numpy(), text2speech.fs, "PCM_16")
property fs
property use_speech

Check whether to require speech in inference.

Returns

True if speech is required else False.

Return type

bool
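
A short sketch combining these properties with the synthesis call from the example above (paths are placeholders):

>>> from espnet2.bin.tts_inference import Text2Speech
>>> import soundfile
>>> text2speech = Text2Speech("config.yml", "model.pth")
>>> if not text2speech.use_speech:  # this model needs no reference speech
...     wav = text2speech("Hello World")[0]
...     soundfile.write("out.wav", wav.numpy(), text2speech.fs, "PCM_16")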

espnet2.bin.tts_inference.get_parser()[source]

Get argument parser.

espnet2.bin.tts_inference.inference(output_dir: str, batch_size: int, dtype: str, ngpu: int, seed: int, num_workers: int, log_level: Union[int, str], data_path_and_name_and_type: Sequence[Tuple[str, str, str]], key_file: Optional[str], train_config: Optional[str], model_file: Optional[str], threshold: float, minlenratio: float, maxlenratio: float, use_teacher_forcing: bool, use_att_constraint: bool, backward_window: int, forward_window: int, speed_control_alpha: float, allow_variable_data_keys: bool, vocoder_conf: dict)[source]

Perform TTS model decoding.

espnet2.bin.tts_inference.main(cmd=None)[source]

Run TTS model decoding.