Additional citations
About 3 min
Additional citations
ESPnet-EZ
# ESPnet-EZ toolkit paper (SLT 2024). Braced title words keep their
# capitalization under styles that sentence-case titles.
@inproceedings{someki2024espnet,
  author    = {Someki, Masao and Choi, Kwanghee and Arora, Siddhant and Chen, William and Cornell, Samuele and Han, Jionghao and Peng, Yifan and Shi, Jiatong and Srivastav, Vaibhav and Watanabe, Shinji},
  title     = {{ESPnet-EZ}: {Python}-only {ESPnet} for Easy Fine-tuning and Integration},
  booktitle = {SLT},
  year      = {2024},
}
Weakly-supervised Learning
# OWSM (ASRU 2023). The braces around "Whisper" keep the proper noun
# capitalized under styles that sentence-case titles.
@inproceedings{peng2023reproducing,
  author    = {Peng, Yifan and Tian, Jinchuan and Yan, Brian and Berrebbi, Dan and Chang, Xuankai and Li, Xinjian and Shi, Jiatong and Arora, Siddhant and Chen, William and Sharma, Roshan and others},
  title     = {Reproducing {Whisper}-Style Training Using an Open-Source Toolkit and Publicly Available Data},
  booktitle = {ASRU},
  year      = {2023},
}
# OWSM v3.1 (Interspeech 2024). Braced title words keep their
# capitalization under styles that sentence-case titles.
@inproceedings{peng2024owsm,
  author    = {Peng, Yifan and Tian, Jinchuan and Chen, William and Arora, Siddhant and Yan, Brian and Sudo, Yui and Shakeel, Muhammad and Choi, Kwanghee and Shi, Jiatong and Chang, Xuankai and others},
  title     = {{OWSM} v3.1: Better and Faster Open {Whisper}-Style Speech Models based on {E-Branchformer}},
  booktitle = {Interspeech},
  year      = {2024},
}
# OWSM v3.2 (Interspeech 2024)
@inproceedings{tian2024effects,
  author    = {Tian, Jinchuan and Peng, Yifan and Chen, William and Choi, Kwanghee and Livescu, Karen and Watanabe, Shinji},
  title     = {On the Effects of Heterogeneous Data Sources on Speech-to-Text Foundation Models},
  booktitle = {Interspeech},
  year      = {2024},
}
Self-supervised Learning
# HuBERT reproduction (Interspeech 2023). The braces around "HuBERT" keep
# its mixed-case spelling under styles that sentence-case titles.
@inproceedings{chen2023reducing,
  author    = {Chen, William and Chang, Xuankai and Peng, Yifan and Ni, Zhaoheng and Maiti, Soumi and Watanabe, Shinji},
  title     = {Reducing Barriers to Self-Supervised Learning: {HuBERT} Pre-training with Academic Compute},
  booktitle = {Interspeech},
  year      = {2023},
}
Speaker Embeddings
# ESPnet-SPK toolkit paper (Interspeech 2024). The braced toolkit name
# keeps its capitalization under styles that sentence-case titles.
@inproceedings{jung2024espnet,
  author    = {Jung, Jee-weon and Zhang, Wangyou and Shi, Jiatong and Aldeneh, Zakaria and Higuchi, Takuya and Theobald, Barry-John and Abdelaziz, Ahmed Hussen and Watanabe, Shinji},
  title     = {{ESPnet-SPK}: full pipeline speaker embedding toolkit with reproducible recipes, self-supervised front-ends, and off-the-shelf models},
  booktitle = {Interspeech},
  year      = {2024},
}
TTS
# ESPnet-TTS toolkit paper (ICASSP 2020). The braced toolkit name keeps
# its capitalization under styles that sentence-case titles.
@inproceedings{hayashi2020espnet,
  author    = {Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
  title     = {{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
  booktitle = {ICASSP},
  year      = {2020},
}
# ESPnet2-TTS preprint (arXiv 2110.07840, 2021). The arXiv identifier is
# stored in the eprint fields rather than in a journal name; braced title
# words keep their capitalization under styles that sentence-case titles.
@misc{hayashi2021espnet2,
  author        = {Hayashi, Tomoki and Yamamoto, Ryuichi and Yoshimura, Takenori and Wu, Peter and Shi, Jiatong and Saeki, Takaaki and Ju, Yooncheol and Yasuda, Yusuke and Takamichi, Shinnosuke and Watanabe, Shinji},
  title         = {{ESPnet2-TTS}: Extending the Edge of {TTS} Research},
  year          = {2021},
  eprint        = {2110.07840},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2110.07840},
}
Speech Translation
# ESPnet-ST toolkit paper (ACL Demos 2020). The braced toolkit name keeps
# its capitalization under styles that sentence-case titles.
@inproceedings{inaguma2020espnet,
  author    = {Inaguma, Hirofumi and Kiyono, Shun and Duh, Kevin and Karita, Shigeki and Soplin, Nelson Enrique Yalta and Hayashi, Tomoki and Watanabe, Shinji},
  title     = {{ESPnet-ST}: All-in-one speech translation toolkit},
  booktitle = {ACL Demos},
  year      = {2020},
}
# ESPnet-ST-v2 toolkit paper (ACL Demos 2023). The braced toolkit name
# keeps its capitalization under styles that sentence-case titles.
@inproceedings{yan2023espnet,
  author    = {Yan, Brian and Shi, Jiatong and Tang, Yun and Inaguma, Hirofumi and Peng, Yifan and Dalmia, Siddharth and Polak, Peter and Fernandes, Patrick and Berrebbi, Dan and Hayashi, Tomoki and Zhang, Xiaohui and Ni, Zhaoheng and Hira, Moto and Maiti, Soumi and Pino, Juan and Watanabe, Shinji},
  title     = {{ESPnet-ST-v2}: Multipurpose Spoken Language Translation Toolkit},
  booktitle = {ACL Demos},
  year      = {2023},
}
Speech Enhancement
# ESPnet-SE toolkit paper (SLT 2021). Braced title words keep their
# capitalization under styles that sentence-case titles.
@inproceedings{li2020espnetse,
  author    = {Li, Chenda and Shi, Jing and Zhang, Wangyou and Subramanian, Aswin Shanmugam and Chang, Xuankai and Kamo, Naoyuki and Hira, Moto and Hayashi, Tomoki and Boeddeker, Christoph and Chen, Zhuo and Watanabe, Shinji},
  title     = {{ESPnet-SE}: End-to-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},
  booktitle = {SLT},
  year      = {2021},
}
# ESPnet-SE++ paper (Interspeech 2022). The braced toolkit name keeps its
# capitalization under styles that sentence-case titles.
@inproceedings{lu2022espnetsep,
  author    = {Lu, Yen-Ju and Chang, Xuankai and Li, Chenda and Zhang, Wangyou and Cornell, Samuele and Ni, Zhaoheng and Masuyama, Yoshiki and Yan, Brian and Scheibler, Robin and Wang, Zhong-Qiu and Tsao, Yu and Qian, Yanmin and Watanabe, Shinji},
  title     = {{ESPnet-SE++}: Speech Enhancement for Robust Speech Recognition, Translation, and Understanding},
  booktitle = {Interspeech},
  year      = {2022},
}
Spoken Language Understanding
# ESPnet-SLU toolkit paper (ICASSP 2022). Braced title words keep their
# capitalization under styles that sentence-case titles.
@inproceedings{arora2021espnet,
  author    = {Arora, Siddhant and Dalmia, Siddharth and Denisov, Pavel and Chang, Xuankai and Ueda, Yushi and Peng, Yifan and Zhang, Yuekai and Kumar, Sujay and Ganesan, Karthik and Yan, Brian and Vu, Ngoc Thang and Black, Alan W and Watanabe, Shinji},
  title     = {{ESPnet-SLU}: Advancing Spoken Language Understanding through {ESPnet}},
  booktitle = {ICASSP},
  year      = {2022},
}
Singing Voice Synthesis
# Muskits toolkit paper (Interspeech 2022)
@inproceedings{shi2022muskits,
  title     = {Muskits: an End-to-End Music Processing Toolkit for Singing Voice Synthesis},
  author    = {Shi, Jiatong and Guo, Shuai and Qian, Tao and Huo, Nan and Hayashi, Tomoki and Wu, Yuning and Xu, Frank and Chang, Xuankai and Li, Huazhe and Wu, Peter and Watanabe, Shinji and Jin, Qin},
  booktitle = {Interspeech},
  year      = {2022},
}
Unsupervised ASR
# EURO toolkit paper (ICASSP 2023). Braced title words keep their
# capitalization under styles that sentence-case titles.
@inproceedings{gao2022euro,
  author    = {Gao, Dongji and Shi, Jiatong and Chuang, Shun-Po and Garcia, Leibny Paola and Lee, Hung-yi and Watanabe, Shinji and Khudanpur, Sanjeev},
  title     = {{EURO}: {ESPnet} Unsupervised {ASR} Open-source Toolkit},
  booktitle = {ICASSP},
  year      = {2023},
}
Speech Summarization
# ESPnet-SUMM paper (ASRU 2023). The braced toolkit name keeps its
# capitalization under styles that sentence-case titles.
@inproceedings{sharma2023espnet,
  author    = {Sharma, Roshan and Chen, William and Kano, Takatomo and Sharma, Ruchira and Arora, Siddhant and Watanabe, Shinji and Ogawa, Atsunori and Delcroix, Marc and Singh, Rita and Raj, Bhiksha},
  title     = {{ESPnet-SUMM}: Introducing a Novel Large Dataset, Toolkit, and a Cross-Corpora Evaluation of Speech Summarization Systems},
  booktitle = {ASRU},
  year      = {2023},
}
Exporting models to ONNX
# ESPnet-ONNX paper (APSIPA ASC 2022). The braced toolkit name keeps its
# capitalization under styles that sentence-case titles.
@inproceedings{someki2022espnet,
  author    = {Someki, Masao and Higuchi, Yosuke and Hayashi, Tomoki and Watanabe, Shinji},
  title     = {{ESPnet-ONNX}: Bridging a Gap Between Research and Production},
  booktitle = {APSIPA ASC},
  year      = {2022},
}