# Source code for espnet2.tts.feats_extract.yin

# remove np from https://github.com/dhchoi99/NANSY/blob/master/models/yin.py
# https://github.com/NVIDIA/mellotron/blob/master/yin.py

import numpy as np
import torch
import torch.nn.functional as F

[docs]def differenceFunction(x, N, tau_max):
"""Compute difference function of data x. This corresponds to equation (6) in [1]

This solution is implemented directly with torch rfft.

:param x: audio data (Tensor)
:param N: length of data
:param tau_max: integration window size
:return: difference function
:rtype: list
"""

# x = np.array(x, np.float64) #[B,T]
assert x.dim() == 2
b, w = x.shape
if w < tau_max:
x,
(tau_max - w - (tau_max - w) // 2, (tau_max - w) // 2),
"constant",
mode="reflect",
)
w = tau_max
# x_cumsum = np.concatenate((np.array([0.]), (x * x).cumsum()))
x_cumsum = torch.cat(
[torch.zeros([b, 1], device=x.device), (x * x).cumsum(dim=1)], dim=1
)
size = w + tau_max
p2 = (size // 32).bit_length()
# p2 = ceil(log2(size+1 // 32))
nice_numbers = (16, 18, 20, 24, 25, 27, 30, 32)
size_pad = min(n * 2**p2 for n in nice_numbers if n * 2**p2 >= size)
fc = torch.fft.rfft(x, size_pad)  # [B,F]
conv = torch.fft.irfft(fc * fc.conj())[:, :tau_max]
return (
x_cumsum[:, w : w - tau_max : -1]
+ x_cumsum[:, w]
- x_cumsum[:, :tau_max]
- 2 * conv
)

[docs]def differenceFunction_np(x, N, tau_max):
"""Compute difference function of data x. This corresponds to equation (6) in [1]

This solution is implemented directly with Numpy fft.

:param x: audio data
:param N: length of data
:param tau_max: integration window size
:return: difference function
:rtype: list
"""

x = np.array(x, np.float64)
w = x.size
tau_max = min(tau_max, w)
x_cumsum = np.concatenate((np.array([0.0]), (x * x).cumsum()))
size = w + tau_max
p2 = (size // 32).bit_length()
nice_numbers = (16, 18, 20, 24, 25, 27, 30, 32)
size_pad = min(x * 2**p2 for x in nice_numbers if x * 2**p2 >= size)
conv = np.fft.irfft(fc * fc.conjugate())[:tau_max]
return x_cumsum[w : w - tau_max : -1] + x_cumsum[w] - x_cumsum[:tau_max] - 2 * conv

[docs]def cumulativeMeanNormalizedDifferenceFunction(df, N, eps=1e-8):
"""Compute cumulative mean normalized difference function (CMND).

This corresponds to equation (8) in [1]

:param df: Difference function
:param N: length of data
:return: cumulative mean normalized difference function
:rtype: list
"""
# np.seterr(divide='ignore', invalid='ignore')
# scipy method, assert df>0 for all element
#   cmndf = df[1:] * np.asarray(list(range(1, N)))
# / (np.cumsum(df[1:]).astype(float) + eps)
B, _ = df.shape
cmndf = (
df[:, 1:]
* torch.arange(1, N, device=df.device, dtype=df.dtype).view(1, -1)
/ (df[:, 1:].cumsum(dim=-1) + eps)
)
[torch.ones([B, 1], device=df.device, dtype=df.dtype), cmndf], dim=-1
)

[docs]def differenceFunctionTorch(xs: torch.Tensor, N, tau_max) -> torch.Tensor:
"""pytorch backend batch-wise differenceFunction

has 1e-4 level error with input shape of (32, 22050*1.5)
Args:
xs:
N:
tau_max:

Returns:

"""
xs = xs.double()
w = xs.shape[-1]
tau_max = min(tau_max, w)
x_cumsum = torch.cat(
(
torch.zeros((xs.shape[0], 1), device=xs.device),
(xs * xs).cumsum(dim=-1, dtype=torch.double),
),
dim=-1,
)  # B x w
size = w + tau_max
p2 = (size // 32).bit_length()
nice_numbers = (16, 18, 20, 24, 25, 27, 30, 32)
size_pad = min(x * 2**p2 for x in nice_numbers if x * 2**p2 >= size)

convs = torch.fft.irfft(fcs * fcs.conj())[:, :tau_max]
y1 = torch.flip(x_cumsum[:, w - tau_max + 1 : w + 1], dims=[-1])
y = y1 + x_cumsum[:, w].unsqueeze(-1) - x_cumsum[:, :tau_max] - 2 * convs
return y

[docs]def cumulativeMeanNormalizedDifferenceFunctionTorch(
dfs: torch.Tensor, N, eps=1e-8
) -> torch.Tensor:
arange = torch.arange(1, N, device=dfs.device, dtype=torch.float64)
cumsum = torch.cumsum(dfs[:, 1:], dim=-1, dtype=torch.float64).to(dfs.device)

cmndfs = dfs[:, 1:] * arange / (cumsum + eps)
cmndfs = torch.cat(
(torch.ones(cmndfs.shape[0], 1, device=dfs.device), cmndfs), dim=-1
)
return cmndfs

if __name__ == "__main__":
wav = torch.randn(32, int(22050 * 1.5)).cuda()
wav_numpy = wav.detach().cpu().numpy()
x = wav_numpy[0]

w_len = 2048
w_step = 256
tau_max = 2048
W = 2048

startFrames = list(range(0, x.shape[-1] - w_len, w_step))
startFrames = np.asarray(startFrames)
# times = startFrames / sr
frames = [x[..., t : t + W] for t in startFrames]
frames = np.asarray(frames)
frames_torch = torch.from_numpy(frames).cuda()

cmndfs0 = []
for idx, frame in enumerate(frames):
df = differenceFunction(frame, frame.shape[-1], tau_max)
cmndf = cumulativeMeanNormalizedDifferenceFunction(df, tau_max)
cmndfs0.append(cmndf)
cmndfs0 = np.asarray(cmndfs0)

dfs = differenceFunctionTorch(frames_torch, frames_torch.shape[-1], tau_max)
cmndfs1 = (
cumulativeMeanNormalizedDifferenceFunctionTorch(dfs, tau_max)
.detach()
.cpu()
.numpy()
)
print(cmndfs0.shape, cmndfs1.shape)
print(np.sum(np.abs(cmndfs0 - cmndfs1)))