Source code for nnmnkwii.preprocessing.f0

import numpy as np
from scipy import interpolate


def interp1d(f0, kind="slinear"):
    """Continuous F0 interpolation from discontinuous F0 trajectory

    This function generates continuous f0 from discontinuous f0 trajectory
    based on :func:`scipy.interpolate.interp1d`. This is meant to be used for
    continuous f0 modeling in statistical speech synthesis
    (e.g., see [1]_, [2]_).

    Frames whose value is ``> 0`` are treated as voiced and kept as they are;
    frames whose value is ``<= 0`` are treated as unvoiced and replaced by
    interpolated values. Leading and trailing unvoiced segments are filled by
    extending the first and last voiced values, respectively.

    If ``kind`` = ``'slinear'``, then this does same thing as Merlin does.

    Args:
        f0 (ndarray): F0 or log-f0 trajectory, either 1d (``T``, ) or
            2d (``T`` x 1).
        kind (str): Kind of interpolation that
            :func:`scipy.interpolate.interp1d` supports.
            Default is ``'slinear'``, which means linear interpolation.

    Returns:
        1d array (``T``, ) or 2d (``T`` x 1) array: Interpolated continuous f0
        trajectory. The input array is never modified. If the input contains
        no voiced frame at all, the input object itself is returned as is.

    Raises:
        RuntimeError: If ``f0`` has more than one element per frame, i.e.
            ``len(f0) != f0.size``.

    Examples:
        >>> from nnmnkwii.preprocessing import interp1d
        >>> import numpy as np
        >>> from nnmnkwii.util import example_audio_file
        >>> from scipy.io import wavfile
        >>> import pyworld
        >>> fs, x = wavfile.read(example_audio_file())
        >>> f0, timeaxis = pyworld.dio(x.astype(np.float64), fs, frame_period=5)
        >>> continuous_f0 = interp1d(f0, kind="slinear")
        >>> assert f0.shape == continuous_f0.shape

    .. [1] Yu, Kai, and Steve Young. "Continuous F0 modeling for HMM based
        statistical parametric speech synthesis." IEEE Transactions on Audio,
        Speech, and Language Processing 19.5 (2011): 1071-1079.

    .. [2] Takamichi, Shinnosuke, et al. "The NAIST text-to-speech system for
        the Blizzard Challenge 2015." Proc. Blizzard Challenge workshop. 2015.
    """
    ndim = f0.ndim
    if len(f0) != f0.size:
        raise RuntimeError("1d array is only supported")

    # flatten() always returns a copy, so the caller's array stays untouched.
    continuous_f0 = f0.flatten()
    nonzero_indices = np.where(continuous_f0 > 0)[0]

    # Nothing to do: there is no voiced frame to interpolate from.
    if len(nonzero_indices) <= 0:
        return f0

    # Need this to insert continuous values for the first/end silence
    # segments: anchoring both ends guarantees that every remaining unvoiced
    # frame lies strictly between two voiced frames (no extrapolation).
    continuous_f0[0] = continuous_f0[nonzero_indices[0]]
    continuous_f0[-1] = continuous_f0[nonzero_indices[-1]]

    # Only build the interpolator when there is something left to fill.
    # scipy rejects inputs with too few sample points (e.g. a single voiced
    # frame), which would otherwise crash even though no interpolation is
    # actually required.
    zero_indices = np.where(continuous_f0 <= 0)[0]
    if len(zero_indices) > 0:
        # Recompute the voiced positions: the end points may just have
        # become voiced.
        nonzero_indices = np.where(continuous_f0 > 0)[0]
        interp_func = interpolate.interp1d(
            nonzero_indices, continuous_f0[nonzero_indices], kind=kind
        )
        # Fill silence segments with interpolated values
        continuous_f0[zero_indices] = interp_func(zero_indices)

    if ndim == 2:
        return continuous_f0[:, None]
    return continuous_f0