Source code for nnmnkwii.preprocessing.f0

import numpy as np
from scipy import interpolate


def interp1d(f0, kind="slinear"):
    """Continuous F0 interpolation from discontinuous F0 trajectory

    This function generates continuous f0 from discontinuous f0 trajectory
    based on :func:`scipy.interpolate.interp1d`. This is meant to be used for
    continuous f0 modeling in statistical speech synthesis
    (e.g., see [1]_, [2]_).

    Frames whose value is ``> 0`` are treated as voiced; frames whose value
    is ``<= 0`` are replaced by interpolated values. Leading and trailing
    unvoiced segments are filled with the first and last voiced value,
    respectively. The input array is never modified.

    If ``kind`` = ``'slinear'``, then this does same thing as Merlin does.

    Args:
        f0 (ndarray): F0 or log-f0 trajectory
        kind (str): Kind of interpolation that :func:`scipy.interpolate.interp1d`
            supports. Default is ``'slinear'``, which means linear interpolation.

    Returns:
        1d array (``T``, ) or 2d (``T`` x 1) array: Interpolated continuous f0
        trajectory. If ``f0`` contains no voiced frame at all, ``f0`` itself
        is returned unchanged.

    Raises:
        RuntimeError: If ``f0`` holds more than one value per frame
            (i.e. ``len(f0) != f0.size``).

    Examples:
        >>> from nnmnkwii.preprocessing import interp1d
        >>> import numpy as np
        >>> from nnmnkwii.util import example_audio_file
        >>> from scipy.io import wavfile
        >>> import pyworld
        >>> fs, x = wavfile.read(example_audio_file())
        >>> f0, timeaxis = pyworld.dio(x.astype(np.float64), fs, frame_period=5)
        >>> continuous_f0 = interp1d(f0, kind="slinear")
        >>> assert f0.shape == continuous_f0.shape

    .. [1] Yu, Kai, and Steve Young. "Continuous F0 modeling for HMM based
        statistical parametric speech synthesis." IEEE Transactions on Audio,
        Speech, and Language Processing 19.5 (2011): 1071-1079.

    .. [2] Takamichi, Shinnosuke, et al. "The NAIST text-to-speech system for
        the Blizzard Challenge 2015." Proc. Blizzard Challenge workshop. 2015.
    """
    ndim = f0.ndim
    if len(f0) != f0.size:
        raise RuntimeError("1d array is only supported")
    # flatten() always copies, so the caller's array is left untouched
    continuous_f0 = f0.flatten()
    nonzero_indices = np.where(continuous_f0 > 0)[0]

    # Nothing to do: there is no voiced frame to interpolate from
    if len(nonzero_indices) == 0:
        return f0

    # Need this to insert continuous values for the first/end silence segments
    continuous_f0[0] = continuous_f0[nonzero_indices[0]]
    continuous_f0[-1] = continuous_f0[nonzero_indices[-1]]

    # Only build the interpolator when there is something to fill. This avoids
    # needless work for fully voiced input and, more importantly, avoids scipy
    # rejecting trajectories with too few sample points (e.g. a single voiced
    # frame) when no interpolation is required in the first place.
    zero_indices = np.where(continuous_f0 <= 0)[0]
    if len(zero_indices) > 0:
        voiced_mask = continuous_f0 > 0
        interp_func = interpolate.interp1d(
            np.where(voiced_mask)[0], continuous_f0[voiced_mask], kind=kind
        )

        # Fill silence segments with interpolated values
        continuous_f0[zero_indices] = interp_func(zero_indices)

    if ndim == 2:
        return continuous_f0[:, None]
    return continuous_f0