# fmt: off
import numpy as np
import torch
from nnmnkwii import paramgen as G
from torch.autograd import Function


class MLPG(Function):
    """Generic MLPG as an autograd function.

    ``f : (T, D) -> (T, static_dim)``.

    This is meant to be used for Minimum Generation Error (MGE) training for
    speech synthesis and voice conversion. See [1]_ and [2]_ for details.

    It relies on :func:`nnmnkwii.paramgen.mlpg` and
    :func:`nnmnkwii.paramgen.mlpg_grad` for forward and backward computation,
    respectively.

    .. [1] Wu, Zhizheng, and Simon King. "Minimum trajectory error training
      for deep neural networks, combined with stacked bottleneck features."
      INTERSPEECH. 2015.
    .. [2] Xie, Feng-Long, et al. "Sequence error (SE) minimization training of
      neural network for voice conversion." Fifteenth Annual Conference of the
      International Speech Communication Association. 2014.

    Args:
        variances (torch.FloatTensor): Variances, same as in
            :func:`nnmnkwii.paramgen.mlpg`.
        windows (list): Window specifications, same as in
            :func:`nnmnkwii.paramgen.mlpg`.

    Warnings:
        The function is generic but cannot run on CUDA. For faster
        differentiable MLPG, see :obj:`UnitVarianceMLPG`.

    See also:
        :func:`nnmnkwii.autograd.mlpg`,
        :func:`nnmnkwii.paramgen.mlpg`,
        :func:`nnmnkwii.paramgen.mlpg_grad`.
    """
    @staticmethod
    def forward(ctx, means, variances, windows):
        assert means.dim() == 2  # we cannot do MLPG on minibatch
        ctx.windows = windows
        ctx.save_for_backward(means, variances)
        assert means.size() == variances.size()

        means_np = means.detach().numpy()
        variances_np = variances.detach().numpy()
        y = G.mlpg(means_np, variances_np, ctx.windows)
        y = torch.from_numpy(y.astype(np.float32))
        return y
    @staticmethod
    def backward(ctx, grad_output):
        means, variances = ctx.saved_tensors

        grad_output_numpy = grad_output.detach().numpy()
        means_numpy = means.detach().numpy()
        variances_numpy = variances.detach().numpy()
        grads_numpy = G.mlpg_grad(
            means_numpy, variances_numpy, ctx.windows, grad_output_numpy
        )
        # Gradients flow only to ``means``; ``variances`` and ``windows`` are
        # treated as constants, hence the two ``None`` entries.
        return torch.from_numpy(grads_numpy).clone(), None, None
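

# Usage sketch (illustrative, not part of the library API): differentiating
# through MLPG with a static window plus a delta window. The window tuples
# follow the ``(past, future, coefficients)`` convention that
# :func:`nnmnkwii.paramgen.mlpg` expects; the sizes below are made up.
#
#   windows = [
#       (0, 0, np.array([1.0])),             # static
#       (1, 1, np.array([-0.5, 0.0, 0.5])),  # delta
#   ]
#   T, static_dim = 10, 24
#   means = torch.randn(T, static_dim * len(windows), requires_grad=True)
#   variances = torch.ones(T, static_dim * len(windows))
#   y = MLPG.apply(means, variances, windows)  # (T, static_dim)
#   y.sum().backward()                         # gradient reaches ``means``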


class UnitVarianceMLPG(Function):
    r"""Special case of MLPG assuming data is normalized to have unit variance.

    ``f : (T x D) -> (T, static_dim)`` or
    ``f : (T*num_windows, static_dim) -> (T, static_dim)``.

    The function is theoretically a special case of :obj:`MLPG`. It assumes
    that the input data is normalized to have unit variance for each
    dimension. The unit-variance property greatly simplifies the backward
    computation of MLPG.

    Let :math:`\mu` be the input mean sequence
    (``num_windows*T x static_dim``) and :math:`W` be a window matrix
    (``T x num_windows*T``). Then MLPG can be written as:

    .. math::

        y = R \mu

    where

    .. math::

        R = (W^{T} W)^{-1} W^{T}

    The matrix ``R`` can be computed by
    :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`.

    Args:
        R: Unit-variance MLPG matrix of shape (``T x num_windows*T``). This
            should be created with
            :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`.

    See also:
        :func:`nnmnkwii.autograd.unit_variance_mlpg`.
    """
    @staticmethod
    def forward(ctx, means, R):
        # TODO: remove this
        ctx.save_for_backward(means, R)
        ctx.num_windows = R.shape[-1] // R.shape[0]
        T = R.shape[0]
        dim = means.dim()

        # Add batch axis if necessary
        if dim == 2:
            T_, D = means.shape
            B = 1
            means = means.view(B, T_, D)
        else:
            B, T_, D = means.shape

        # Check if means has proper shape
        reshaped = not (T == T_)
        if not reshaped:
            static_dim = means.shape[-1] // ctx.num_windows
            reshaped_means = (
                means.contiguous()
                .view(B, T, ctx.num_windows, -1)
                .transpose(1, 2)
                .contiguous()
                .view(B, -1, static_dim)
            )
        else:
            static_dim = means.shape[-1]
            reshaped_means = means

        out = torch.matmul(R, reshaped_means)
        if dim == 2:
            return out.view(-1, static_dim)
        return out
    @staticmethod
    def backward(ctx, grad_output):
        means, R = ctx.saved_tensors
        T = R.shape[0]
        dim = means.dim()

        # Add batch axis if necessary
        if dim == 2:
            T_, D = means.shape
            B = 1
            grad_output = grad_output.view(B, T, -1)
        else:
            B, T_, D = means.shape

        grad = torch.matmul(R.transpose(0, 1), grad_output)

        reshaped = not (T == T_)
        if not reshaped:
            grad = (
                grad.view(B, ctx.num_windows, T, -1)
                .transpose(1, 2)
                .contiguous()
                .view(B, T, D)
            )

        if dim == 2:
            return grad.view(-1, D), None
        return grad, None
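

# Usage sketch (illustrative, not part of the library API): building ``R``
# with :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix` and running the
# fast unit-variance path. Window tuples and sizes are made up as above.
#
#   windows = [
#       (0, 0, np.array([1.0])),
#       (1, 1, np.array([-0.5, 0.0, 0.5])),
#   ]
#   T, static_dim = 10, 24
#   R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
#   means = torch.randn(T, static_dim * len(windows), requires_grad=True)
#   y = UnitVarianceMLPG.apply(means, R)  # (T, static_dim)
#   y.sum().backward()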


def mlpg(means, variances, windows):
    """Maximum Likelihood Parameter Generation (MLPG).

    The parameters are almost the same as :func:`nnmnkwii.paramgen.mlpg`
    expects. The differences are:

    - The function assumes ``means`` as :obj:`torch.autograd.Variable`
      instead of :obj:`numpy.ndarray`.
    - The function assumes ``variances`` as :obj:`torch.FloatTensor`
      instead of :obj:`numpy.ndarray`.

    Args:
        means (torch.autograd.Variable): Means.
        variances (torch.FloatTensor): Variances.
        windows (list): A sequence of window specifications.

    See also:
        :obj:`nnmnkwii.autograd.MLPG`, :func:`nnmnkwii.paramgen.mlpg`
    """
    T, D = means.size()
    if variances.dim() == 1 and variances.shape[0] == D:
        variances = variances.expand(T, D)
    assert means.size() == variances.size()
    return MLPG.apply(means, variances, windows)
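

# Note on the variance argument (an illustrative sketch): a 1-D ``variances``
# of shape (D,) is broadcast over time before MLPG runs, so frame-independent
# variances can be passed directly.
#
#   T, D = 10, 72
#   means = torch.randn(T, D, requires_grad=True)
#   variances = torch.ones(D)  # expanded internally to (T, D)
#   # y = mlpg(means, variances, windows)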


def unit_variance_mlpg(R, means):
    """Special case of MLPG assuming data is normalized to have unit variance.

    Args:
        means (torch.autograd.Variable): Means, of shape (``T x D``) or
            (``T*num_windows x static_dim``). See
            :func:`nnmnkwii.paramgen.reshape_means` to reshape means from
            (``T x D``) to (``T*num_windows x static_dim``).
        R (torch.FloatTensor): MLPG matrix.

    See also:
        :obj:`nnmnkwii.autograd.UnitVarianceMLPG`,
        :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`,
        :func:`reshape_means`.
    """
    return UnitVarianceMLPG.apply(means, R)
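

# Minimal runnable sketch (illustrative; not part of the library). It assumes
# the standard static + delta window convention from nnmnkwii's documentation
# and compares the generic and unit-variance paths on random data; with unit
# variances the two should agree up to numerical error.
if __name__ == "__main__":
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
    ]
    T, static_dim = 10, 4
    D = static_dim * len(windows)

    means = torch.randn(T, D, requires_grad=True)

    # Generic path with unit variances.
    y_generic = mlpg(means, torch.ones(T, D), windows)

    # Fast path: precompute R once and reuse it across utterances of length T.
    R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
    y_fast = unit_variance_mlpg(R, means)

    print(torch.allclose(y_generic, y_fast, atol=1e-4))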