# Source code for nnmnkwii.autograd._impl.mlpg

# fmt: off
import numpy as np
import torch
from nnmnkwii import paramgen as G
from torch.autograd import Function


class MLPG(Function):
    """Generic MLPG as an autograd function.

    ``f : (T, D) -> (T, static_dim)``.

    This is meant to be used for Minimum Generation Error (MGE) training for
    speech synthesis and voice conversion. See [1]_ and [2]_ for details.

    It relies on :func:`nnmnkwii.paramgen.mlpg` and
    :func:`nnmnkwii.paramgen.mlpg_grad` for forward and backward computation,
    respectively.

    .. [1] Wu, Zhizheng, and Simon King. "Minimum trajectory error training
      for deep neural networks, combined with stacked bottleneck features."
      INTERSPEECH. 2015.
    .. [2] Xie, Feng-Long, et al. "Sequence error (SE) minimization training
      of neural network for voice conversion." Fifteenth Annual Conference of
      the International Speech Communication Association. 2014.

    Args:
        variances (torch.FloatTensor): Variances same as in
            :func:`nnmnkwii.paramgen.mlpg`.
        windows (list): same as in :func:`nnmnkwii.paramgen.mlpg`.

    Warnings:
        The function is generic but cannot run on CUDA. For faster
        differentiable MLPG, see :obj:`UnitVarianceMLPG`.

    See also:
        :func:`nnmnkwii.autograd.mlpg`, :func:`nnmnkwii.paramgen.mlpg`,
        :func:`nnmnkwii.paramgen.mlpg_grad`.
    """
    @staticmethod
    def forward(ctx, means, variances, windows):
        assert means.dim() == 2  # we cannot do MLPG on minibatch
        ctx.windows = windows
        ctx.save_for_backward(means, variances)

        assert means.size() == variances.size()
        means_np = means.detach().numpy()
        variances_np = variances.detach().numpy()
        y = G.mlpg(means_np, variances_np, ctx.windows)
        y = torch.from_numpy(y.astype(np.float32))
        return y
    @staticmethod
    def backward(ctx, grad_output):
        means, variances = ctx.saved_tensors

        grad_output_numpy = grad_output.detach().numpy()
        means_numpy = means.detach().numpy()
        variances_numpy = variances.detach().numpy()
        grads_numpy = G.mlpg_grad(
            means_numpy, variances_numpy, ctx.windows, grad_output_numpy
        )
        return torch.from_numpy(grads_numpy).clone(), None, None

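# Usage sketch (not part of the original module): differentiating through MLPG
# by calling ``MLPG.apply`` directly. The static + delta window coefficients
# below are an assumption; any window specification accepted by
# :func:`nnmnkwii.paramgen.mlpg` works. When calling the Function directly,
# ``variances`` must already have the same (T, D) shape as ``means``.
#
#     import numpy as np
#     import torch
#     from nnmnkwii.autograd import MLPG
#
#     windows = [
#         (0, 0, np.array([1.0])),             # static
#         (1, 1, np.array([-0.5, 0.0, 0.5])),  # delta (assumed coefficients)
#     ]
#     T, static_dim = 10, 24
#     D = static_dim * len(windows)
#     means = torch.randn(T, D, requires_grad=True)
#     variances = torch.ones(T, D)
#     y = MLPG.apply(means, variances, windows)  # (T, static_dim)
#     y.sum().backward()                         # gradients flow back to ``means``
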
class UnitVarianceMLPG(Function):
    r"""Special case of MLPG assuming data is normalized to have unit variance.

    ``f : (T, D) -> (T, static_dim)`` or
    ``f : (T*num_windows, static_dim) -> (T, static_dim)``.

    The function is theoretically a special case of :obj:`MLPG`. It assumes
    the input data is normalized to have unit variance for each dimension.
    The unit-variance property greatly simplifies the backward computation of
    MLPG.

    Let :math:`\mu` be the input mean sequence (``num_windows*T x static_dim``)
    and :math:`W` be a window matrix (``num_windows*T x T``). MLPG can then be
    written as:

    .. math::

        y = R \mu

    where

    .. math::

        R = (W^{T} W)^{-1} W^{T}

    The matrix ``R`` can be computed by
    :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`.

    Args:
        R: Unit-variance MLPG matrix of shape (``T x num_windows*T``). This
            should be created with
            :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`.

    See also:
        :func:`nnmnkwii.autograd.unit_variance_mlpg`.
    """
    @staticmethod
    def forward(ctx, means, R):
        # TODO: remove this
        ctx.save_for_backward(means, R)
        ctx.num_windows = R.shape[-1] // R.shape[0]
        T = R.shape[0]
        dim = means.dim()
        # Add batch axis if necessary
        if dim == 2:
            T_, D = means.shape
            B = 1
            means = means.view(B, T_, D)
        else:
            B, T_, D = means.shape

        # Check if means has proper shape
        reshaped = not (T == T_)
        if not reshaped:
            static_dim = means.shape[-1] // ctx.num_windows
            reshaped_means = (
                means.contiguous()
                .view(B, T, ctx.num_windows, -1)
                .transpose(1, 2)
                .contiguous()
                .view(B, -1, static_dim)
            )
        else:
            static_dim = means.shape[-1]
            reshaped_means = means

        out = torch.matmul(R, reshaped_means)
        if dim == 2:
            return out.view(-1, static_dim)

        return out
    @staticmethod
    def backward(ctx, grad_output):
        means, R = ctx.saved_tensors

        T = R.shape[0]
        dim = means.dim()
        # Add batch axis if necessary
        if dim == 2:
            T_, D = means.shape
            B = 1
            grad_output = grad_output.view(B, T, -1)
        else:
            B, T_, D = means.shape

        grad = torch.matmul(R.transpose(0, 1), grad_output)

        reshaped = not (T == T_)
        if not reshaped:
            grad = (
                grad.view(B, ctx.num_windows, T, -1)
                .transpose(1, 2)
                .contiguous()
                .view(B, T, D)
            )

        if dim == 2:
            return grad.view(-1, D), None

        return grad, None

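# Usage sketch (not part of the original module): applying unit-variance MLPG
# to (T, D) means. ``R`` is built with
# :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`, which is assumed to
# return a numpy array, so it is converted to a tensor first. The window
# coefficients are the same assumed static + delta set as above.
#
#     import numpy as np
#     import torch
#     from nnmnkwii import paramgen as G
#     from nnmnkwii.autograd import UnitVarianceMLPG
#
#     windows = [
#         (0, 0, np.array([1.0])),
#         (1, 1, np.array([-0.5, 0.0, 0.5])),
#     ]
#     T, static_dim = 10, 24
#     R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T)).float()
#     means = torch.randn(T, static_dim * len(windows), requires_grad=True)
#     y = UnitVarianceMLPG.apply(means, R)  # (T, static_dim); reshaped internally
#     y.sum().backward()
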
def mlpg(means, variances, windows):
    """Maximum Likelihood Parameter Generation (MLPG).

    The parameters are almost the same as :func:`nnmnkwii.paramgen.mlpg`
    expects. The differences are:

    - The function assumes ``means`` as :obj:`torch.autograd.Variable`
      instead of :obj:`numpy.ndarray`.
    - The function assumes ``variances`` as :obj:`torch.FloatTensor`
      instead of :obj:`numpy.ndarray`.

    Args:
        means (torch.autograd.Variable): Means
        variances (torch.FloatTensor): Variances
        windows (list): A sequence of window specification

    See also:
        :obj:`nnmnkwii.autograd.MLPG`, :func:`nnmnkwii.paramgen.mlpg`
    """
    T, D = means.size()
    if variances.dim() == 1 and variances.shape[0] == D:
        variances = variances.expand(T, D)
    assert means.size() == variances.size()
    return MLPG.apply(means, variances, windows)

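# Usage sketch (not part of the original module): unlike ``MLPG.apply``, the
# ``mlpg`` wrapper above also accepts a per-dimension variance vector of shape
# (D,), which it expands to (T, D) before applying the Function. The window
# coefficients are assumed.
#
#     import numpy as np
#     import torch
#     from nnmnkwii.autograd import mlpg
#
#     windows = [(0, 0, np.array([1.0])), (1, 1, np.array([-0.5, 0.0, 0.5]))]
#     T, D = 10, 48
#     means = torch.randn(T, D, requires_grad=True)
#     variances = torch.ones(D)            # (D,) is expanded to (T, D) internally
#     y = mlpg(means, variances, windows)  # (T, static_dim) = (10, 24)
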
def unit_variance_mlpg(R, means):
    """Special case of MLPG assuming data is normalized to have unit variance.

    Args:
        means (torch.autograd.Variable): Means, of shape (``T x D``) or
            (``T*num_windows x static_dim``). See
            :func:`nnmnkwii.paramgen.reshape_means` to reshape means from
            (``T x D``) to (``T*num_windows x static_dim``).
        R (torch.FloatTensor): MLPG matrix.

    See also:
        :obj:`nnmnkwii.autograd.UnitVarianceMLPG`,
        :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`,
        :func:`reshape_means`.
    """
    return UnitVarianceMLPG.apply(means, R)

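# Usage sketch (not part of the original module): ``unit_variance_mlpg`` with
# means pre-reshaped to (T*num_windows, static_dim) via
# :func:`nnmnkwii.paramgen.reshape_means` (assumed here to operate on numpy
# arrays). Passing the unreshaped (T, D) means also works, since
# ``UnitVarianceMLPG.forward`` reshapes internally.
#
#     import numpy as np
#     import torch
#     from nnmnkwii import paramgen as G
#     from nnmnkwii.autograd import unit_variance_mlpg
#
#     windows = [(0, 0, np.array([1.0])), (1, 1, np.array([-0.5, 0.0, 0.5]))]
#     T, static_dim = 10, 24
#     R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T)).float()
#     means = torch.randn(T, static_dim * len(windows))
#     reshaped = torch.from_numpy(G.reshape_means(means.numpy(), static_dim))
#     y = unit_variance_mlpg(R, reshaped)  # (T, static_dim)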