Source code for nnmnkwii.autograd._impl.mlpg

from __future__ import with_statement, print_function, absolute_import

from nnmnkwii import functions as F

# TODO: should this be exported?
from nnmnkwii.functions._impl.mlpg import build_win_mats

from torch.autograd import Function
import torch
import numpy as np
import bandmat as bm


# Note: this is written for pytorch 0.1.12 and may not be compatible with
# pytorch master.
class MLPG(Function):
    r"""MLPG as an autograd function ``f : (T, D) -> (T, static_dim)``.

    This is meant to be used for Minimum Generation Error (MGE) training for
    speech synthesis and voice conversion. See [1]_ for details.

    .. [1] Wu, Zhizheng, and Simon King. "Minimum trajectory error training
      for deep neural networks, combined with stacked bottleneck features."
      INTERSPEECH. 2015.

    Let :math:`d` be the index of static features and :math:`l` be the index
    of windows. The gradients :math:`g_{d,l}` can be computed by:

    .. math::

        g_{d,l} = (\sum_{l} W_{l}^{T}P_{d,l}W_{l})^{-1} W_{l}^{T}P_{d,l}

    where :math:`W_{l}` is a banded window matrix and :math:`P_{d,l}` is a
    diagonal precision matrix.

    Assuming the variances are diagonal, MLPG can be performed efficiently
    dimension-by-dimension.

    Let :math:`o_{d}` be the ``T`` dimensional back-propagated gradients. The
    resulting gradients :math:`g'_{d,l}` to be propagated are computed as
    follows:

    .. math::

        g'_{d,l} = o_{d}^{T} g_{d,l}

    Attributes:
        static_dim (int): number of static dimensions
        variance_frames (torch.FloatTensor): Variances same as in
            :func:`nnmnkwii.functions.mlpg`.
        windows (list): same as in :func:`nnmnkwii.functions.mlpg`.

    TODO:
        CUDA implementation

    See also:
        :func:`nnmnkwii.functions.mlpg`.
    """

    def __init__(self, static_dim, variance_frames, windows):
        super(MLPG, self).__init__()
        self.static_dim = static_dim
        self.windows = windows
        self.variance_frames = variance_frames

    def forward(self, mean_frames):
        """Run MLPG on ``mean_frames`` with the stored variances and windows.

        Args:
            mean_frames (torch.FloatTensor): Means, a 2-D tensor whose size
                must equal that of ``variance_frames``.

        Returns:
            torch.FloatTensor: Output of :func:`nnmnkwii.functions.mlpg`
            converted to a ``float32`` tensor.

        Raises:
            AssertionError: If the input is not 2-D, if its size differs from
                the variances, or if ``static_dim`` does not equal
                ``D // len(windows)``.
        """
        # We cannot do MLPG on a minibatch.
        assert mean_frames.dim() == 2, \
            "mean_frames must be 2-D (T, D); minibatch is not supported"
        variance_frames = self.variance_frames
        self.save_for_backward(mean_frames)

        _, D = mean_frames.size()
        assert mean_frames.size() == variance_frames.size(), \
            "mean_frames and variance_frames must have the same size"
        assert self.static_dim == D // len(self.windows), \
            "static_dim must be equal to D // len(windows)"

        # The actual parameter generation is done in numpy.
        mean_frames_np = mean_frames.numpy()
        variance_frames_np = variance_frames.numpy()
        y = F.mlpg(mean_frames_np, variance_frames_np, self.windows)
        y = torch.from_numpy(y.astype(np.float32))
        return y

    def backward(self, grad_output):
        """Back-propagate ``grad_output`` to the input means.

        Args:
            grad_output (torch.FloatTensor): Gradients w.r.t. the output of
                :meth:`forward`; column ``d`` is used for static dimension
                ``d``.

        Returns:
            torch.FloatTensor: Gradients w.r.t. ``mean_frames``, a tensor of
            the same ``(T, D)`` size as the saved input.
        """
        mean_frames, = self.saved_tensors
        variance_frames = self.variance_frames

        T, D = mean_frames.size()
        win_mats = build_win_mats(self.windows, T)

        # Loop invariants: the band width used for R and the number of
        # windows do not depend on the static dimension being processed.
        sdw = max([win_mat.l + win_mat.u for win_mat in win_mats])
        num_windows = len(self.windows)

        grads = torch.zeros(T, D)
        for d in range(self.static_dim):
            # R: \sum_{l} W_{l}^{T}P_{d,l}W_{l}
            # Starts as zeros and is accumulated in place below.
            R = bm.zeros(sdw, sdw, T)

            # dtype = np.float64 for bandmat
            precisions = np.zeros((num_windows, T), dtype=np.float64)

            for win_idx, win_mat in enumerate(win_mats):
                precisions[win_idx] = 1 / \
                    variance_frames[:, win_idx * self.static_dim + d].numpy()

                bm.dot_mm_plus_equals(win_mat.T, win_mat,
                                      target_bm=R, diag=precisions[win_idx])

            # R is complete at this point, so its dense form and the
            # incoming gradient column are shared by all windows.
            R_full = R.full()
            grad_output_d = grad_output[:, d].numpy()

            for win_idx, win_mat in enumerate(win_mats):
                # r: W_{l}^{T}P_{d,l}
                r = bm.dot_mm(win_mat.T, bm.diag(precisions[win_idx]))

                # grad_{d, l} = R^{-1} r
                grad = np.linalg.solve(R_full, r.full())
                assert grad.shape == (T, T)

                # Finally we get grad for a particular dimension:
                # o_{d}^{T} g_{d,l}
                grads[:, win_idx * self.static_dim + d] = torch.from_numpy(
                    grad_output_d.dot(grad))

        return grads


def mlpg(mean_frames, variance_frames, windows):
    """Maximum Likelihood Parameter Generation (MLPG).

    The parameters are almost the same as :func:`nnmnkwii.functions.mlpg`
    expects. The differences are:

    - The function assumes ``mean_frames`` as :obj:`torch.autograd.Variable`
      instead of :obj:`numpy.ndarray`.
    - The function assumes ``variance_frames`` as :obj:`torch.FloatTensor`
      instead of :obj:`numpy.ndarray`.

    Args:
        mean_frames (torch.autograd.Variable): Means
        variance_frames (torch.FloatTensor): Variances
        windows (list): A sequence of window specification

    Returns:
        The result of applying :obj:`MLPG` to ``mean_frames``, where the
        number of static dimensions is taken as ``D // len(windows)``.

    Raises:
        AssertionError: If ``mean_frames`` and ``variance_frames`` differ in
            size.

    See also:
        :func:`nnmnkwii.functions.mlpg`

    """
    # Only the feature dimension is needed; unpacking also requires the
    # input to be 2-D.
    _, D = mean_frames.size()
    assert mean_frames.size() == variance_frames.size()
    static_dim = D // len(windows)
    return MLPG(static_dim, variance_frames, windows)(mean_frames)