# Source code for nnmnkwii.datasets.vcc2016

# coding: utf-8
from __future__ import absolute_import, print_function, with_statement

from os import listdir
from os.path import isdir, join, splitext

import numpy as np
from nnmnkwii.datasets import FileDataSource

# Speaker identifiers accepted by the data sources in this module.
# Any name outside this list is rejected when a data source is constructed.
available_speakers = [
    "SF1", "SF2", "SF3", "SM1", "SM2",
    "TF1", "TF2", "TM1", "TM2", "TM3",
]


class WavFileDataSource(FileDataSource):
    """Wav file data source for Voice Conversion Challenge (VCC) 2016 dataset.

    The data source collects wav files from the VCC2016 dataset.
    Users are expected to inherit the class and implement the
    ``collect_features`` method, which defines how features are computed
    given a wav file path.

    .. note::
        VCC2016 datasets are composed of training data and evaluation data,
        which can be downloaded separately. ``data_root`` should point to the
        directory that contains both the training and evaluation data.

    The directory structure should look like, for example:

    .. code-block:: shell

        > tree -d ~/data/vcc2016/
        /home/ryuichi/data/vcc2016/
        ├── evaluation_all
        │   ├── SF1
        │   ├── SF2
        │   ├── SF3
        │   ├── SM1
        │   ├── SM2
        │   ├── TF1
        │   ├── TF2
        │   ├── TM1
        │   ├── TM2
        │   └── TM3
        └── vcc2016_training
            ├── SF1
            ├── SF2
            ├── SF3
            ├── SM1
            ├── SM2
            ├── TF1
            ├── TF2
            ├── TM1
            ├── TM2
            └── TM3

    Args:
        data_root (str): Data root. It's assumed that training and evaluation
          data are placed at ``${data_root}/vcc2016_training`` and
          ``${data_root}/evaluation_all``, respectively, by default.
        speakers (list): List of speakers to find. Supported names of speaker
          are ``SF1``, ``SF2``, ``SF3``, ``SM1``, ``SM2``, ``TF1``, ``TF2``,
          ``TM1``, ``TM2`` and ``TM3``.
        labelmap (dict[optional]): Dict of speaker labels. If None, labels are
          assigned incrementally (i.e., 0, 1, 2) in the order of ``speakers``.
        max_files (int): Upper bound on the total number of files to be
          collected. The budget is split evenly across speakers using floor
          division, so fewer than ``max_files`` files may be returned. If
          None, all files are collected.
        training_data_root: If specified, training data is searched for in
          this directory. If None, set to ``${data_root}/vcc2016_training``.
        evaluation_data_root: If specified, evaluation data is searched for in
          this directory. If None, set to ``${data_root}/evaluation_all``.
        training (bool): Whether it collects training data or not. If False,
          it collects evaluation data.

    Attributes:
        labels (numpy.ndarray): Speaker labels paired with collected files.
          Stored in ``collect_files``. This is useful to build multi-speaker
          models.

    Raises:
        ValueError: If ``speakers`` contains a name that is not listed in
          ``available_speakers``.
    """

    def __init__(
        self,
        data_root,
        speakers,
        labelmap=None,
        max_files=None,
        training_data_root=None,
        evaluation_data_root=None,
        training=True,
    ):
        if training_data_root is None:
            training_data_root = join(data_root, "vcc2016_training")
        if evaluation_data_root is None:
            evaluation_data_root = join(data_root, "evaluation_all")

        # Reject unsupported speakers up front, before any attribute is set.
        for speaker in speakers:
            if speaker not in available_speakers:
                raise ValueError(
                    "Unknown speaker '{}'. It should be one of {}".format(
                        speaker, available_speakers
                    )
                )

        self.data_root = data_root
        self.training_data_root = training_data_root
        self.evaluation_data_root = evaluation_data_root
        self.training = training
        self.speakers = speakers

        if labelmap is None:
            # Default labels follow the position of each speaker in ``speakers``.
            labelmap = {speaker: idx for idx, speaker in enumerate(speakers)}
        self.labelmap = labelmap

        self.max_files = max_files
        # Filled in by ``collect_files``.
        self.labels = None

    def collect_files(self):
        """Collect wav files for specific speakers.

        Files with a ``.wav`` extension are gathered from each speaker's
        directory in sorted order. As a side effect, ``self.labels`` is set to
        an ``int32`` array holding the speaker label of every returned path.

        Returns:
            list: List of collected wav files.

        Raises:
            RuntimeError: If a speaker directory does not exist.
        """
        data_root = (
            self.training_data_root if self.training else self.evaluation_data_root
        )

        # ``None`` as a slice bound means "no limit". The emptiness check
        # avoids a ZeroDivisionError when no speakers were requested.
        if self.max_files is None or not self.speakers:
            max_files_per_speaker = None
        else:
            max_files_per_speaker = self.max_files // len(self.speakers)

        paths = []
        labels = []
        for speaker in self.speakers:
            speaker_dir = join(data_root, speaker)
            if not isdir(speaker_dir):
                raise RuntimeError("{} doesn't exist.".format(speaker_dir))

            # Sort so the selection does not depend on directory listing order.
            files = sorted(
                join(speaker_dir, name)
                for name in listdir(speaker_dir)
                if splitext(name)[1] == ".wav"
            )
            files = files[:max_files_per_speaker]

            paths.extend(files)
            # The label is looked up once per collected file, so a speaker
            # with no files never touches ``labelmap``.
            labels.extend(self.labelmap[speaker] for _ in files)

        self.labels = np.array(labels, dtype=np.int32)
        return paths