Source code for ttslearn.tacotron.frontend.text

# Vocabulary: lowercase letters, punctuation marks, and the space character
characters = "abcdefghijklmnopqrstuvwxyz!'(),-.:;? "
# Additional special symbols:
#   "^" marks the beginning of a sentence <SOS>
#   "$" marks the end of a sentence <EOS>
extra_symbols = ["^", "$"]
_pad = "~"

# NOTE: the padding symbol comes first so that it is assigned ID 0
symbols = [_pad, *extra_symbols, *characters]

# Lookup tables for converting between symbols and integer IDs
_id_to_symbol = dict(enumerate(symbols))
_symbol_to_id = {sym: idx for idx, sym in _id_to_symbol.items()}


def num_vocab():
    """Get the size of the vocabulary.

    The count covers every entry of the module-level ``symbols`` list, i.e.
    the padding symbol, the special <SOS>/<EOS> symbols, and all characters.

    Returns:
        int: Number of symbols in the vocabulary.

    Examples:
        >>> from ttslearn.tacotron.frontend.text import num_vocab
        >>> num_vocab()
        40
    """
    return len(symbols)
def text_to_sequence(text):
    """Convert text to a sequence of symbol IDs.

    The text is lowercased, then wrapped with the <SOS> ("^") and <EOS> ("$")
    symbol IDs.

    Args:
        text (str): Input text.

    Returns:
        list: List of integer symbol IDs, starting with the ID of "^" and
        ending with the ID of "$".

    Raises:
        KeyError: If the lowercased text contains a character that has no
            entry in the symbol table.

    Examples:
        >>> from ttslearn.tacotron.frontend.text import text_to_sequence
        >>> text_to_sequence("Hello world")
        [1, 10, 7, 14, 14, 17, 39, 25, 17, 20, 14, 6, 2]
    """
    # For simplicity, case is not distinguished: every uppercase letter is
    # converted to lowercase before lookup.
    text = text.lower()

    # <SOS> marking the beginning of the sentence
    seq = [_symbol_to_id["^"]]

    # Body of the text, one ID per character
    seq += [_symbol_to_id[s] for s in text]

    # <EOS> marking the end of the sentence
    seq.append(_symbol_to_id["$"])

    return seq
def sequence_to_text(seq):
    """Convert a sequence of symbol IDs back to symbols.

    Note that the result is a list with one symbol per input ID, not a joined
    string; special symbols such as "^" and "$" are kept as they are.

    Args:
        seq (list): Input sequence of integer symbol IDs.

    Returns:
        list: List of symbols (single-character strings), one per ID.

    Raises:
        KeyError: If an ID in ``seq`` has no entry in the symbol table.

    Examples:
        >>> from ttslearn.tacotron.frontend.text import sequence_to_text
        >>> sequence_to_text([1, 10, 7, 14, 14, 17, 39, 25, 17, 20, 14, 6, 2])
        ['^', 'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '$']
    """
    return [_id_to_symbol[s] for s in seq]