From 84711f8fe21aaaf974650fe5a3ab59700f73392b Mon Sep 17 00:00:00 2001
From: Damien Elmes
Date: Tue, 21 Jan 2020 17:34:47 +1000
Subject: [PATCH] add Windows TTS support

---
 qt/aqt/sound.py |   5 +
 qt/aqt/tts.py   | 284 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 287 insertions(+), 2 deletions(-)

diff --git a/qt/aqt/sound.py b/qt/aqt/sound.py
index 0f974f06f..4ee49ae7c 100644
--- a/qt/aqt/sound.py
+++ b/qt/aqt/sound.py
@@ -639,5 +639,10 @@ def setup_audio(taskman: TaskManager, base_folder: str) -> None:
         from aqt.tts import MacTTSPlayer
 
         av_player.players.append(MacTTSPlayer(taskman))
+    elif isWin:
+        from aqt.tts import WindowsTTSPlayer
+        av_player.players.append(WindowsTTSPlayer(taskman))
+
+
     # cleanup at shutdown
     atexit.register(av_player.shutdown)
diff --git a/qt/aqt/tts.py b/qt/aqt/tts.py
index 8f29ff880..7ce7ce877 100644
--- a/qt/aqt/tts.py
+++ b/qt/aqt/tts.py
@@ -30,10 +30,10 @@ import re
 import subprocess
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Any, List, Optional, cast
 
 from anki.sound import AVTag, TTSTag
-from anki.utils import tmpdir
+from anki.utils import isWin, tmpdir
 from aqt.sound import OnDoneCallback, PlayerInterrupted, SimpleProcessPlayer
 
 
@@ -178,3 +178,283 @@ class MacTTSFilePlayer(MacTTSPlayer):
 
         # then tell player to advance, which will cause the file to be played
         cb()
+
+# Windows support
+##########################################################################
+
+@dataclass
+class WindowsVoice(TTSVoice):
+    handle: Any
+
+if isWin:
+    import win32com.client
+
+    # language ID map from https://github.com/sindresorhus/lcid/blob/master/lcid.json
+    LCIDS = {
+        "4": "zh_CHS",
+        "1025": "ar_SA",
+        "1026": "bg_BG",
+        "1027": "ca_ES",
+        "1028": "zh_TW",
+        "1029": "cs_CZ",
+        "1030": "da_DK",
+        "1031": "de_DE",
+        "1032": "el_GR",
+        "1033": "en_US",
+        "1034": "es_ES",
+        "1035": "fi_FI",
+        "1036": "fr_FR",
+        "1037": "he_IL",
+        "1038": "hu_HU",
+        "1039": "is_IS",
+        "1040": "it_IT",
+        "1041": "ja_JP",
+        "1042": "ko_KR",
+        "1043": "nl_NL",
+        "1044": "nb_NO",
+        "1045": "pl_PL",
+        "1046": "pt_BR",
+        "1047": "rm_CH",
+        "1048": "ro_RO",
+        "1049": "ru_RU",
+        "1050": "hr_HR",
+        "1051": "sk_SK",
+        "1052": "sq_AL",
+        "1053": "sv_SE",
+        "1054": "th_TH",
+        "1055": "tr_TR",
+        "1056": "ur_PK",
+        "1057": "id_ID",
+        "1058": "uk_UA",
+        "1059": "be_BY",
+        "1060": "sl_SI",
+        "1061": "et_EE",
+        "1062": "lv_LV",
+        "1063": "lt_LT",
+        "1064": "tg_TJ",
+        "1065": "fa_IR",
+        "1066": "vi_VN",
+        "1067": "hy_AM",
+        "1069": "eu_ES",
+        "1070": "wen_DE",
+        "1071": "mk_MK",
+        "1074": "tn_ZA",
+        "1076": "xh_ZA",
+        "1077": "zu_ZA",
+        "1078": "af_ZA",
+        "1079": "ka_GE",
+        "1080": "fo_FO",
+        "1081": "hi_IN",
+        "1082": "mt_MT",
+        "1083": "se_NO",
+        "1086": "ms_MY",
+        "1087": "kk_KZ",
+        "1088": "ky_KG",
+        "1089": "sw_KE",
+        "1090": "tk_TM",
+        "1092": "tt_RU",
+        "1093": "bn_IN",
+        "1094": "pa_IN",
+        "1095": "gu_IN",
+        "1096": "or_IN",
+        "1097": "ta_IN",
+        "1098": "te_IN",
+        "1099": "kn_IN",
+        "1100": "ml_IN",
+        "1101": "as_IN",
+        "1102": "mr_IN",
+        "1103": "sa_IN",
+        "1104": "mn_MN",
+        "1105": "bo_CN",
+        "1106": "cy_GB",
+        "1107": "kh_KH",
+        "1108": "lo_LA",
+        "1109": "my_MM",
+        "1110": "gl_ES",
+        "1111": "kok_IN",
+        "1114": "syr_SY",
+        "1115": "si_LK",
+        "1118": "am_ET",
+        "1121": "ne_NP",
+        "1122": "fy_NL",
+        "1123": "ps_AF",
+        "1124": "fil_PH",
+        "1125": "div_MV",
+        "1128": "ha_NG",
+        "1130": "yo_NG",
+        "1131": "quz_BO",
+        "1132": "ns_ZA",
+        "1133": "ba_RU",
+        "1134": "lb_LU",
+        "1135": "kl_GL",
+        "1144": "ii_CN",
+        "1146": "arn_CL",
+        "1148": "moh_CA",
+        "1150": "br_FR",
+        "1152": "ug_CN",
+        "1153": "mi_NZ",
+        "1154": "oc_FR",
+        "1155": "co_FR",
+        "1156": "gsw_FR",
+        "1157": "sah_RU",
+        "1158": "qut_GT",
+        "1159": "rw_RW",
+        "1160": "wo_SN",
+        "1164": "gbz_AF",
+        "2049": "ar_IQ",
+        "2052": "zh_CN",
+        "2055": "de_CH",
+        "2057": "en_GB",
+        "2058": "es_MX",
+        "2060": "fr_BE",
+        "2064": "it_CH",
+        "2067": "nl_BE",
+        "2068": "nn_NO",
+        "2070": "pt_PT",
+        "2077": "sv_FI",
+        "2080": "ur_IN",
+        "2092": "az_AZ",
+        "2094": "dsb_DE",
+        "2107": "se_SE",
+        "2108": "ga_IE",
+        "2110": "ms_BN",
+        "2115": "uz_UZ",
+        "2128": "mn_CN",
+        "2129": "bo_BT",
+        "2141": "iu_CA",
+        "2143": "tmz_DZ",
+        "2155": "quz_EC",
+        "3073": "ar_EG",
+        "3076": "zh_HK",
+        "3079": "de_AT",
+        "3081": "en_AU",
+        "3082": "es_ES",
+        "3084": "fr_CA",
+        "3098": "sr_SP",
+        "3131": "se_FI",
+        "3179": "quz_PE",
+        "4097": "ar_LY",
+        "4100": "zh_SG",
+        "4103": "de_LU",
+        "4105": "en_CA",
+        "4106": "es_GT",
+        "4108": "fr_CH",
+        "4122": "hr_BA",
+        "4155": "smj_NO",
+        "5121": "ar_DZ",
+        "5124": "zh_MO",
+        "5127": "de_LI",
+        "5129": "en_NZ",
+        "5130": "es_CR",
+        "5132": "fr_LU",
+        "5179": "smj_SE",
+        "6145": "ar_MA",
+        "6153": "en_IE",
+        "6154": "es_PA",
+        "6156": "fr_MC",
+        "6203": "sma_NO",
+        "7169": "ar_TN",
+        "7177": "en_ZA",
+        "7178": "es_DO",
+        "7194": "sr_BA",
+        "7227": "sma_SE",
+        "8193": "ar_OM",
+        "8201": "en_JA",
+        "8202": "es_VE",
+        "8218": "bs_BA",
+        "8251": "sms_FI",
+        "9217": "ar_YE",
+        "9225": "en_CB",
+        "9226": "es_CO",
+        "9275": "smn_FI",
+        "10241": "ar_SY",
+        "10249": "en_BZ",
+        "10250": "es_PE",
+        "11265": "ar_JO",
+        "11273": "en_TT",
+        "11274": "es_AR",
+        "12289": "ar_LB",
+        "12297": "en_ZW",
+        "12298": "es_EC",
+        "13313": "ar_KW",
+        "13321": "en_PH",
+        "13322": "es_CL",
+        "14337": "ar_AE",
+        "14346": "es_UR",
+        "15361": "ar_BH",
+        "15370": "es_PY",
+        "16385": "ar_QA",
+        "16394": "es_BO",
+        "17417": "en_MY",
+        "17418": "es_SV",
+        "18441": "en_IN",
+        "18442": "es_HN",
+        "19466": "es_NI",
+        "20490": "es_PR",
+        "21514": "es_US",
+        "31748": "zh_CHT"
+    }
+
+    def lcid_hex_str_to_lang_code(hex: str) -> str:
+        """Map a SAPI "Language" attribute to a locale code such as en_US.
+
+        SAPI reports LCIDs in hex, and a voice may list several of them
+        separated by semicolons (e.g. "409;9"). The first one present in
+        LCIDS wins; "unknown" is returned when none can be parsed or matched.
+        """
+        for code in hex.split(";"):
+            try:
+                dec_str = str(int(code, 16))
+            except ValueError:
+                # empty or malformed entry; try the next one
+                continue
+            if dec_str in LCIDS:
+                return LCIDS[dec_str]
+        return "unknown"
+
+
+    class WindowsTTSPlayer(TTSProcessPlayer):
+        """Speaks TTS tags through the Windows SAPI COM interface."""
+
+        # Created once at class definition. If SAPI can't be instantiated we
+        # fall back to None, so that importing this module never fails and the
+        # player simply reports no voices.
+        try:
+            speaker = win32com.client.Dispatch("SAPI.SpVoice")
+        except Exception:
+            speaker = None
+
+        def get_available_voices(self) -> List[TTSVoice]:
+            """Return every voice SAPI exposes, or [] if SAPI is unavailable."""
+            if self.speaker is None:
+                return []
+            return [self._voice_to_object(v) for v in self.speaker.GetVoices()]
+
+        def _voice_to_object(self, voice: Any) -> WindowsVoice:
+            """Wrap a native SAPI voice token in a WindowsVoice."""
+            lang = lcid_hex_str_to_lang_code(voice.GetAttribute("language"))
+            return WindowsVoice(
+                name=voice.GetAttribute("name"), lang=lang, handle=voice
+            )
+
+        def _play(self, tag: AVTag) -> None:
+            """Speak the tag's text, polling so playback can be interrupted."""
+            assert isinstance(tag, TTSTag)
+            match = self.voice_for_tag(tag)
+            assert match
+            voice = cast(WindowsVoice, match.voice)
+
+            try:
+                self.speaker.Voice = voice.handle
+                # 1 == SVSFlagsAsync: return immediately and poll below
+                self.speaker.Speak(tag.field_text, 1)
+
+                # wake every 100ms to check the terminate flag
+                while not self.speaker.WaitUntilDone(100):
+                    if self._terminate_flag:
+                        # skip past the remaining sentences to stop playback
+                        self.speaker.Skip("Sentence", 2**15)
+                        return
+            finally:
+                self._terminate_flag = False
+