WinRT TTS API support

2025-12-24 12:22:56 -05:00 · 2020-12-14 21:49:57 -06:00 · 2020-12-14 21:49:57 -06:00 · 56703e5f3a
commit 56703e5f3a
parent 165007180a
2 changed files with 64 additions and 1 deletions
--- a/qt/aqt/sound.py
+++ b/qt/aqt/sound.py
@ -916,9 +916,10 @@ def setup_audio(taskman: TaskManager, base_folder: str) -> None:

        av_player.players.append(MacTTSPlayer(taskman))
    elif isWin:
-        from aqt.tts import WindowsTTSPlayer
+        from aqt.tts import WindowsTTSPlayer, WindowsRTTTSFilePlayer

        av_player.players.append(WindowsTTSPlayer(taskman))
+        av_player.players.append(WindowsRTTTSFilePlayer(taskman))

    # cleanup at shutdown
    atexit.register(av_player.shutdown)
--- a/qt/aqt/tts.py
+++ b/qt/aqt/tts.py
@ -28,6 +28,8 @@ from __future__ import annotations
 import os
 import re
 import subprocess
+import threading
+import asyncio
 from concurrent.futures import Future
 from dataclasses import dataclass
 from operator import attrgetter
@ -516,3 +518,63 @@ if isWin:
            "eg. 1.5 -> 15, 0.5 -> -5"
            speed = (speed * 10) - 10
            return int(max(-10, min(10, speed)))
+
+    @dataclass
+    class WindowsRTVoice(TTSVoice):
+        # a TTSVoice that additionally records which WinRT voice it stands for;
+        # `id` is the id attribute of a voice from SpeechSynthesizer.get_all_voices()
+        id: Any
+
+    class WindowsRTTTSFilePlayer(TTSProcessPlayer):
+        voice_list = None
+        tmppath = os.path.join(tmpdir(), "tts.wav")
+
+        def import_voices(self) -> None:
+            import winrt.windows.media.speechsynthesis as speechsynthesis
+            self.voice_list = speechsynthesis.SpeechSynthesizer.get_all_voices()
+        def get_available_voices(self) -> List[TTSVoice]:
+            t = threading.Thread(target=self.import_voices)
+            t.start()
+            t.join()
+            return list(map(self._voice_to_object, self.voice_list))
+        def _voice_to_object(self, voice: Any) -> TTSVoice:
+            return WindowsRTVoice(id=voice.id, name=voice.display_name.replace(" ", "_"), lang=voice.language.replace("-", "_"))
+        def _play(self, tag: AVTag) -> None:
+            assert isinstance(tag, TTSTag)
+            match = self.voice_for_tag(tag)
+            assert match
+            voice = cast(WindowsRTVoice, match.voice)
+
+            self._taskman.run_on_main(
+                lambda: gui_hooks.av_player_did_begin_playing(self, tag)
+            )
+            asyncio.run(self.speakText(tag, voice.id))
+        def _on_done(self, ret: Future, cb: OnDoneCallback):
+            ret.result()
+
+            # inject file into the top of the audio queue
+            from aqt.sound import av_player
+
+            av_player.insert_file(self.tmppath)
+
+            # then tell player to advance, which will cause the file to be played
+            cb()
+        async def speakText(self, tag: TTSTag, voice_id):
+            import winrt.windows.media.speechsynthesis as speechsynthesis
+            import winrt.windows.storage.streams as streams
+            synthesizer = speechsynthesis.SpeechSynthesizer()
+
+            voices = speechsynthesis.SpeechSynthesizer.get_all_voices()
+            voice_match = next(filter(lambda v: v.id == voice_id, voices))
+
+            assert voice_match
+
+            synthesizer.voice = voice_match
+            synthesizer.options.speaking_rate = tag.speed
+
+            stream = await synthesizer.synthesize_text_to_stream_async(tag.field_text)
+            inputStream = stream.get_input_stream_at(0)
+            dataReader = streams.DataReader(inputStream)
+            dataReader.load_async(stream.size)
+            f = open(self.tmppath, 'wb')
+            for x in range(stream.size):
+                f.write(bytes([dataReader.read_byte()]))
+            f.close()