use QtMultimedia for recording instead of PyAudio

The unmute-on-first-duration-change approach is an attempt to prevent the clicks/pops that can happen at the start of recordings. If it doesn't solve the problem, we may need to drop down to the lower-level QAudioInput(). Closes https://github.com/ankitects/help-wanted/issues/23. May fix https://forums.ankiweb.net/t/anki-crashes-periodically-after-clicking-record-audio-button/5824, which I suspect was caused by processEvents().
2025-12-24 12:22:56 -05:00 · 2020-12-16 19:09:45 +10:00 · 2020-12-16 19:09:45 +10:00 · ecb7c1482f
commit ecb7c1482f
parent 3b84a8da40
10 changed files with 141 additions and 186 deletions
--- a/docs/development.md
+++ b/docs/development.md
@ -186,15 +186,6 @@ useful completions for things like i18n.TR.
 Audio playing requires `mpv` or `mplayer` to be in your system path.
 Currently pyaudio is not included as part of the build or the generated wheel
 requirements, so audio recording will not work when running in place. When installing
 the wheels, you can optionally install pyaudio as well.
 On Linux/Mac, install the portaudio libs: (`apt install portaudio19-dev` / `brew install portaudio`), then `pip install pyaudio`.
 On Windows, install the Python 3.8 wheel from
 https://github.com/ankitects/windows-ci-tools.
 Recording also requires `lame` to be in your system path.
 ## Tracing build problems
--- a/docs/linux.md
+++ b/docs/linux.md
@ -72,8 +72,7 @@ This will build Anki and run it in place.
 The first build will take a while, as it downloads and builds a bunch of
 dependencies. When the build is complete, Anki will automatically start.
-To play audio, install mpv. At the time of writing, recording is
+To play and record audio, install mpv and lame.
 not yet supported, as currently pyaudio is not being installed.
 If you or your distro has made ccache the standard compiler, you will need to
 set CC and CXX to point directly to gcc/g++ or clang/clang++ prior to building
--- a/docs/mac.md
+++ b/docs/mac.md
@ -50,8 +50,7 @@ This will build Anki and run it in place.
 The first build will take a while, as it downloads and builds a bunch of
 dependencies. When the build is complete, Anki will automatically start.
-To play audio, use Homebrew to install mpv. At the time of writing, recording is
+To play audio, use Homebrew to install mpv and lame.
 not yet supported, as currently pyaudio is not being installed.
 ## Optimized builds
--- a/docs/windows.md
+++ b/docs/windows.md
@ -81,9 +81,7 @@ This will build Anki and run it in place.
 The first build will take a while, as it downloads and builds a bunch of
 dependencies. When the build is complete, Anki will automatically start.
-To play audio, mpv.exe or mplayer.exe must be on the path. At the time
+To play and record audio, mpv.exe and lame.exe must be on the path.
 of writing, recording is not yet supported, as currently pyaudio is
 not being installed.
 ## Optimized builds
--- a/pylib/mypy.ini
+++ b/pylib/mypy.ini
@ -19,8 +19,6 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-distro]
 ignore_missing_imports = True
 [mypy-pyaudio]
 ignore_missing_imports = True
 [mypy-win32api]
 ignore_missing_imports = True
 [mypy-xml.dom]
--- a/qt/aqt/BUILD.bazel
+++ b/qt/aqt/BUILD.bazel
@ -100,7 +100,6 @@ py_wheel(
    python_tag = "py3",
    python_version = ">=3.8",
    requires = [
        # "pyaudio", # currently off as a pain to install on Windows for code completion
        "beautifulsoup4",
        "requests",
        "send2trash",
--- a/qt/aqt/editor.py
+++ b/qt/aqt/editor.py
@ -27,7 +27,7 @@ from anki.utils import checksum, isLin, isWin, namedtmp, stripHTMLMedia
 from aqt import AnkiQt, gui_hooks
 from aqt.main import ResetReason
 from aqt.qt import *
-from aqt.sound import av_player, getAudio
+from aqt.sound import av_player
 from aqt.theme import theme_manager
 from aqt.utils import (
    TR,
@ -751,17 +751,12 @@ class Editor:
        return self.mw.col.media.writeData(fname, data)
    def onRecSound(self):
-        try:
+        aqt.sound.record_audio(
-            file = getAudio(self.widget)
+            self.parentWindow,
-        except Exception as e:
+            self.mw,
-            showWarning(
+            True,
-                tr(TR.EDITING_COULDNT_RECORD_AUDIO_HAVE_YOU_INSTALLED)
+            lambda file: self.addMedia(file, canDelete=True),
                + "\n\n"
                + repr(str(e))
        )
            return
        if file:
            self.addMedia(file)
    # Media downloads
    ######################################################################
--- a/qt/aqt/reviewer.py
+++ b/qt/aqt/reviewer.py
@ -17,7 +17,7 @@ from anki.cards import Card
 from anki.utils import stripHTML
 from aqt import AnkiQt, gui_hooks
 from aqt.qt import *
-from aqt.sound import av_player, getAudio, play_clicked_audio
+from aqt.sound import av_player, play_clicked_audio, record_audio
 from aqt.theme import theme_manager
 from aqt.toolbar import BottomBar
 from aqt.utils import (
@ -833,9 +833,13 @@ time = %(time)d;
        tooltip(tr(TR.STUDYING_NOTE_BURIED))
    def onRecordVoice(self) -> None:
-        self._recordedAudio = getAudio(self.mw, encode=False)
+        def after_record(path: str):
            self._recordedAudio = path
            print(path)
            self.onReplayRecorded()
        record_audio(self.mw, self.mw.taskman, False, after_record)
    def onReplayRecorded(self) -> None:
        if not self._recordedAudio:
            tooltip(tr(TR.STUDYING_YOU_HAVENT_RECORDED_YOUR_VOICE_YET))
--- a/qt/aqt/sound.py
+++ b/qt/aqt/sound.py
@ -1,5 +1,8 @@
 # Copyright: Ankitects Pty Ltd and contributors
 # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 from __future__ import annotations
 import atexit
 import os
 import re
@ -11,24 +14,21 @@ import wave
 from abc import ABC, abstractmethod
 from concurrent.futures import Future
 from operator import itemgetter
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
 import aqt
 from anki import hooks
 from anki.cards import Card
 from anki.sound import AV_REF_RE, AVTag, SoundOrVideoTag
-from anki.utils import isLin, isMac, isWin
+from anki.utils import isLin, isMac, isWin, namedtmp
 from aqt import gui_hooks
 from aqt.mpv import MPV, MPVBase, MPVCommandError
 from aqt.qt import *
 from aqt.taskman import TaskManager
 from aqt.utils import TR, restoreGeom, saveGeom, showWarning, startup_info, tr
-try:
+if TYPE_CHECKING:
-    import pyaudio
+    from PyQt5.QtMultimedia import QAudioRecorder
 except:
    pyaudio = None
 # AV player protocol
 ##########################################################################
@ -461,163 +461,137 @@ class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer):
        self.command("pause")
-# PyAudio recording
+# MP3 transcoding
 ##########################################################################
-PYAU_CHANNELS = 1
+def _encode_mp3(src_wav: str, dst_mp3: str) -> None:
-PYAU_INPUT_INDEX: Optional[int] = None
+    cmd = ["lame", src_wav, dst_mp3, "--noreplaygain", "--quiet"]
-
+    cmd, env = _packagedCmd(cmd)
 processingSrc = "rec.wav"
 processingDst = "rec.mp3"
 recFiles: List[str] = []
 processingChain: List[List[str]] = [
    ["lame", processingSrc, processingDst, "--noreplaygain", "--quiet"],
 ]
 class _Recorder:
    def postprocess(self, encode=True) -> None:
        self.encode = encode
        for c in processingChain:
            # print c
            if not self.encode and c[0] == "lame":
                continue
    try:
-                cmd, env = _packagedCmd(c)
+        retcode = retryWait(subprocess.Popen(cmd, startupinfo=startup_info(), env=env))
-                ret = retryWait(
+    except Exception as e:
-                    subprocess.Popen(cmd, startupinfo=startup_info(), env=env)
+        raise Exception(tr(TR.MEDIA_ERROR_RUNNING, val=" ").join(cmd)) from e
-                )
+    if retcode != 0:
            except:
                ret = True
            finally:
                self.cleanup()
            if ret:
        raise Exception(tr(TR.MEDIA_ERROR_RUNNING, val=" ").join(cmd))
-    def cleanup(self) -> None:
+    os.unlink(src_wav)
        if os.path.exists(processingSrc):
            os.unlink(processingSrc)
-class PyAudioThreadedRecorder(threading.Thread):
+def encode_mp3(mw: aqt.AnkiQt, src_wav: str, on_done: Callable[[str], None]) -> None:
-    def __init__(self, startupDelay: float) -> None:
+    "Encode the provided wav file to .mp3, and call on_done() with the path."
-        threading.Thread.__init__(self)
+    dst_mp3 = src_wav.replace(".wav", "%d.mp3" % time.time())
        self.startupDelay = startupDelay
        self.finish = False
        if isMac and qtminor > 12:
            # trigger permission prompt
            from PyQt5.QtMultimedia import QAudioDeviceInfo
-            QAudioDeviceInfo.defaultInputDevice()
+    def _on_done(fut: Future):
        fut.result()
        on_done(dst_mp3)
-    def run(self) -> None:
+    mw.taskman.run_in_background(lambda: _encode_mp3(src_wav, dst_mp3), _on_done)
        chunk = 1024
        p = pyaudio.PyAudio()
        rate = int(p.get_default_input_device_info()["defaultSampleRate"])
        wait = int(rate * self.startupDelay)
        PYAU_FORMAT = pyaudio.paInt16
        stream = p.open(
            format=PYAU_FORMAT,
            channels=PYAU_CHANNELS,
            rate=rate,
            input=True,
            input_device_index=PYAU_INPUT_INDEX,
            frames_per_buffer=chunk,
        )
        stream.read(wait, exception_on_overflow=False)
        data = b""
        while not self.finish:
            data += stream.read(chunk, exception_on_overflow=False)
        stream.close()
        p.terminate()
        wf = wave.open(processingSrc, "wb")
        wf.setnchannels(PYAU_CHANNELS)
        wf.setsampwidth(p.get_sample_size(PYAU_FORMAT))
        wf.setframerate(rate)
        wf.writeframes(data)
        wf.close()
 class PyAudioRecorder(_Recorder):
    # discard first 250ms which may have pops/cracks
    startupDelay = 0.25
    def __init__(self) -> None:
        for t in recFiles + [processingSrc, processingDst]:
            try:
                os.unlink(t)
            except OSError:
                pass
        self.encode = False
    def start(self) -> None:
        self.thread = PyAudioThreadedRecorder(startupDelay=self.startupDelay)
        self.thread.start()
    def stop(self) -> None:
        self.thread.finish = True
        self.thread.join()
    def file(self) -> str:
        if self.encode:
            tgt = "rec%d.mp3" % time.time()
            os.rename(processingDst, tgt)
            return tgt
        else:
            return processingSrc
 Recorder = PyAudioRecorder
 # Recording dialog
 ##########################################################################
-def getAudio(parent: QWidget, encode: bool = True) -> Optional[str]:
+class RecordDialog(QDialog):
-    "Record and return filename"
+    _recorder: QAudioRecorder
-    if not pyaudio:
+
-        showWarning("Please install pyaudio.")
+    def __init__(
-        return None
+        self,
-    # record first
+        parent: QWidget,
-    r = Recorder()
+        mw: aqt.AnkiQt,
-    mb = QMessageBox(parent)
+        on_success: Callable[[str], None],
-    restoreGeom(mb, "audioRecorder")
+    ):
-    mb.setWindowTitle("Anki")
+        QDialog.__init__(self, parent)
-    mb.setIconPixmap(QPixmap(":/icons/media-record.png"))
+        self._parent = parent
-    but = QPushButton(tr(TR.ACTIONS_SAVE))
+        self.mw = mw
-    mb.addButton(but, QMessageBox.AcceptRole)
+        self._on_success = on_success
-    but.setDefault(True)
+
-    but = QPushButton(tr(TR.ACTIONS_CANCEL))
+        self._start_recording()
-    mb.addButton(but, QMessageBox.RejectRole)
+        self._setup_dialog()
-    mb.setEscapeButton(but)
+
-    t = time.time()
+    def _setup_dialog(self):
-    r.start()
+        self.setWindowTitle("Anki")
-    time.sleep(r.startupDelay)
+        icon = QLabel()
-    QApplication.instance().processEvents()  # type: ignore
+        icon.setPixmap(QPixmap(":/icons/media-record.png"))
-    while not mb.clickedButton():
+        self.label = QLabel("")
-        txt = tr(TR.MEDIA_RECORDINGTIME)
+        hbox = QHBoxLayout()
-        mb.setText(txt % (time.time() - t))
+        hbox.addWidget(icon)
-        mb.show()
+        hbox.addWidget(self.label)
-        QApplication.instance().processEvents()  # type: ignore
+        v = QVBoxLayout()
-    if mb.clickedButton() == mb.escapeButton():
+        v.addLayout(hbox)
-        r.stop()
+        buts = QDialogButtonBox.Save | QDialogButtonBox.Cancel
-        r.cleanup()
+        b = QDialogButtonBox(buts)  # type: ignore
-        return None
+        v.addWidget(b)
-    saveGeom(mb, "audioRecorder")
+        self.setLayout(v)
-    # ensure at least a second captured
+        save_button = b.button(QDialogButtonBox.Save)
-    while time.time() - t < 1:
+        save_button.setDefault(True)
-        time.sleep(0.1)
+        save_button.setAutoDefault(True)
-    r.stop()
+        qconnect(save_button.clicked, self.accept)
-    # process
+        cancel_button = b.button(QDialogButtonBox.Cancel)
-    r.postprocess(encode)
+        cancel_button.setDefault(False)
-    return r.file()
+        cancel_button.setAutoDefault(False)
        qconnect(cancel_button.clicked, self.reject)
        restoreGeom(self, "audioRecorder2")
        self.show()
    def _save_diag(self):
        """Save this dialog's geometry via saveGeom().

        Uses the "audioRecorder2" key, the same key that is passed to
        restoreGeom() when the dialog is set up.
        """
        saveGeom(self, "audioRecorder2")
    def _start_recording(self):
        """Start a muted wav recording to a temp file and begin polling it.

        Creates the QAudioRecorder (stored on self._recorder), points it at
        a "rec.wav" path obtained from namedtmp() (stored on self._output),
        and starts a repeating 300ms timer that drives _on_timer().
        """
        # imported lazily; at module level it is only imported for type checking
        from PyQt5.QtMultimedia import QAudioRecorder
        recorder = QAudioRecorder(self._parent)
        self._recorder = recorder
        self._output = namedtmp("rec.wav")
        # keep the default audio/video settings, but request a wav container
        recorder.setEncodingSettings(
            recorder.audioSettings(),
            recorder.videoSettings(),
            "audio/x-wav",
        )
        recorder.setOutputLocation(QUrl.fromLocalFile(self._output))
        # begin muted; _on_timer() lifts the mute once the duration advances
        recorder.setMuted(True)
        recorder.record()
        # repeating timer used to refresh the elapsed-time label
        timer = QTimer(self._parent)
        self._timer = timer
        timer.timeout.connect(self._on_timer)
        timer.setSingleShot(False)
        timer.start(300)
    def _on_timer(self):
        """Timer callback: unmute once recording is under way, then update the label."""
        recorder = self._recorder
        elapsed_ms = recorder.duration()
        # the recorder is started muted; only unmute after the duration has
        # moved past zero, to keep start-of-recording clicks/pops out
        if elapsed_ms > 0 and recorder.isMuted():
            recorder.setMuted(False)
        # duration() is divided by 1000 before being shown in the label
        seconds = elapsed_ms / 1000.0
        self.label.setText(tr(TR.MEDIA_RECORDINGTIME) % seconds)
    def accept(self):
        """Finish the recording, close the dialog, and report the output file.

        Stops the polling timer and the recorder, saves the dialog geometry,
        and closes the dialog even if one of those steps raises. On success,
        on_success is then called with the path of the recorded wav file.
        """
        try:
            # the timer is parented to the outer widget rather than to this
            # dialog, so it would keep firing after close unless stopped here
            self._timer.stop()
            self._recorder.stop()
            self._save_diag()
        finally:
            QDialog.accept(self)
        self._on_success(self._output)
    def reject(self):
        """Abort the recording, discard the output file, and close the dialog.

        The dialog is closed even if stopping the recorder or removing the
        file raises.
        """
        try:
            # the timer is parented to the outer widget rather than to this
            # dialog, so it would keep firing after close unless stopped here
            self._timer.stop()
            self._recorder.stop()
            try:
                os.unlink(self._output)
            except FileNotFoundError:
                # NOTE(review): presumably the recorder may not have created
                # the file yet (e.g. recording never started) - nothing to remove
                pass
        finally:
            QDialog.reject(self)
 def record_audio(
    parent: QWidget, mw: aqt.AnkiQt, encode: bool, on_done: Callable[[str], None]
 ):
    """Open the recording dialog and hand the resulting file to on_done().

    When encode is true, the recorded wav is first passed to encode_mp3(),
    which calls on_done() itself; otherwise on_done() receives the wav path
    directly.
    """
    def after_record(path: str):
        if encode:
            encode_mp3(mw, path, on_done)
        else:
            on_done(path)
    # the dialog starts recording and shows itself from its constructor
    _diag = RecordDialog(parent, mw, after_record)
 # Legacy audio interface
--- a/qt/mypy.ini
+++ b/qt/mypy.ini
@ -22,8 +22,6 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-distro]
 ignore_missing_imports = True
 [mypy-pyaudio]
 ignore_missing_imports = True
 [mypy-win32api]
 ignore_missing_imports = True
 [mypy-xml.dom]