use QtMultimedia for recording instead of PyAudio

The unmute-on-first-duration-change approach is an attempt to prevent the clicks/pops that can happen at the start of recordings. If it doesn't solve the problem, we may need to drop down to the lower-level QAudioInput(). Closes https://github.com/ankitects/help-wanted/issues/23. May fix https://forums.ankiweb.net/t/anki-crashes-periodically-after-clicking-record-audio-button/5824, which I suspect was caused by processEvents().
2025-11-06 04:37:22 -05:00 · 2020-12-16 19:09:45 +10:00 · 2020-12-16 19:09:45 +10:00 · ecb7c1482f
commit ecb7c1482f
parent 3b84a8da40
10 changed files with 141 additions and 186 deletions
--- a/docs/development.md
+++ b/docs/development.md
@ -186,15 +186,6 @@ useful completions for things like i18n.TR.

 Audio playing requires `mpv` or `mplayer` to be in your system path.

-Currently pyaudio is not included as part of the build or the generated wheel
-requirements, so audio recording will not work when running in place. When installing
-the wheels, you can optionally install pyaudio as well.
-
-On Linux/Mac, install the portaudio libs: (`apt install portaudio19-dev` / `brew install portaudio`), then `pip install pyaudio`.
-
-On Windows, install the Python 3.8 wheel from
-https://github.com/ankitects/windows-ci-tools.
-
 Recording also requires `lame` to be in your system path.

 ## Tracing build problems
--- a/docs/linux.md
+++ b/docs/linux.md
@ -72,8 +72,7 @@ This will build Anki and run it in place.
 The first build will take a while, as it downloads and builds a bunch of
 dependencies. When the build is complete, Anki will automatically start.

-To play audio, install mpv. At the time of writing, recording is
-not yet supported, as currently pyaudio is not being installed.
+To play and record audio, install mpv and lame.

 If you or your distro has made ccache the standard compiler, you will need to
 set CC and CXX to point directly to gcc/g++ or clang/clang++ prior to building
--- a/docs/mac.md
+++ b/docs/mac.md
@ -50,8 +50,7 @@ This will build Anki and run it in place.
 The first build will take a while, as it downloads and builds a bunch of
 dependencies. When the build is complete, Anki will automatically start.

-To play audio, use Homebrew to install mpv. At the time of writing, recording is
-not yet supported, as currently pyaudio is not being installed.
+To play and record audio, use Homebrew to install mpv and lame.

 ## Optimized builds

--- a/docs/windows.md
+++ b/docs/windows.md
@ -81,9 +81,7 @@ This will build Anki and run it in place.
 The first build will take a while, as it downloads and builds a bunch of
 dependencies. When the build is complete, Anki will automatically start.

-To play audio, mpv.exe or mplayer.exe must be on the path. At the time
-of writing, recording is not yet supported, as currently pyaudio is
-not being installed.
+To play and record audio, mpv.exe and lame.exe must be on the path.

 ## Optimized builds

--- a/pylib/mypy.ini
+++ b/pylib/mypy.ini
@ -19,8 +19,6 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-distro]
 ignore_missing_imports = True
-[mypy-pyaudio]
-ignore_missing_imports = True
 [mypy-win32api]
 ignore_missing_imports = True
 [mypy-xml.dom]
--- a/qt/aqt/BUILD.bazel
+++ b/qt/aqt/BUILD.bazel
@ -100,7 +100,6 @@ py_wheel(
    python_tag = "py3",
    python_version = ">=3.8",
    requires = [
-        # "pyaudio", # currently off as a pain to install on Windows for code completion
        "beautifulsoup4",
        "requests",
        "send2trash",
--- a/qt/aqt/editor.py
+++ b/qt/aqt/editor.py
@ -27,7 +27,7 @@ from anki.utils import checksum, isLin, isWin, namedtmp, stripHTMLMedia
 from aqt import AnkiQt, gui_hooks
 from aqt.main import ResetReason
 from aqt.qt import *
-from aqt.sound import av_player, getAudio
+from aqt.sound import av_player
 from aqt.theme import theme_manager
 from aqt.utils import (
    TR,
@ -751,17 +751,12 @@ class Editor:
        return self.mw.col.media.writeData(fname, data)

    def onRecSound(self):
-        try:
-            file = getAudio(self.widget)
-        except Exception as e:
-            showWarning(
-                tr(TR.EDITING_COULDNT_RECORD_AUDIO_HAVE_YOU_INSTALLED)
-                + "\n\n"
-                + repr(str(e))
-            )
-            return
-        if file:
-            self.addMedia(file)
+        # Open the recording dialog over the editor's parent window. The
+        # third argument (True) asks record_audio() to encode the result;
+        # the callback then adds the file it receives with canDelete=True.
+        aqt.sound.record_audio(
+            self.parentWindow,
+            self.mw,
+            True,
+            lambda file: self.addMedia(file, canDelete=True),
+        )

    # Media downloads
    ######################################################################
--- a/qt/aqt/reviewer.py
+++ b/qt/aqt/reviewer.py
@ -17,7 +17,7 @@ from anki.cards import Card
 from anki.utils import stripHTML
 from aqt import AnkiQt, gui_hooks
 from aqt.qt import *
-from aqt.sound import av_player, getAudio, play_clicked_audio
+from aqt.sound import av_player, play_clicked_audio, record_audio
 from aqt.theme import theme_manager
 from aqt.toolbar import BottomBar
 from aqt.utils import (
@ -833,8 +833,12 @@ time = %(time)d;
        tooltip(tr(TR.STUDYING_NOTE_BURIED))

    def onRecordVoice(self) -> None:
-        self._recordedAudio = getAudio(self.mw, encode=False)
-        self.onReplayRecorded()
+        """Record the user's voice, then replay it.
+
+        The recording is requested unencoded (encode=False). Its path is
+        stored in self._recordedAudio before onReplayRecorded() is called.
+        """
+
+        def after_record(path: str) -> None:
+            self._recordedAudio = path
+            self.onReplayRecorded()
+
+        # record_audio() expects the main window as its mw argument, not the
+        # main window's task manager
+        record_audio(self.mw, self.mw, False, after_record)

    def onReplayRecorded(self) -> None:
        if not self._recordedAudio:
--- a/qt/aqt/sound.py
+++ b/qt/aqt/sound.py
@ -1,5 +1,8 @@
 # Copyright: Ankitects Pty Ltd and contributors
 # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
+
+from __future__ import annotations
+
 import atexit
 import os
 import re
@ -11,24 +14,21 @@ import wave
 from abc import ABC, abstractmethod
 from concurrent.futures import Future
 from operator import itemgetter
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple

 import aqt
 from anki import hooks
 from anki.cards import Card
 from anki.sound import AV_REF_RE, AVTag, SoundOrVideoTag
-from anki.utils import isLin, isMac, isWin
+from anki.utils import isLin, isMac, isWin, namedtmp
 from aqt import gui_hooks
 from aqt.mpv import MPV, MPVBase, MPVCommandError
 from aqt.qt import *
 from aqt.taskman import TaskManager
 from aqt.utils import TR, restoreGeom, saveGeom, showWarning, startup_info, tr

-try:
-    import pyaudio
-except:
-    pyaudio = None
-
+if TYPE_CHECKING:
+    from PyQt5.QtMultimedia import QAudioRecorder

 # AV player protocol
 ##########################################################################
@ -461,163 +461,137 @@ class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer):
        self.command("pause")


-# PyAudio recording
+# MP3 transcoding
 ##########################################################################


-PYAU_CHANNELS = 1
-PYAU_INPUT_INDEX: Optional[int] = None
+def _encode_mp3(src_wav: str, dst_mp3: str) -> None:
+    """Encode src_wav to dst_mp3 with lame, then delete src_wav.
+
+    Raises Exception if lame cannot be run or exits with a non-zero status;
+    in that case src_wav is left in place.
+    """
+    cmd = ["lame", src_wav, dst_mp3, "--noreplaygain", "--quiet"]
+    cmd, env = _packagedCmd(cmd)
+    try:
+        retcode = retryWait(subprocess.Popen(cmd, startupinfo=startup_info(), env=env))
+    except Exception as e:
+        # the command line is the message's argument; calling .join() on the
+        # translated message would splice it between every argument instead
+        raise Exception(tr(TR.MEDIA_ERROR_RUNNING, val=" ".join(cmd))) from e
+    if retcode != 0:
+        raise Exception(tr(TR.MEDIA_ERROR_RUNNING, val=" ".join(cmd)))

-processingSrc = "rec.wav"
-processingDst = "rec.mp3"
-recFiles: List[str] = []
-
-processingChain: List[List[str]] = [
-    ["lame", processingSrc, processingDst, "--noreplaygain", "--quiet"],
-]
+    # only reached on success: the wav is no longer needed once encoded
+    os.unlink(src_wav)


-class _Recorder:
-    def postprocess(self, encode=True) -> None:
-        self.encode = encode
-        for c in processingChain:
-            # print c
-            if not self.encode and c[0] == "lame":
-                continue
-            try:
-                cmd, env = _packagedCmd(c)
-                ret = retryWait(
-                    subprocess.Popen(cmd, startupinfo=startup_info(), env=env)
-                )
-            except:
-                ret = True
-            finally:
-                self.cleanup()
-            if ret:
-                raise Exception(tr(TR.MEDIA_ERROR_RUNNING, val=" ").join(cmd))
+def encode_mp3(mw: aqt.AnkiQt, src_wav: str, on_done: Callable[[str], None]) -> None:
+    "Encode the provided wav file to .mp3, and call on_done() with the path."
+    # destination path: the source path with ".wav" replaced by the current
+    # unix time (whole seconds) followed by ".mp3"
+    dst_mp3 = src_wav.replace(".wav", "%d.mp3" % time.time())

-    def cleanup(self) -> None:
-        if os.path.exists(processingSrc):
-            os.unlink(processingSrc)
+    def _on_done(fut: Future):
+        # result() re-raises any exception raised by the encode task, so
+        # on_done() is only reached when encoding succeeded
+        fut.result()
+        on_done(dst_mp3)

+    # NOTE(review): presumably runs the encode off the main thread and then
+    # invokes _on_done with the finished future - confirm against TaskManager
+    mw.taskman.run_in_background(lambda: _encode_mp3(src_wav, dst_mp3), _on_done)

-class PyAudioThreadedRecorder(threading.Thread):
-    def __init__(self, startupDelay: float) -> None:
-        threading.Thread.__init__(self)
-        self.startupDelay = startupDelay
-        self.finish = False
-        if isMac and qtminor > 12:
-            # trigger permission prompt
-            from PyQt5.QtMultimedia import QAudioDeviceInfo
-
-            QAudioDeviceInfo.defaultInputDevice()
-
-    def run(self) -> None:
-        chunk = 1024
-        p = pyaudio.PyAudio()
-
-        rate = int(p.get_default_input_device_info()["defaultSampleRate"])
-        wait = int(rate * self.startupDelay)
-        PYAU_FORMAT = pyaudio.paInt16
-
-        stream = p.open(
-            format=PYAU_FORMAT,
-            channels=PYAU_CHANNELS,
-            rate=rate,
-            input=True,
-            input_device_index=PYAU_INPUT_INDEX,
-            frames_per_buffer=chunk,
-        )
-
-        stream.read(wait, exception_on_overflow=False)
-
-        data = b""
-        while not self.finish:
-            data += stream.read(chunk, exception_on_overflow=False)
-        stream.close()
-        p.terminate()
-        wf = wave.open(processingSrc, "wb")
-        wf.setnchannels(PYAU_CHANNELS)
-        wf.setsampwidth(p.get_sample_size(PYAU_FORMAT))
-        wf.setframerate(rate)
-        wf.writeframes(data)
-        wf.close()
-
-
-class PyAudioRecorder(_Recorder):
-
-    # discard first 250ms which may have pops/cracks
-    startupDelay = 0.25
-
-    def __init__(self) -> None:
-        for t in recFiles + [processingSrc, processingDst]:
-            try:
-                os.unlink(t)
-            except OSError:
-                pass
-        self.encode = False
-
-    def start(self) -> None:
-        self.thread = PyAudioThreadedRecorder(startupDelay=self.startupDelay)
-        self.thread.start()
-
-    def stop(self) -> None:
-        self.thread.finish = True
-        self.thread.join()
-
-    def file(self) -> str:
-        if self.encode:
-            tgt = "rec%d.mp3" % time.time()
-            os.rename(processingDst, tgt)
-            return tgt
-        else:
-            return processingSrc
-
-
-Recorder = PyAudioRecorder

 # Recording dialog
 ##########################################################################


-def getAudio(parent: QWidget, encode: bool = True) -> Optional[str]:
-    "Record and return filename"
-    if not pyaudio:
-        showWarning("Please install pyaudio.")
-        return None
-    # record first
-    r = Recorder()
-    mb = QMessageBox(parent)
-    restoreGeom(mb, "audioRecorder")
-    mb.setWindowTitle("Anki")
-    mb.setIconPixmap(QPixmap(":/icons/media-record.png"))
-    but = QPushButton(tr(TR.ACTIONS_SAVE))
-    mb.addButton(but, QMessageBox.AcceptRole)
-    but.setDefault(True)
-    but = QPushButton(tr(TR.ACTIONS_CANCEL))
-    mb.addButton(but, QMessageBox.RejectRole)
-    mb.setEscapeButton(but)
-    t = time.time()
-    r.start()
-    time.sleep(r.startupDelay)
-    QApplication.instance().processEvents()  # type: ignore
-    while not mb.clickedButton():
-        txt = tr(TR.MEDIA_RECORDINGTIME)
-        mb.setText(txt % (time.time() - t))
-        mb.show()
-        QApplication.instance().processEvents()  # type: ignore
-    if mb.clickedButton() == mb.escapeButton():
-        r.stop()
-        r.cleanup()
-        return None
-    saveGeom(mb, "audioRecorder")
-    # ensure at least a second captured
-    while time.time() - t < 1:
-        time.sleep(0.1)
-    r.stop()
-    # process
-    r.postprocess(encode)
-    return r.file()
+class RecordDialog(QDialog):
+    """Dialog that records audio to a temporary .wav file while it is open.
+
+    Recording starts when the dialog is constructed. Save stops the recorder
+    and passes the output path to on_success; Cancel stops the recorder and
+    deletes the output file.
+    """
+
+    _recorder: QAudioRecorder
+
+    def __init__(
+        self,
+        parent: QWidget,
+        mw: aqt.AnkiQt,
+        on_success: Callable[[str], None],
+    ):
+        QDialog.__init__(self, parent)
+        self._parent = parent
+        self.mw = mw
+        self._on_success = on_success
+
+        self._start_recording()
+        self._setup_dialog()
+
+    def _setup_dialog(self):
+        "Build the icon, elapsed-time label and Save/Cancel buttons, then show."
+        self.setWindowTitle("Anki")
+        icon = QLabel()
+        icon.setPixmap(QPixmap(":/icons/media-record.png"))
+        self.label = QLabel("")
+        hbox = QHBoxLayout()
+        hbox.addWidget(icon)
+        hbox.addWidget(self.label)
+        v = QVBoxLayout()
+        v.addLayout(hbox)
+        buts = QDialogButtonBox.Save | QDialogButtonBox.Cancel
+        b = QDialogButtonBox(buts)  # type: ignore
+        v.addWidget(b)
+        self.setLayout(v)
+        save_button = b.button(QDialogButtonBox.Save)
+        save_button.setDefault(True)
+        save_button.setAutoDefault(True)
+        qconnect(save_button.clicked, self.accept)
+        cancel_button = b.button(QDialogButtonBox.Cancel)
+        cancel_button.setDefault(False)
+        cancel_button.setAutoDefault(False)
+        qconnect(cancel_button.clicked, self.reject)
+        restoreGeom(self, "audioRecorder2")
+        self.show()
+
+    def _save_diag(self):
+        "Persist the dialog geometry under the audioRecorder2 key."
+        saveGeom(self, "audioRecorder2")
+
+    def _start_recording(self):
+        "Start a muted wav recording and the timer that refreshes the label."
+        from PyQt5.QtMultimedia import QAudioRecorder
+
+        # start recording
+        self._recorder = QAudioRecorder(self._parent)
+        self._output = namedtmp("rec.wav")
+        self._recorder.setEncodingSettings(
+            self._recorder.audioSettings(),
+            self._recorder.videoSettings(),
+            "audio/x-wav",
+        )
+        self._recorder.setOutputLocation(QUrl.fromLocalFile(self._output))
+        # begin muted; _on_timer() unmutes once a duration has been reported
+        self._recorder.setMuted(True)
+        self._recorder.record()
+
+        self._timer = t = QTimer(self._parent)
+        t.timeout.connect(self._on_timer)
+        t.setSingleShot(False)
+        t.start(300)
+
+    def _on_timer(self):
+        "Unmute once recording has started, and refresh the elapsed time."
+        duration = self._recorder.duration()
+        # disable mute after recording starts to avoid clicks/pops
+        if duration > 0 and self._recorder.isMuted():
+            self._recorder.setMuted(False)
+        self.label.setText(tr(TR.MEDIA_RECORDINGTIME) % (duration / 1000.0))
+
+    def accept(self):
+        "Stop recording, save the geometry, and pass the file to on_success."
+        try:
+            # the timer is parented to the caller's widget and repeats, so
+            # it would keep firing after this dialog closes unless stopped
+            self._timer.stop()
+            self._recorder.stop()
+            self._save_diag()
+        finally:
+            QDialog.accept(self)
+
+        self._on_success(self._output)
+
+    def reject(self):
+        "Stop recording and delete the output file."
+        try:
+            self._timer.stop()
+            self._recorder.stop()
+            try:
+                os.unlink(self._output)
+            except FileNotFoundError:
+                # the output file does not exist, so nothing to discard
+                pass
+        finally:
+            QDialog.reject(self)
+
+
+def record_audio(
+    parent: QWidget, mw: aqt.AnkiQt, encode: bool, on_done: Callable[[str], None]
+):
+    """Open a RecordDialog and report the saved recording through on_done().
+
+    With encode set, the recording is handed to encode_mp3(), which calls
+    on_done() itself; otherwise on_done() is called with the recorded path.
+    """
+
+    def _finished(recorded_path: str):
+        if encode:
+            encode_mp3(mw, recorded_path, on_done)
+            return
+        on_done(recorded_path)
+
+    _dialog = RecordDialog(parent, mw, _finished)


 # Legacy audio interface
--- a/qt/mypy.ini
+++ b/qt/mypy.ini
@ -22,8 +22,6 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-distro]
 ignore_missing_imports = True
-[mypy-pyaudio]
-ignore_missing_imports = True
 [mypy-win32api]
 ignore_missing_imports = True
 [mypy-xml.dom]