From b4b1c2013f8712a0fb77bc07243d6d6e03af89a7 Mon Sep 17 00:00:00 2001
From: Damien Elmes <gpg@ankiweb.net>
Date: Thu, 4 Sep 2025 12:54:02 +1000
Subject: [PATCH] Use the audio input device's preferred format

19f9afba644c5e219e7305c87d48887d59db4a5d broke recording for devices that
only support a single channel. Instead of hard-coding the values, we should
be using what the device prefers.

Apparently some devices may only support float formats, so conversion code
has been added to handle that case as well.

https://forums.ankiweb.net/t/cant-record-my-voice-after-upgrading-to-25-7-3/64453
---
 qt/aqt/sound.py | 68 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/qt/aqt/sound.py b/qt/aqt/sound.py
index acf531efb..f54ebd3e8 100644
--- a/qt/aqt/sound.py
+++ b/qt/aqt/sound.py
@@ -631,18 +631,44 @@ class QtAudioInputRecorder(Recorder):
         self.mw = mw
         self._parent = parent
 
-        from PyQt6.QtMultimedia import QAudioFormat, QAudioSource  # type: ignore
+        from PyQt6.QtMultimedia import QAudioSource, QMediaDevices  # type: ignore
 
-        format = QAudioFormat()
-        format.setChannelCount(2)
-        format.setSampleRate(44100)
-        format.setSampleFormat(QAudioFormat.SampleFormat.Int16)
+        # Get the default audio input device
+        device = QMediaDevices.defaultAudioInput()
 
-        source = QAudioSource(format, parent)
+        # Try to use Int16 format first (avoids conversion). The candidate is
+        # a second copy, so the fallback below keeps the device's own format.
+        preferred_format = device.preferredFormat()
+        int16_format = device.preferredFormat()
+        int16_format.setSampleFormat(preferred_format.SampleFormat.Int16)
 
+        if device.isFormatSupported(int16_format):
+            format = int16_format
+        else:
+            # Fall back to device's preferred format
+            format = preferred_format
+
+        # Create the audio source with the chosen format
+        source = QAudioSource(device, format, parent)
+
+        # Store the actual format being used
         self._format = source.format()
         self._audio_input = source
 
+    def _convert_float_to_int16(self, float_buffer: bytearray) -> bytes:
+        """Convert native float32 samples to clipped int16 samples for WAV output.
+
+        Trailing bytes that do not form a whole 4-byte sample are ignored.
+        """
+        import struct
+
+        float_count = len(float_buffer) // 4  # 4 bytes per float32
+        floats = struct.unpack_from(f"{float_count}f", float_buffer)
+        # Clamp to [-1.0, 1.0] first so the scaled value always fits in int16
+        int16_samples = [int(max(-1.0, min(1.0, f)) * 32767) for f in floats]
+
+        return struct.pack(f"{len(int16_samples)}h", *int16_samples)
+
     def start(self, on_done: Callable[[], None]) -> None:
         self._iodevice = self._audio_input.start()
         self._buffer = bytearray()
@@ -665,18 +691,32 @@ class QtAudioInputRecorder(Recorder):
                 return
 
             def write_file() -> None:
-                # swallow the first 300ms to allow audio device to quiesce
-                wait = int(44100 * self.STARTUP_DELAY)
-                if len(self._buffer) <= wait:
-                    return
-                self._buffer = self._buffer[wait:]
+                from PyQt6.QtMultimedia import QAudioFormat
 
-                # write out the wave file
+                # swallow the first 300ms to allow audio device to quiesce
+                bytes_per_frame = self._format.bytesPerFrame()
+                frames_to_skip = int(self._format.sampleRate() * self.STARTUP_DELAY)
+                bytes_to_skip = frames_to_skip * bytes_per_frame
+
+                if len(self._buffer) <= bytes_to_skip:
+                    return
+                self._buffer = self._buffer[bytes_to_skip:]
+
+                # Float samples are written as int16; integer formats are written as-is
+                if self._format.sampleFormat() == QAudioFormat.SampleFormat.Float:
+                    audio_data = self._convert_float_to_int16(self._buffer)
+                    sample_width = 2  # int16 is 2 bytes
+                else:
+                    # For integer formats, use the data as-is
+                    audio_data = bytes(self._buffer)
+                    sample_width = self._format.bytesPerSample()
+
+                # write the wave file using the recorded channels, sample width and rate
                 wf = wave.open(self.output_path, "wb")
                 wf.setnchannels(self._format.channelCount())
-                wf.setsampwidth(2)
+                wf.setsampwidth(sample_width)
                 wf.setframerate(self._format.sampleRate())
-                wf.writeframes(self._buffer)
+                wf.writeframes(audio_data)
                 wf.close()
 
             def and_then(fut: Future) -> None: