From b4b1c2013f8712a0fb77bc07243d6d6e03af89a7 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Thu, 4 Sep 2025 12:54:02 +1000 Subject: [PATCH] Use the audio input device's preferred format 19f9afba644c5e219e7305c87d48887d59db4a5d broke recording for devices that only support a single channel. Instead of hard-coding the values, we should be using what the device prefers. Apparently some devices may only support float formats, so conversion code has been added to handle that case as well. https://forums.ankiweb.net/t/cant-record-my-voice-after-upgrading-to-25-7-3/64453 --- qt/aqt/sound.py | 68 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/qt/aqt/sound.py b/qt/aqt/sound.py index acf531efb..f54ebd3e8 100644 --- a/qt/aqt/sound.py +++ b/qt/aqt/sound.py @@ -631,18 +631,44 @@ class QtAudioInputRecorder(Recorder): self.mw = mw self._parent = parent - from PyQt6.QtMultimedia import QAudioFormat, QAudioSource # type: ignore + from PyQt6.QtMultimedia import QAudioSource, QMediaDevices # type: ignore - format = QAudioFormat() - format.setChannelCount(2) - format.setSampleRate(44100) - format.setSampleFormat(QAudioFormat.SampleFormat.Int16) + # Get the default audio input device + device = QMediaDevices.defaultAudioInput() - source = QAudioSource(format, parent) + # Try to use Int16 format first (avoids conversion) + preferred_format = device.preferredFormat() + int16_format = preferred_format + int16_format.setSampleFormat(preferred_format.SampleFormat.Int16) + if device.isFormatSupported(int16_format): + # Use Int16 if supported + format = int16_format + else: + # Fall back to device's preferred format + format = preferred_format + + # Create the audio source with the chosen format + source = QAudioSource(device, format, parent) + + # Store the actual format being used self._format = source.format() self._audio_input = source + def _convert_float_to_int16(self, float_buffer: bytearray) -> bytes: + """Convert float32 audio samples to int16 format for WAV output.""" + import struct + + float_count = len(float_buffer) // 4 # 4 bytes per float32 + floats = struct.unpack(f"{float_count}f", float_buffer) + + # Convert to int16 range, clipping and scaling in one step + int16_samples = [ + max(-32768, min(32767, int(max(-1.0, min(1.0, f)) * 32767))) for f in floats + ] + + return struct.pack(f"{len(int16_samples)}h", *int16_samples) + def start(self, on_done: Callable[[], None]) -> None: self._iodevice = self._audio_input.start() self._buffer = bytearray() @@ -665,18 +691,32 @@ class QtAudioInputRecorder(Recorder): return def write_file() -> None: - # swallow the first 300ms to allow audio device to quiesce - wait = int(44100 * self.STARTUP_DELAY) - if len(self._buffer) <= wait: - return - self._buffer = self._buffer[wait:] + from PyQt6.QtMultimedia import QAudioFormat - # write out the wave file + # swallow the first 300ms to allow audio device to quiesce + bytes_per_frame = self._format.bytesPerFrame() + frames_to_skip = int(self._format.sampleRate() * self.STARTUP_DELAY) + bytes_to_skip = frames_to_skip * bytes_per_frame + + if len(self._buffer) <= bytes_to_skip: + return + self._buffer = self._buffer[bytes_to_skip:] + + # Check if we need to convert float samples to int16 + if self._format.sampleFormat() == QAudioFormat.SampleFormat.Float: + audio_data = self._convert_float_to_int16(self._buffer) + sample_width = 2 # int16 is 2 bytes + else: + # For integer formats, use the data as-is + audio_data = bytes(self._buffer) + sample_width = self._format.bytesPerSample() + + # write out the wave file with the correct format parameters wf = wave.open(self.output_path, "wb") wf.setnchannels(self._format.channelCount()) - wf.setsampwidth(2) + wf.setsampwidth(sample_width) wf.setframerate(self._format.sampleRate()) - wf.writeframes(self._buffer) + wf.writeframes(audio_data) wf.close() def and_then(fut: Future) -> None: