Progress in milestone 2

Have a very basic UI
This commit is contained in:
sam
2026-05-03 16:45:56 +02:00
parent 041955345b
commit dbad2c34d7
5 changed files with 84 additions and 12 deletions

5
client_node/build.rs Normal file
View File

@@ -0,0 +1,5 @@
fn main() {
    // Build script: force linking of additional abseil libraries that
    // webrtc-audio-processing-sys might miss depending on the system's
    // abseil version. The `cargo:rustc-link-lib=<name>` directive tells
    // Cargo to pass `-l absl_strings_internal` to the final link step.
    println!("cargo:rustc-link-lib=absl_strings_internal");
}

View File

@@ -22,7 +22,7 @@ use webrtc_audio_processing_config::{
Config, EchoCanceller, NoiseSuppression, NoiseSuppressionLevel,
};
use super::{FRAME_SIZE, SAMPLE_RATE};
use super::SAMPLE_RATE;
/// RMS threshold below which a frame is considered silence.
/// This provides a simple amplitude-based VAD since the WebRTC v2 API
@@ -37,6 +37,7 @@ const VAD_RMS_THRESHOLD: f32 = 0.01;
pub fn spawn_dsp_thread(
mut consumer: HeapCons<f32>,
ptt_flag: Arc<AtomicBool>,
audio_dumper_flag: Arc<AtomicBool>,
active_speaker_tx: watch::Sender<bool>,
) {
thread::spawn(move || {
@@ -60,21 +61,62 @@ pub fn spawn_dsp_thread(
};
ap.set_config(config);
// Mono capture: one channel with FRAME_SIZE samples.
let mut frame_buf = vec![vec![0.0f32; FRAME_SIZE]];
// WebRTC strictly requires 10ms frames (480 samples at 48kHz).
const DSP_FRAME_SIZE: usize = 480;
let mut frame_buf = vec![vec![0.0f32; DSP_FRAME_SIZE]];
let wav_spec = hound::WavSpec {
channels: 1,
sample_rate: SAMPLE_RATE,
bits_per_sample: 32,
sample_format: hound::SampleFormat::Float,
};
let mut raw_writer: Option<hound::WavWriter<std::io::BufWriter<std::fs::File>>> = None;
let mut dsp_writer: Option<hound::WavWriter<std::io::BufWriter<std::fs::File>>> = None;
loop {
// Wait until we have a full 20ms frame (960 samples at 48kHz).
if consumer.occupied_len() >= FRAME_SIZE {
// Wait until we have a full 10ms frame (480 samples at 48kHz).
if consumer.occupied_len() >= DSP_FRAME_SIZE {
let _ = consumer.pop_slice(&mut frame_buf[0]);
let is_transmitting = ptt_flag.load(Ordering::Relaxed);
let dumper_enabled = audio_dumper_flag.load(Ordering::Relaxed);
let mut raw_frame = None;
if dumper_enabled {
raw_frame = Some(frame_buf.clone());
}
// Run the WebRTC DSP pipeline on the capture frame.
if let Err(e) = ap.process_capture_frame(&mut frame_buf) {
tracing::warn!("APM processing failed: {:?}", e);
}
if dumper_enabled {
if raw_writer.is_none() {
raw_writer = hound::WavWriter::create("raw_mic.wav", wav_spec).ok();
dsp_writer = hound::WavWriter::create("post_dsp.wav", wav_spec).ok();
}
if let (Some(writer), Some(raw)) = (&mut raw_writer, &raw_frame) {
for &sample in &raw[0] {
let _ = writer.write_sample(sample);
}
}
if let Some(writer) = &mut dsp_writer {
for &sample in &frame_buf[0] {
let _ = writer.write_sample(sample);
}
}
} else if raw_writer.is_some() {
// Close writers when disabled
if let Some(writer) = raw_writer.take() {
let _ = writer.finalize();
}
if let Some(writer) = dsp_writer.take() {
let _ = writer.finalize();
}
}
// Simple RMS-based VAD since webrtc-audio-processing v2
// removed the dedicated VoiceDetection config field.
let rms = compute_rms(&frame_buf[0]);

View File

@@ -16,4 +16,5 @@ pub const SAMPLE_RATE: u32 = 48_000;
/// Number of audio input channels to capture; fixed at 1 (mono).
pub const INPUT_CHANNELS: u16 = 1;
/// The exact number of samples required per frame for Opus (20ms).
/// 20 ms at the 48 kHz `SAMPLE_RATE` = 960 samples.
// allow(dead_code): kept for Opus framing; no longer referenced by the
// DSP thread, which processes 10 ms (480-sample) frames instead.
#[allow(dead_code)]
pub const FRAME_SIZE: usize = 960;

View File

@@ -28,13 +28,19 @@ fn main() -> Result<()> {
// Setup communication channels
let (active_speaker_tx, active_speaker_rx) = watch::channel(false);
let ptt_flag = Arc::new(AtomicBool::new(false));
let audio_dumper_flag = Arc::new(AtomicBool::new(false));
// Setup lock-free ringbuffer for audio capture (4096 capacity)
let audio_rb = HeapRb::<f32>::new(4096);
let (producer, consumer) = audio_rb.split();
// Spawn DSP and audio capture threads
audio::dsp::spawn_dsp_thread(consumer, ptt_flag.clone(), active_speaker_tx);
audio::dsp::spawn_dsp_thread(
consumer,
ptt_flag.clone(),
audio_dumper_flag.clone(),
active_speaker_tx,
);
let _stream = audio::capture::start_audio_capture(producer).map_err(|e| {
error!("Failed to start audio capture: {:?}", e);
e
@@ -68,7 +74,12 @@ fn main() -> Result<()> {
eframe::run_native(
"Voice App",
options,
Box::new(|_cc| Ok(Box::new(ui::VoiceApp::new(active_speaker_rx)))),
Box::new(|_cc| {
Ok(Box::new(ui::VoiceApp::new(
active_speaker_rx,
audio_dumper_flag,
)))
}),
)
.map_err(|e| anyhow::anyhow!("eframe error: {e:?}"))?;

View File

@@ -8,23 +8,28 @@
//! instead of a raw `egui::Context`.
use eframe::egui;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tokio::sync::watch;
/// The central state for the eframe UI.
pub struct VoiceApp {
/// Receiver for the active speaker state, updated by the DSP thread.
pub active_speaker_rx: watch::Receiver<bool>,
/// Whether the audio dumper is enabled for debugging.
pub audio_dumper_enabled: bool,
/// Shared flag to enable/disable the audio dumper.
pub audio_dumper_flag: Arc<AtomicBool>,
}
impl VoiceApp {
/// Creates a new `VoiceApp` instance.
#[must_use]
pub fn new(active_speaker_rx: watch::Receiver<bool>) -> Self {
pub fn new(
active_speaker_rx: watch::Receiver<bool>,
audio_dumper_flag: Arc<AtomicBool>,
) -> Self {
Self {
active_speaker_rx,
audio_dumper_enabled: false,
audio_dumper_flag,
}
}
}
@@ -55,7 +60,15 @@ impl eframe::App for VoiceApp {
columns[1].separator();
columns[1].heading("Developer Settings");
columns[1].checkbox(&mut self.audio_dumper_enabled, "Enable Audio Dumper (.wav)");
let mut dumper_enabled = self.audio_dumper_flag.load(Ordering::Relaxed);
if columns[1]
.checkbox(&mut dumper_enabled, "Enable Audio Dumper (.wav)")
.changed()
{
self.audio_dumper_flag
.store(dumper_enabled, Ordering::Relaxed);
}
});
// Force continuous repaint so that watch-channel updates are reflected immediately.