Progress in milestone 2

Have a very basic UI
This commit is contained in:
sam
2026-05-03 16:45:56 +02:00
parent 041955345b
commit dbad2c34d7
5 changed files with 84 additions and 12 deletions

5
client_node/build.rs Normal file
View File

@@ -0,0 +1,5 @@
fn main() {
    // Build script: force linking of additional abseil libraries that
    // webrtc-audio-processing-sys might miss depending on the system's
    // abseil version. The `cargo:rustc-link-lib=<name>` directive tells
    // Cargo to pass `-l absl_strings_internal` to the final link step.
    println!("cargo:rustc-link-lib=absl_strings_internal");
}

View File

@@ -22,7 +22,7 @@ use webrtc_audio_processing_config::{
Config, EchoCanceller, NoiseSuppression, NoiseSuppressionLevel,
};
use super::{FRAME_SIZE, SAMPLE_RATE};
use super::SAMPLE_RATE;
/// RMS threshold below which a frame is considered silence.
/// This provides a simple amplitude-based VAD since the WebRTC v2 API
@@ -37,6 +37,7 @@ const VAD_RMS_THRESHOLD: f32 = 0.01;
pub fn spawn_dsp_thread(
mut consumer: HeapCons<f32>,
ptt_flag: Arc<AtomicBool>,
audio_dumper_flag: Arc<AtomicBool>,
active_speaker_tx: watch::Sender<bool>,
) {
thread::spawn(move || {
@@ -60,21 +61,62 @@ pub fn spawn_dsp_thread(
};
ap.set_config(config);
// Mono capture: one channel with FRAME_SIZE samples.
let mut frame_buf = vec![vec![0.0f32; FRAME_SIZE]];
// WebRTC strictly requires 10ms frames (480 samples at 48kHz).
const DSP_FRAME_SIZE: usize = 480;
let mut frame_buf = vec![vec![0.0f32; DSP_FRAME_SIZE]];
let wav_spec = hound::WavSpec {
channels: 1,
sample_rate: SAMPLE_RATE,
bits_per_sample: 32,
sample_format: hound::SampleFormat::Float,
};
let mut raw_writer: Option<hound::WavWriter<std::io::BufWriter<std::fs::File>>> = None;
let mut dsp_writer: Option<hound::WavWriter<std::io::BufWriter<std::fs::File>>> = None;
loop {
// Wait until we have a full 20ms frame (960 samples at 48kHz).
if consumer.occupied_len() >= FRAME_SIZE {
// Wait until we have a full 10ms frame (480 samples at 48kHz).
if consumer.occupied_len() >= DSP_FRAME_SIZE {
let _ = consumer.pop_slice(&mut frame_buf[0]);
let is_transmitting = ptt_flag.load(Ordering::Relaxed);
let dumper_enabled = audio_dumper_flag.load(Ordering::Relaxed);
let mut raw_frame = None;
if dumper_enabled {
raw_frame = Some(frame_buf.clone());
}
// Run the WebRTC DSP pipeline on the capture frame.
if let Err(e) = ap.process_capture_frame(&mut frame_buf) {
tracing::warn!("APM processing failed: {:?}", e);
}
if dumper_enabled {
if raw_writer.is_none() {
raw_writer = hound::WavWriter::create("raw_mic.wav", wav_spec).ok();
dsp_writer = hound::WavWriter::create("post_dsp.wav", wav_spec).ok();
}
if let (Some(writer), Some(raw)) = (&mut raw_writer, &raw_frame) {
for &sample in &raw[0] {
let _ = writer.write_sample(sample);
}
}
if let Some(writer) = &mut dsp_writer {
for &sample in &frame_buf[0] {
let _ = writer.write_sample(sample);
}
}
} else if raw_writer.is_some() {
// Close writers when disabled
if let Some(writer) = raw_writer.take() {
let _ = writer.finalize();
}
if let Some(writer) = dsp_writer.take() {
let _ = writer.finalize();
}
}
// Simple RMS-based VAD since webrtc-audio-processing v2
// removed the dedicated VoiceDetection config field.
let rms = compute_rms(&frame_buf[0]);

View File

@@ -16,4 +16,5 @@ pub const SAMPLE_RATE: u32 = 48_000;
/// Number of audio input channels to capture; fixed at 1 (mono).
pub const INPUT_CHANNELS: u16 = 1;
/// The exact number of samples required per frame for Opus (20ms).
/// 20 ms at the 48 kHz `SAMPLE_RATE` = 960 samples.
// allow(dead_code): kept for Opus framing; no longer referenced by the
// DSP thread, which processes 10 ms (480-sample) frames instead.
#[allow(dead_code)]
pub const FRAME_SIZE: usize = 960;

View File

@@ -28,13 +28,19 @@ fn main() -> Result<()> {
// Setup communication channels
let (active_speaker_tx, active_speaker_rx) = watch::channel(false);
let ptt_flag = Arc::new(AtomicBool::new(false));
let audio_dumper_flag = Arc::new(AtomicBool::new(false));
// Setup lock-free ringbuffer for audio capture (4096 capacity)
let audio_rb = HeapRb::<f32>::new(4096);
let (producer, consumer) = audio_rb.split();
// Spawn DSP and audio capture threads
audio::dsp::spawn_dsp_thread(consumer, ptt_flag.clone(), active_speaker_tx);
audio::dsp::spawn_dsp_thread(
consumer,
ptt_flag.clone(),
audio_dumper_flag.clone(),
active_speaker_tx,
);
let _stream = audio::capture::start_audio_capture(producer).map_err(|e| {
error!("Failed to start audio capture: {:?}", e);
e
@@ -68,7 +74,12 @@ fn main() -> Result<()> {
eframe::run_native(
"Voice App",
options,
Box::new(|_cc| Ok(Box::new(ui::VoiceApp::new(active_speaker_rx)))),
Box::new(|_cc| {
Ok(Box::new(ui::VoiceApp::new(
active_speaker_rx,
audio_dumper_flag,
)))
}),
)
.map_err(|e| anyhow::anyhow!("eframe error: {e:?}"))?;

View File

@@ -8,23 +8,28 @@
//! instead of a raw `egui::Context`.
use eframe::egui;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tokio::sync::watch;
/// The central state for the eframe UI.
pub struct VoiceApp {
/// Receiver for the active speaker state, updated by the DSP thread.
pub active_speaker_rx: watch::Receiver<bool>,
/// Whether the audio dumper is enabled for debugging.
pub audio_dumper_enabled: bool,
/// Shared flag to enable/disable the audio dumper.
pub audio_dumper_flag: Arc<AtomicBool>,
}
impl VoiceApp {
/// Creates a new `VoiceApp` instance.
#[must_use]
pub fn new(active_speaker_rx: watch::Receiver<bool>) -> Self {
pub fn new(
active_speaker_rx: watch::Receiver<bool>,
audio_dumper_flag: Arc<AtomicBool>,
) -> Self {
Self {
active_speaker_rx,
audio_dumper_enabled: false,
audio_dumper_flag,
}
}
}
@@ -55,7 +60,15 @@ impl eframe::App for VoiceApp {
columns[1].separator();
columns[1].heading("Developer Settings");
columns[1].checkbox(&mut self.audio_dumper_enabled, "Enable Audio Dumper (.wav)");
let mut dumper_enabled = self.audio_dumper_flag.load(Ordering::Relaxed);
if columns[1]
.checkbox(&mut dumper_enabled, "Enable Audio Dumper (.wav)")
.changed()
{
self.audio_dumper_flag
.store(dumper_enabled, Ordering::Relaxed);
}
});
// Force continuous repaint so that watch-channel updates are reflected immediately.