first live transcription test code

2026-02-26 15:54:20 -05:00 · 2026-02-26 15:54:20 -05:00 · 1432c5b400
commit 1432c5b400
2 changed files with 66 additions and 0 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
 numpy
 sounddevice
 faster-whisper
--- a/test_live.py
+++ b/test_live.py
@ -0,0 +1,63 @@
 import sounddevice as sd
 import numpy as np
 import queue
 import threading
 from faster_whisper import WhisperModel
 # --- Config ---
 # Tunable settings for the live transcription test.
 MODEL_SIZE = "base"      # model identifier handed to WhisperModel below
 SAMPLE_RATE = 16000      # Whisper expects 16kHz
 CHUNK_SECONDS = 3        # record N seconds, then transcribe
 DEVICE = None            # None = default mic, or set to device index
 # --- Setup ---
 # The model is constructed once, at import time, on the CPU with int8 compute.
 model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
 # Unbounded hand-off queue: audio_callback puts captured blocks on it and
 # transcribe_loop takes them off.
 audio_queue = queue.Queue()
 def audio_callback(indata, frames, time, status):
    """Stream callback: report any status flag and enqueue the new block.

    A copy of ``indata`` is placed on ``audio_queue`` rather than the
    array itself, so the queued data is independent of the array that was
    passed in. ``frames`` and ``time`` are accepted but not used.
    """
    if status:
        print("Audio status:", status)
    snapshot = indata.copy()
    audio_queue.put(snapshot)
 def transcribe_loop():
    """Drain the audio queue forever, transcribing one window at a time.

    Blocks taken from ``audio_queue`` are collected until at least
    ``SAMPLE_RATE * CHUNK_SECONDS`` samples are pending. The window is then
    merged into a single flat float32 array, passed to ``model.transcribe``,
    and any non-empty text is printed with a ``>> `` prefix. The loop has no
    exit condition, so this function never returns.
    """
    print("Listening... (Ctrl+C to stop)\n")
    pending = []
    pending_samples = 0
    while True:
        block = audio_queue.get()
        pending.append(block)
        pending_samples += block.shape[0]
        # Keep collecting until a full window of audio is available.
        if pending_samples < SAMPLE_RATE * CHUNK_SECONDS:
            continue

        merged = np.concatenate(pending, axis=0)
        samples = merged.flatten().astype(np.float32)
        # Start the next window from scratch.
        pending = []
        pending_samples = 0

        # vad_filter=True: ignore silence
        segments, _info = model.transcribe(samples, language="en", vad_filter=True)
        pieces = [segment.text for segment in segments]
        transcript = " ".join(pieces).strip()
        if transcript:
            print(f">> {transcript}")
 # --- Run ---
 # Transcription runs on a daemon thread, so it does not keep the process
 # alive once the main thread exits.
 t = threading.Thread(target=transcribe_loop, daemon=True)
 t.start()
 # Open the microphone stream using the config constants instead of repeated
 # literals, so editing SAMPLE_RATE or DEVICE actually takes effect.
 with sd.InputStream(
    # Captured samples reach the model as a NumPy array and nothing in this
    # script resamples them, so record at the rate Whisper expects.
    samplerate=SAMPLE_RATE,
    channels=1,
    dtype="float32",
    blocksize=int(SAMPLE_RATE * 0.5),  # 0.5s blocks fed to callback
    device=DEVICE,  # None = default input device, or a device index
    callback=audio_callback,
 ):
    try:
        threading.Event().wait()  # block main thread forever
    except KeyboardInterrupt:
        print("\nStopped.")