63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
import sounddevice as sd
|
|
import numpy as np
|
|
import queue
|
|
import threading
|
|
from faster_whisper import WhisperModel
|
|
|
|
# --- Config ---
MODEL_SIZE = "base"   # faster-whisper model size, e.g. "tiny"/"base"/"small"
SAMPLE_RATE = 16000   # Whisper expects 16kHz
CHUNK_SECONDS = 3     # record N seconds, then transcribe
DEVICE = None         # None = default mic, or set to device index

# --- Setup ---
# NOTE(review): device="cuda" will raise at import time on machines without a
# CUDA-capable GPU; consider a "cpu" fallback — confirm deployment target.
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int8")
# Thread-safe handoff: the sounddevice callback thread produces audio blocks,
# the transcription thread consumes them.
audio_queue = queue.Queue()
|
|
|
|
def audio_callback(indata, frames, time, status):
    """Receive one block of microphone samples and enqueue a copy of it.

    Signature is dictated by sounddevice's stream-callback contract;
    only ``indata`` and ``status`` are used here.
    """
    if status:  # non-fatal stream warnings (e.g. input overflow)
        print("Audio status:", status)
    # Copy before queueing: sounddevice recycles the buffer after this returns.
    audio_queue.put(indata.copy())
|
|
|
|
def transcribe_loop():
    """Drain the audio queue and print a transcript for every CHUNK_SECONDS
    of accumulated microphone audio. Runs forever; intended for a daemon thread."""
    print("Listening... (Ctrl+C to stop)\n")
    pending = []
    samples_needed = SAMPLE_RATE * CHUNK_SECONDS  # threshold, hoisted out of the loop

    while True:
        pending.append(audio_queue.get())

        # Keep accumulating until at least CHUNK_SECONDS worth of samples arrived.
        if sum(block.shape[0] for block in pending) < samples_needed:
            continue

        # Flatten the queued blocks into one mono float32 array for the model.
        samples = np.concatenate(pending, axis=0).flatten().astype(np.float32)
        pending = []

        segments, _ = model.transcribe(
            samples,
            language="en",
            vad_filter=True,  # ignore silence
        )

        text = " ".join(seg.text for seg in segments).strip()
        if text:
            print(f">> {text}")
|
|
|
|
# --- Run ---
# Transcription runs on a daemon thread so a KeyboardInterrupt in the main
# thread still terminates the whole process.
t = threading.Thread(target=transcribe_loop, daemon=True)
t.start()

with sd.InputStream(
    samplerate=SAMPLE_RATE,            # capture directly at Whisper's 16 kHz
    channels=1,                        # mono — transcribe_loop flattens anyway
    dtype="float32",
    blocksize=int(SAMPLE_RATE * 0.5),  # 0.5s blocks fed to callback
    device=DEVICE,                     # honor configured input device (None = default)
    callback=audio_callback,
):
    try:
        threading.Event().wait()  # block main thread forever
    except KeyboardInterrupt:
        print("\nStopped.")
|