interprete/test_live.py
2026-02-26 23:17:44 -05:00

63 lines
1.9 KiB
Python

import sounddevice as sd
import numpy as np
import queue
import threading
from faster_whisper import WhisperModel
# --- Config ---
MODEL_SIZE = "base"  # faster-whisper model name (tiny/base/small/medium/large)
SAMPLE_RATE = 16000 # Whisper expects 16kHz
CHUNK_SECONDS = 3 # record N seconds, then transcribe
DEVICE = None # None = default mic, or set to device index
# --- Setup ---
# Load the model once at import time; the first run may download weights.
# NOTE(review): compute_type="int8" with device="cuda" — "int8_float16" is the
# usual quantized-GPU choice; confirm int8-on-CUDA is intentional.
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int8")
# Thread-safe handoff of raw audio blocks from the sounddevice callback
# thread (producer) to the transcription worker thread (consumer).
audio_queue = queue.Queue()
def audio_callback(indata, frames, time, status):
    """sounddevice callback: hand one captured audio block to the worker queue.

    Runs on the audio driver's thread, so it must return quickly and must not
    keep a reference to ``indata`` — sounddevice reuses that buffer between
    calls, hence the copy before enqueueing.
    """
    if status:
        # Over/underruns and other driver warnings arrive here.
        print("Audio status:", status)
    snapshot = indata.copy()
    audio_queue.put(snapshot)
def transcribe_loop():
    """Worker loop: accumulate mic audio and print transcriptions.

    Pulls audio blocks off ``audio_queue``; once at least
    ``SAMPLE_RATE * CHUNK_SECONDS`` samples have piled up, flattens them into
    a mono float32 1-D array and runs Whisper on it. Runs forever — intended
    for a daemon thread.
    """
    print("Listening... (Ctrl+C to stop)\n")
    target = SAMPLE_RATE * CHUNK_SECONDS  # samples needed per transcription
    pending = []
    collected = 0
    while True:
        block = audio_queue.get()
        pending.append(block)
        collected += block.shape[0]
        if collected < target:
            continue
        # Mono float32 1-D array — the form model.transcribe() accepts directly.
        samples = np.concatenate(pending, axis=0).flatten().astype(np.float32)
        pending, collected = [], 0
        segments, _ = model.transcribe(
            samples,
            language="en",
            vad_filter=True  # ignore silence
        )
        text = " ".join(seg.text for seg in segments).strip()
        if text:
            print(f">> {text}")
# --- Run ---
# Transcription runs on a daemon thread so a Ctrl+C in the main thread ends
# the whole process without waiting for the worker.
t = threading.Thread(target=transcribe_loop, daemon=True)
t.start()

with sd.InputStream(
    # Capture at SAMPLE_RATE directly: faster-whisper does NOT resample a raw
    # NumPy array (only file inputs go through ffmpeg), so the stream rate
    # must match what transcribe_loop feeds the model.
    samplerate=SAMPLE_RATE,
    channels=1,               # mono; transcribe_loop flattens to 1-D anyway
    dtype="float32",          # matches the dtype handed to model.transcribe()
    blocksize=int(SAMPLE_RATE * 0.5),  # 0.5s blocks fed to callback
    device=DEVICE,            # honor the configured input device (None = default)
    callback=audio_callback
):
    try:
        threading.Event().wait()  # block main thread forever
    except KeyboardInterrupt:
        print("\nStopped.")