"""Live microphone transcription with faster-whisper.

Records mono float32 audio from an input device in 0.5 s blocks,
accumulates CHUNK_SECONDS worth of samples, then transcribes each
chunk on a background thread and prints the recognized text.
"""

import queue
import threading

import numpy as np
import sounddevice as sd
from faster_whisper import WhisperModel

# --- Config ---
MODEL_SIZE = "base"
SAMPLE_RATE = 16000   # Whisper expects 16 kHz input
CHUNK_SECONDS = 3     # accumulate N seconds of audio, then transcribe
DEVICE = None         # None = default mic, or set to a device index

# --- Setup ---
model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
audio_queue = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Called by sounddevice for each audio block; hand a copy to the worker.

    ``indata``'s buffer is reused by PortAudio after the callback returns,
    so we must copy before enqueueing.
    """
    if status:
        print("Audio status:", status)
    audio_queue.put(indata.copy())


def transcribe_loop():
    """Continuously pull audio from the queue and transcribe it in chunks."""
    print("Listening... (Ctrl+C to stop)\n")
    buffer = []
    while True:
        chunk = audio_queue.get()
        buffer.append(chunk)
        # Once we have enough seconds of audio buffered, transcribe it all.
        total_samples = sum(c.shape[0] for c in buffer)
        if total_samples >= SAMPLE_RATE * CHUNK_SECONDS:
            audio_data = np.concatenate(buffer, axis=0).flatten().astype(np.float32)
            buffer = []
            segments, _ = model.transcribe(
                audio_data,
                language="en",
                vad_filter=True,  # ignore silence
            )
            text = " ".join(s.text for s in segments).strip()
            if text:
                print(f">> {text}")


# --- Run ---
t = threading.Thread(target=transcribe_loop, daemon=True)
t.start()

with sd.InputStream(
    # FIX: was hard-coded 16000 with a misleading "resampled by ffmpeg"
    # comment — sounddevice/PortAudio does NOT resample, so the stream
    # rate must genuinely match what Whisper expects.
    samplerate=SAMPLE_RATE,
    channels=1,
    dtype="float32",
    blocksize=int(SAMPLE_RATE * 0.5),  # 0.5 s blocks fed to audio_callback
    device=DEVICE,  # FIX: DEVICE config constant was ignored (hard-coded None)
    callback=audio_callback,
):
    try:
        threading.Event().wait()  # block main thread forever
    except KeyboardInterrupt:
        print("\nStopped.")