"""Live microphone transcription test using faster-whisper.

Captures audio from an input device with sounddevice, groups it into
CHUNK_SECONDS-long chunks and prints the transcription of each chunk.

Origin: commit 1432c5b40024f2e90e0501fdf992269416715f16
(author: grizzly, Thu Feb 26 15:54:20 2026 -0500),
"first live transcription test code", file test_live.py.

Dependencies (requirements.txt added in the same commit):
    numpy
    sounddevice
    faster-whisper
"""

import queue
import threading

import numpy as np
import sounddevice as sd
from faster_whisper import WhisperModel

# --- Config ---
MODEL_SIZE = "base"
SAMPLE_RATE = 16000   # Whisper expects 16kHz
CHUNK_SECONDS = 3     # record N seconds, then transcribe
DEVICE = None         # None = default mic, or set to device index

# --- Setup ---
model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
audio_queue = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Queue one captured audio block; called by sounddevice per block.

    The parameter list follows the callback signature that
    ``sd.InputStream`` invokes; only ``indata`` and ``status`` are used.

    Args:
        indata: Array holding the captured block. It is copied before
            queueing so the queued data does not alias the caller's buffer.
        frames: Number of frames in ``indata`` (unused).
        time: Timing information supplied by sounddevice (unused).
        status: Status flags; printed when truthy.
    """
    if status:
        print("Audio status:", status)
    audio_queue.put(indata.copy())


def transcribe_loop():
    """Continuously pull audio from the queue and print transcriptions.

    Blocks are accumulated until at least ``SAMPLE_RATE * CHUNK_SECONDS``
    samples are buffered; the buffered audio is then flattened to a 1-D
    float32 array, transcribed, and the buffer is reset. Never returns;
    intended to run in a daemon thread.
    """
    print("Listening... (Ctrl+C to stop)\n")
    samples_per_chunk = SAMPLE_RATE * CHUNK_SECONDS
    buffer = []
    buffered_samples = 0  # running total, avoids re-summing every block

    while True:
        chunk = audio_queue.get()
        buffer.append(chunk)
        buffered_samples += chunk.shape[0]

        # Keep collecting until we have enough seconds of audio.
        if buffered_samples < samples_per_chunk:
            continue

        audio_data = np.concatenate(buffer, axis=0).flatten().astype(np.float32)
        buffer = []
        buffered_samples = 0

        segments, _ = model.transcribe(
            audio_data,
            language="en",
            vad_filter=True,  # ignore silence
        )

        text = " ".join(s.text for s in segments).strip()
        if text:
            print(f">> {text}")


def main():
    """Start the transcription thread and capture audio until Ctrl+C."""
    worker = threading.Thread(target=transcribe_loop, daemon=True)
    worker.start()

    with sd.InputStream(
        # Capture directly at SAMPLE_RATE: the waveform is handed to
        # model.transcribe as a NumPy array, so it must already be at the
        # rate Whisper expects.
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype="float32",
        blocksize=int(SAMPLE_RATE * 0.5),  # 0.5s blocks fed to callback
        device=DEVICE,  # honours the DEVICE setting from the config block
        callback=audio_callback,
    ):
        try:
            threading.Event().wait()  # block main thread forever
        except KeyboardInterrupt:
            print("\nStopped.")


# --- Run ---
if __name__ == "__main__":
    main()