first live transcription test code
This commit is contained in:
commit
1432c5b400
2 changed files with 66 additions and 0 deletions
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
numpy
|
||||
sounddevice
|
||||
faster-whisper
|
||||
63
test_live.py
Normal file
63
test_live.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
import sounddevice as sd
|
||||
import numpy as np
|
||||
import queue
|
||||
import threading
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
# --- Config ---
MODEL_SIZE = "base"  # model size name handed to WhisperModel below
SAMPLE_RATE = 16000  # Hz; Whisper expects 16kHz
CHUNK_SECONDS = 3  # seconds of audio to collect before each transcription
DEVICE = None  # None = default mic, or set to device index

# --- Setup ---
# Audio blocks produced by the capture callback wait here until the
# transcription loop picks them up.
audio_queue = queue.Queue()

# The model is created once, at module load, on CPU with int8 compute.
model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
|
||||
|
||||
def audio_callback(indata, frames, time, status):
    """Queue one captured audio block for the transcription thread.

    Intended as the ``callback`` of ``sd.InputStream``; the parameter list
    is the one that stream invokes the callback with.

    Args:
        indata: Array holding the samples of the current block.
        frames: Number of frames in the block (unused here).
        time: Timing information supplied by the stream (unused here).
        status: Stream status flags; printed whenever truthy.
    """
    if status:
        print("Audio status:", status)

    # Queue a private copy rather than ``indata`` itself, so the queued data
    # does not alias the array handed to this callback.
    # NOTE(review): presumably sounddevice reuses that buffer after the
    # callback returns - confirm against its documentation.
    block_copy = indata.copy()
    audio_queue.put(block_copy)
|
||||
|
||||
def transcribe_loop():
    """Transcribe queued audio forever, one fixed-length chunk at a time.

    Blocks are taken from ``audio_queue`` and accumulated until at least
    ``SAMPLE_RATE * CHUNK_SECONDS`` samples are pending. The pending blocks
    are then joined into a single flat float32 array, transcribed with
    ``model``, and any non-empty text is printed prefixed with ``>> ``.

    Never returns; it is meant to run on a background thread.
    """
    print("Listening... (Ctrl+C to stop)\n")

    samples_needed = SAMPLE_RATE * CHUNK_SECONDS
    pending = []
    pending_samples = 0

    while True:
        block = audio_queue.get()  # blocks until the callback queues audio
        pending.append(block)
        pending_samples += block.shape[0]

        # Keep collecting until a full chunk's worth of samples is available.
        if pending_samples < samples_needed:
            continue

        waveform = np.concatenate(pending, axis=0)
        waveform = waveform.flatten().astype(np.float32)

        # Start the next chunk from scratch before the transcription call.
        pending = []
        pending_samples = 0

        segments, _info = model.transcribe(
            waveform,
            language="en",
            vad_filter=True,  # ignore silence
        )

        pieces = [segment.text for segment in segments]
        transcript = " ".join(pieces).strip()
        if transcript:
            print(f">> {transcript}")
|
||||
|
||||
# --- Run ---
|
||||
# The transcription loop runs on a daemon thread, so it does not keep the
# process alive once the main thread leaves the stream context below.
t = threading.Thread(target=transcribe_loop, daemon=True)
t.start()

# Capture directly at SAMPLE_RATE: the queued blocks are passed to the model
# as a raw array, so the stream itself must deliver the rate Whisper expects.
# The stream settings come from the config constants, so editing SAMPLE_RATE
# or DEVICE at the top of the file actually takes effect here.
with sd.InputStream(
    samplerate=SAMPLE_RATE,
    channels=1,
    dtype="float32",
    blocksize=int(SAMPLE_RATE * 0.5),  # 0.5s blocks fed to callback
    device=DEVICE,  # None = default input device, or a device index
    callback=audio_callback,
):
    try:
        threading.Event().wait()  # block main thread forever
    except KeyboardInterrupt:
        print("\nStopped.")
|
||||
Loading…
Add table
Add a link
Reference in a new issue