first live transcription test code
This commit is contained in:
commit
1432c5b400
2 changed files with 66 additions and 0 deletions
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
numpy
|
||||||
|
sounddevice
|
||||||
|
faster-whisper
|
||||||
63
test_live.py
Normal file
63
test_live.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
import sounddevice as sd
|
||||||
|
import numpy as np
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
# --- Config ---
# Name of the Whisper model handed to faster-whisper.
MODEL_SIZE = "base"
# Capture rate in Hz; Whisper expects 16 kHz audio.
SAMPLE_RATE = 16000
# Seconds of audio to accumulate before each transcription pass.
CHUNK_SECONDS = 3
# Input device: None selects the default mic, otherwise a device index.
DEVICE = None

# --- Setup ---
# Hands recorded blocks from the audio callback to the transcription loop.
audio_queue = queue.Queue()
# Model is loaded once at import time: CPU inference with int8 compute type.
model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
|
||||||
|
|
||||||
|
def audio_callback(indata, frames, time, status):
    """Enqueue a copy of each captured audio block.

    Intended to be passed to ``sd.InputStream`` as its ``callback``.

    Args:
        indata: Array holding the samples of the current block.
        frames: Number of frames in the block (unused here).
        time: Timing information supplied by the stream (unused here).
        status: Stream status flags; reported when truthy.
    """
    if status:
        print("Audio status:", status)
    # Queue a private copy rather than ``indata`` itself -- presumably the
    # stream reuses its buffer after the callback returns (TODO confirm).
    snapshot = indata.copy()
    audio_queue.put(snapshot)
|
||||||
|
|
||||||
|
def transcribe_loop():
    """Continuously pull audio from the queue and print transcriptions.

    Blocks taken from ``audio_queue`` are accumulated until at least
    ``CHUNK_SECONDS`` worth of samples (at ``SAMPLE_RATE``) is buffered.
    The buffered audio is then flattened to a 1-D float32 array, passed to
    ``model.transcribe`` and any non-empty text is printed prefixed with
    ``>> ``.  The function never returns; run it in a daemon thread.
    """
    print("Listening... (Ctrl+C to stop)\n")

    # Loop-invariant: number of samples that make up one transcription window.
    window_samples = SAMPLE_RATE * CHUNK_SECONDS
    pending = []
    pending_samples = 0  # running total, avoids re-summing the buffer per block

    while True:
        block = audio_queue.get()
        pending.append(block)
        pending_samples += block.shape[0]

        # Keep collecting until a full window is available.
        if pending_samples < window_samples:
            continue

        audio_data = np.concatenate(pending, axis=0).flatten().astype(np.float32)
        pending = []
        pending_samples = 0

        segments, _ = model.transcribe(
            audio_data,
            language="en",
            vad_filter=True,  # ignore silence
        )

        # Segment texts usually start with a space; strip each one before
        # joining so the printed line has no doubled spaces between segments.
        pieces = (segment.text.strip() for segment in segments)
        text = " ".join(piece for piece in pieces if piece)
        if text:
            print(f">> {text}")
|
||||||
|
|
||||||
|
# --- Run ---
# Transcription runs in a daemon thread so it never keeps the process alive
# once the main thread leaves the stream context.
t = threading.Thread(target=transcribe_loop, daemon=True)
t.start()

with sd.InputStream(
    # Capture directly at the configured rate: the raw NumPy array handed to
    # the model is not resampled, so it must already be at SAMPLE_RATE.
    samplerate=SAMPLE_RATE,
    channels=1,
    dtype="float32",
    blocksize=int(SAMPLE_RATE * 0.5),  # 0.5s blocks fed to callback
    device=DEVICE,  # honour the config value (None = system default device)
    callback=audio_callback,
):
    try:
        # Poll with a timeout instead of blocking indefinitely: an untimed
        # wait may not be interruptible by Ctrl+C on some platforms (notably
        # Windows), and polling lets the script exit if the worker dies.
        while t.is_alive():
            t.join(timeout=0.5)
    except KeyboardInterrupt:
        print("\nStopped.")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue