interprete/main.py

93 lines
2.7 KiB
Python

import sounddevice as sd
import numpy as np
import time
import queue
import threading
import argparse
from faster_whisper import WhisperModel
# Hand-off point between the audio callback (producer) and the
# transcription loop (consumer).
audio_queue = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Push a copy of one captured audio block onto ``audio_queue``.

    Only ``indata`` and ``status`` are used; ``frames`` and ``time`` are
    accepted because the stream passes them positionally.

    NOTE: the ``time`` parameter shadows the ``time`` module inside this
    function, so the module must not be used here.

    Args:
        indata: Captured audio block; must provide ``.copy()``.
        frames: Unused.
        time: Unused.
        status: Reported to stdout when truthy.
    """
    if status:
        print(f"Audio Status: {status}")

    # Queue a copy so the queued data does not alias the caller's buffer.
    block = indata.copy()
    audio_queue.put(block)
def transcribe_loop(stop_event):
    """Transcribe queued audio in fixed-length chunks and print the text.

    Audio blocks are pulled from ``audio_queue`` and accumulated until at
    least ``chunk_seconds`` worth of samples (at ``sample_rate``) is
    buffered. The buffer is then flattened to a 1-D float32 array,
    transcribed with ``language="en"`` and ``vad_filter=True``, and any
    non-empty text is printed. Nothing is written to disk.

    The model is configured from the module-level ``args`` namespace
    (``args.wmodel`` and ``args.whisperdevice``), so ``args`` must exist
    before this function runs.

    Audio still buffered when ``stop_event`` is set is discarded.

    Args:
        stop_event: Event-like object; the loop exits once ``is_set()``
            returns True.
    """
    sample_rate = 16000  # must match the rate used by the input stream
    chunk_seconds = 3
    min_samples = sample_rate * chunk_seconds  # loop-invariant threshold

    # Setup whisper
    wmodel = WhisperModel(args.wmodel, device=args.whisperdevice, compute_type="int8")

    buffer = []
    print(">> Transcribing")
    while not stop_event.is_set():
        try:
            # Bounded wait so stop_event is re-checked at least once a second.
            block = audio_queue.get(timeout=1)
        except queue.Empty:
            # Only "no audio arrived yet" is expected here; anything else
            # should surface instead of being silently swallowed.
            continue
        buffer.append(block)

        # Check whether there is enough audio for a transcription pass.
        total_samples = sum(c.shape[0] for c in buffer)
        if total_samples < min_samples:
            continue

        # Take the buffered audio and start a fresh buffer.
        audio_data = np.concatenate(buffer, axis=0).flatten().astype(np.float32)
        buffer = []

        segments, _ = wmodel.transcribe(
            audio_data,
            language="en",
            vad_filter=True,
        )
        text = " ".join(s.text for s in segments).strip()
        if text:
            print(text)
def listen(stop_event):
    """Keep the audio input stream open until ``stop_event`` is set.

    Opens a mono float32 input stream at 16 kHz on the default device,
    delivering blocks of 8000 frames to ``audio_callback``. The function
    then idles, polling ``stop_event`` every 0.1 s, and closes the stream
    on the way out.

    Args:
        stop_event: Event-like object; the function returns once
            ``is_set()`` returns True.
    """
    print(">> Listening")

    stream = sd.InputStream(
        samplerate=16000,
        channels=1,
        dtype="float32",
        blocksize=8000,
        device=None,  # default system device
        callback=audio_callback,
    )

    # The stream is only open inside this ``with`` block; the loop just
    # keeps the block alive.
    with stream:
        try:
            while True:
                if stop_event.is_set():
                    break
                time.sleep(0.1)
        except KeyboardInterrupt:
            # NOTE(review): only reachable when listen() runs on the main
            # thread; worker threads do not receive KeyboardInterrupt.
            print("\nListening Thread Stopped")
def main(args):
    """Run the listening and transcription workers until interrupted.

    Starts ``listen`` and ``transcribe_loop`` as daemon threads sharing one
    stop event, then blocks until Ctrl+C is pressed or either worker dies.
    On the way out the stop event is set so both workers can leave their
    loops, and each is given a short grace period to finish.

    Args:
        args: Parsed command-line namespace. It is not read here; note that
            ``transcribe_loop`` reads the module-level ``args`` instead.
    """
    stop_event = threading.Event()

    # Start listening thread
    t_listen = threading.Thread(target=listen, args=(stop_event,), daemon=True)
    t_listen.start()

    # Start transcription thread
    t_transcribe = threading.Thread(target=transcribe_loop, args=(stop_event,), daemon=True)
    t_transcribe.start()

    workers = (t_listen, t_transcribe)
    try:
        # Wait on the shared event with a timeout: the main thread stays
        # responsive to Ctrl+C and notices a worker that has died instead
        # of blocking forever.
        while not stop_event.is_set() and all(w.is_alive() for w in workers):
            stop_event.wait(timeout=0.5)
    except KeyboardInterrupt:
        print("Main Thread Stopped")
    finally:
        # Signal the workers to stop and wait briefly for a clean exit.
        # They are daemon threads, so a worker still busy after the timeout
        # cannot keep the process alive.
        stop_event.set()
        for worker in workers:
            worker.join(timeout=2)
if __name__ == "__main__":
    # Command-line interface: which Whisper model to load and which device
    # to run it on.
    parser = argparse.ArgumentParser(
        prog="Interprete",
        description="Live EN->FR Voice-to-text Translation",
        epilog="Well... go and use it now.",
    )
    parser.add_argument("-w", "--wmodel", default="small")
    parser.add_argument("--whisperdevice", default="cpu")

    # NOTE: ``args`` must stay a module-level name; transcribe_loop reads it
    # directly rather than receiving it as a parameter.
    args = parser.parse_args()

    main(args)
    print("Terminating.")