# interprete/main.py

import sounddevice as sd
import numpy as np
import time
import queue
import threading
import argparse
from faster_whisper import WhisperModel

# Hand-off point between the capture callback (producer) and the
# transcription loop (consumer).
audio_queue = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Report any stream status and enqueue a copy of the captured block.

    Args:
        indata: Captured audio block; only ``.copy()`` is called on it here.
        frames: Unused.
        time: Unused. Note that it shadows the ``time`` module inside this
            function.
        status: Printed when truthy.
    """
    has_status = bool(status)
    if has_status:
        print(f"Audio Status: {status}")

    # Enqueue a copy rather than the array object handed in by the caller.
    snapshot = indata.copy()
    audio_queue.put(snapshot)

def transcribe_loop(stop_event):
    """Consume queued audio, transcribe it in chunks, and print the text.

    Blocks are pulled from the module-level ``audio_queue`` and accumulated
    until at least ``sample_rate * chunk_seconds`` samples are buffered. The
    buffered audio is then concatenated, handed to Whisper, and any non-empty
    transcript is printed to stdout. Audio still buffered when the loop exits
    is discarded.

    NOTE: the Whisper model name and device are read from the module-level
    ``args`` namespace (``args.wmodel`` / ``args.whisperdevice``), which only
    exists when this file is run as a script.

    Args:
        stop_event: ``threading.Event``; the loop exits once it is set. It is
            checked before every queue read, and each read waits at most 1 s.
    """
    sample_rate = 16000  # assumed to match the rate of the capture stream
    chunk_seconds = 3
    min_samples = sample_rate * chunk_seconds

    # Setup whisper
    wmodel = WhisperModel(args.wmodel, device=args.whisperdevice, compute_type="int8")

    buffer = []

    print(">> Transcribing")
    while not stop_event.is_set():
        try:
            # The timeout keeps the loop responsive to stop_event while idle.
            block = audio_queue.get(timeout=1)
        except queue.Empty:
            # Only "no audio yet" is expected here; any other error should
            # surface instead of being silently swallowed.
            continue
        buffer.append(block)

        # Check whether there is enough audio for a transcription pass.
        if sum(c.shape[0] for c in buffer) < min_samples:
            continue

        # Take the buffered audio as one flat float32 array and reset the buffer.
        audio_data = np.concatenate(buffer, axis=0).flatten().astype(np.float32)
        buffer = []

        segments, _ = wmodel.transcribe(
            audio_data,
            language="en",
            vad_filter=True,
        )

        text = " ".join(s.text for s in segments).strip()
        if text:
            print(text)

def listen(stop_event):
    """Keep an audio input stream open until ``stop_event`` is set.

    The stream is opened on the default system device (``device=None``) as
    mono float32 at 16 kHz, with ``audio_callback`` registered as its
    callback. This function only keeps the stream alive and polls the stop
    flag.

    Args:
        stop_event: ``threading.Event`` polled every 0.1 s; the stream is
            closed (the ``with`` block exits) once it is set.
    """
    print(">> Listening")
    stream_settings = {
        "samplerate": 16000,
        "channels": 1,
        "dtype": "float32",
        "blocksize": 8000,
        "device": None,  # default system device
        "callback": audio_callback,
    }
    with sd.InputStream(**stream_settings):
        try:
            while True:
                if stop_event.is_set():
                    break
                time.sleep(0.1)
        except KeyboardInterrupt:
            print("\nListening Thread Stopped")

def main(args):
    """Run the capture and transcription workers until interrupted.

    Starts ``listen`` and ``transcribe_loop`` as daemon threads sharing one
    stop event, then blocks until Ctrl-C is pressed or one of the workers
    dies. On the way out the stop event is set and the workers get a bounded
    amount of time to finish, so the input stream can be closed by its context
    manager instead of being killed at interpreter exit.

    Args:
        args: Parsed command-line namespace. It is not read here;
            ``transcribe_loop`` reads the module-level ``args`` instead.
    """
    stop_event = threading.Event()

    workers = [
        threading.Thread(target=listen, args=(stop_event,), daemon=True),
        threading.Thread(target=transcribe_loop, args=(stop_event,), daemon=True),
    ]
    for worker in workers:
        worker.start()

    try:
        # Wait in short timed slices so a KeyboardInterrupt is handled
        # promptly (NOTE(review): untimed waits are reportedly not
        # interruptible on some platforms -- confirm). Also stop waiting if a
        # worker has died, since the pipeline needs both.
        while all(worker.is_alive() for worker in workers):
            stop_event.wait(timeout=0.5)
    except KeyboardInterrupt:
        print("Main Thread Stopped")
    finally:
        # Signal the workers and give them a moment to wind down. They are
        # daemon threads, so a worker stuck in a long call cannot block exit.
        stop_event.set()
        for worker in workers:
            worker.join(timeout=2.0)

if __name__ == "__main__":
    # Command-line entry point. `args` must stay a module-level name because
    # transcribe_loop reads it as a global.
    cli = argparse.ArgumentParser(
        prog="Interprete",
        description="Live EN->FR Voice-to-text Translation",
        epilog="Well... go and use it now.",
    )
    # Passed to WhisperModel as the model argument.
    cli.add_argument("-w", "--wmodel", default="small")
    # Passed to WhisperModel as `device`.
    cli.add_argument("--whisperdevice", default="cpu")

    args = cli.parse_args()
    main(args)
    print("Terminating.")