"""Live microphone transcription using faster-whisper.

One thread captures audio from the default input device and pushes blocks onto
a queue; a second thread drains the queue, groups blocks into fixed-length
chunks, and prints the transcription of each chunk.
"""
import argparse
import queue
import threading
import time

import numpy as np
import sounddevice as sd
from faster_whisper import WhisperModel

# Capture rate shared by the recorder and the transcriber so the two cannot
# drift apart. NOTE(review): faster-whisper is documented to treat raw NumPy
# input as 16 kHz audio -- confirm before changing this value.
SAMPLE_RATE = 16000
# Seconds of audio accumulated before each transcription call.
CHUNK_SECONDS = 3

# Hand-off between the audio callback (producer) and transcribe_loop (consumer).
audio_queue = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Queue a copy of each captured audio block.

    Used as the ``callback`` of ``sd.InputStream``. The ``time`` parameter
    shadows the ``time`` module inside this function only; the name is kept so
    the signature stays unchanged.

    Args:
        indata: Array holding the captured block.
        frames: Number of frames in ``indata`` (unused).
        time: Timing information supplied by the stream (unused).
        status: Stream status flags; printed when truthy.
    """
    if status:
        print(f"Audio Status: {status}")
    # Copy so the queued block does not alias the array passed to the callback.
    audio_queue.put(indata.copy())


def transcribe_loop(stop_event, model_name="small", device="cpu"):
    """Transcribe queued audio in chunks and print the text until stopped.

    Args:
        stop_event: ``threading.Event``; the loop exits once it is set.
        model_name: Whisper model size or path passed to ``WhisperModel``.
        device: Device string passed to ``WhisperModel`` (e.g. ``"cpu"``).
    """
    # Setup whisper
    wmodel = WhisperModel(model_name, device=device, compute_type="int8")
    min_samples = SAMPLE_RATE * CHUNK_SECONDS

    buffer = []
    buffered_samples = 0

    print(">> Transcribing")
    while not stop_event.is_set():
        try:
            # The timeout lets the loop re-check stop_event while no audio
            # is arriving.
            block = audio_queue.get(timeout=1)
        except queue.Empty:
            continue

        buffer.append(block)
        buffered_samples += block.shape[0]

        # Keep collecting until there is enough audio for transcription.
        if buffered_samples < min_samples:
            continue

        # Take the audio data and empty the buffer.
        audio_data = np.concatenate(buffer, axis=0).flatten().astype(np.float32)
        buffer = []
        buffered_samples = 0

        segments, _ = wmodel.transcribe(
            audio_data,
            language="en",
            vad_filter=True,
        )
        text = " ".join(s.text for s in segments).strip()
        if text:
            print(text)


def listen(stop_event):
    """Keep the microphone input stream open until ``stop_event`` is set.

    Args:
        stop_event: ``threading.Event``; the stream is closed once it is set.
    """
    print(">> Listening")
    with sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype="float32",
        blocksize=8000,  # half a second per callback at 16 kHz
        device=None,  # default system device
        callback=audio_callback,
    ):
        try:
            # Event.wait returns True as soon as the event is set, so shutdown
            # is noticed without waiting out a full sleep interval.
            while not stop_event.wait(0.1):
                pass
        except KeyboardInterrupt:
            # Only reachable when listen() runs in the main thread.
            print("\nListening Thread Stopped")


def main(args):
    """Run the capture and transcription threads until interrupted.

    Args:
        args: Parsed CLI namespace providing ``wmodel`` and ``whisperdevice``.
    """
    stop_event = threading.Event()

    # Listening thread
    t_listen = threading.Thread(target=listen, args=(stop_event,), daemon=True)
    # Transcription thread; model settings are passed explicitly instead of
    # being read from a module-level global.
    t_transcribe = threading.Thread(
        target=transcribe_loop,
        args=(stop_event, args.wmodel, args.whisperdevice),
        daemon=True,
    )
    workers = (t_listen, t_transcribe)
    for worker in workers:
        worker.start()

    try:
        # Poll instead of blocking forever so the program also exits when a
        # worker dies on its own (e.g. no input device, model load failure).
        while all(worker.is_alive() for worker in workers):
            time.sleep(0.2)
    except KeyboardInterrupt:
        print("Main Thread Stopped")
    finally:
        # Tell both workers to finish and give them a moment to do so; they
        # are daemon threads, so a stuck worker cannot block interpreter exit.
        stop_event.set()
        for worker in workers:
            worker.join(timeout=5)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='Interprete',
        description='Live EN->FR Voice-to-text Translation',
        epilog='Well... go and use it now.',
    )
    parser.add_argument('-w', '--wmodel', default="small",
                        help='Whisper model size or path (default: small)')
    parser.add_argument('--whisperdevice', default="cpu",
                        help='device used for inference (default: cpu)')
    args = parser.parse_args()

    main(args)
    print("Terminating.")