diff --git a/main.py b/main.py
index cbd6bcf..08ee2cb 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,6 @@
 import sounddevice as sd
 import numpy as np
+import time
 import queue
 import threading
 import argparse
@@ -12,7 +13,7 @@ def audio_callback(indata, frames, time, status):
         print(f"Audio Status: {status}")
     audio_queue.put(indata.copy())
 
-def transcribe_loop():
+def transcribe_loop(stop_event):
     """Listen for chunks of audio, transcribe, and save to file."""
     sample_rate = 16000
     chunk_seconds = 3
@@ -23,9 +24,12 @@ def transcribe_loop():
     buffer = []
 
     print(">> Transcribing")
-    while True:
-        buffer.append(audio_queue.get()) # get the latest data from the audio queue
+    while not stop_event.is_set():
+        try:
+            buffer.append(audio_queue.get(timeout=1)) # get the latest data from the audio queue
+        except queue.Empty:
+            continue # no audio arrived within the timeout; loop back to re-check stop_event
 
         # check if there is enought audio for transcription
         total_samples = sum(c.shape[0] for c in buffer)
         if total_samples >= sample_rate * chunk_seconds:
@@ -43,7 +47,7 @@ def transcribe_loop():
             if text:
                 print(text)
 
-def listen():
+def listen(stop_event):
     print(">> Listening")
     with sd.InputStream(
         samplerate=16000,
@@ -53,20 +57,24 @@ def listen():
         device=None, # default system device
         callback=audio_callback):
         try:
-            threading.Event().wait()
+            while not stop_event.is_set():
+                time.sleep(0.1)
         except KeyboardInterrupt:
             print("\nListening Thread Stopped")
 
 
 def main(args):
+    stop_event = threading.Event()
+
     # Start listening thread
-    t_listen = threading.Thread(target=listen)
+    t_listen = threading.Thread(target=listen, args=(stop_event,), daemon=True)
     t_listen.start()
 
     # Start transcription thread
-    t_transcribe = threading.Thread(target=transcribe_loop)
+    t_transcribe = threading.Thread(target=transcribe_loop, args=(stop_event,), daemon=True)
     t_transcribe.start()
 
+
     try:
         threading.Event().wait()
     except KeyboardInterrupt:
@@ -77,7 +85,7 @@ if __name__ == "__main__":
         prog='Interprete',
         description='Live EN->FR Voice-to-text Translation',
         epilog='Well... go and use it now.')
-    parser.add_argument('-w', '--wmodel', default="base")
+    parser.add_argument('-w', '--wmodel', default="small")
     parser.add_argument('--whisperdevice', default="cpu")
 
     args = parser.parse_args()