"""Live microphone transcription with sounddevice and faster-whisper.

A capture thread pushes microphone blocks onto a queue; a transcription
thread drains the queue, groups the audio into fixed-length chunks and prints
the recognised English text. Both threads share a stop event so Ctrl+C shuts
the program down cleanly.
"""

import argparse
import queue
import threading

import numpy as np
import sounddevice as sd
from faster_whisper import WhisperModel

SAMPLE_RATE = 16000   # Hz; used for both capture and chunk sizing
CHUNK_SECONDS = 3     # seconds of audio gathered before each transcription
BLOCK_SIZE = 8000     # frames handed to the audio callback per invocation
POLL_SECONDS = 0.5    # how often blocking waits re-check the stop event

# Hand-off between the audio callback (producer) and transcribe_loop (consumer).
audio_queue = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Queue one block of captured audio.

    Registered as the ``callback`` of the ``sd.InputStream`` opened in
    ``listen``. The parameter names follow the signature the stream calls
    with; only ``indata`` and ``status`` are used here.

    Args:
        indata: Array holding the captured block.
        frames: Number of frames in ``indata`` (unused).
        time: Timing information supplied by the stream (unused).
        status: Stream status flags; printed when truthy.
    """
    if status:
        print(f"Audio Status: {status}")
    # Queue a copy so the queued data never aliases the stream's own buffer.
    audio_queue.put(indata.copy())


def transcribe_loop(model_name="base", device="cpu", stop_event=None):
    """Collect queued audio into chunks, transcribe them and print the text.

    Args:
        model_name: Whisper model name/size passed to ``WhisperModel``.
        device: Device passed to ``WhisperModel`` (e.g. ``"cpu"``).
        stop_event: Optional ``threading.Event``; the loop returns soon after
            it is set. When omitted the loop runs until the process ends.
            Audio still buffered when the loop stops is discarded.
    """
    if stop_event is None:
        stop_event = threading.Event()

    # Setup whisper
    model = WhisperModel(model_name, device=device, compute_type="int8")

    chunk_samples = SAMPLE_RATE * CHUNK_SECONDS
    buffer = []
    buffered_samples = 0

    print(">> Transcribing")
    while not stop_event.is_set():
        # Wait with a timeout so a stop request is noticed even when no audio
        # is arriving.
        try:
            block = audio_queue.get(timeout=POLL_SECONDS)
        except queue.Empty:
            continue

        buffer.append(block)
        buffered_samples += block.shape[0]

        # Keep collecting until there is enough audio for transcription.
        if buffered_samples < chunk_samples:
            continue

        # Take the audio data and empty the buffer.
        audio_data = np.concatenate(buffer, axis=0).flatten().astype(np.float32)
        buffer = []
        buffered_samples = 0

        segments, _ = model.transcribe(
            audio_data,
            language="en",
            vad_filter=True,
        )

        text = " ".join(s.text for s in segments).strip()
        if text:
            print(text)


def listen(stop_event=None):
    """Keep the default input device open until asked to stop.

    Captured blocks are delivered to ``audio_callback`` while the stream is
    open.

    Args:
        stop_event: Optional ``threading.Event``; the stream is closed once it
            is set. When omitted the function blocks until interrupted.
    """
    if stop_event is None:
        stop_event = threading.Event()

    print(">> Listening")
    with sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype="float32",
        blocksize=BLOCK_SIZE,
        device=None,  # default system device
        callback=audio_callback,
    ):
        try:
            # Timed waits keep the call interruptible when it runs on the
            # main thread; worker threads never receive KeyboardInterrupt.
            while not stop_event.wait(timeout=POLL_SECONDS):
                pass
        except KeyboardInterrupt:
            print("\nListening Thread Stopped")


def main(args):
    """Run capture and transcription threads until Ctrl+C or a worker exits.

    Args:
        args: Parsed command-line namespace providing ``wmodel`` and
            ``whisperdevice``.
    """
    stop_event = threading.Event()

    # Start listening thread
    t_listen = threading.Thread(target=listen, args=(stop_event,))
    t_listen.start()

    # Start transcription thread
    t_transcribe = threading.Thread(
        target=transcribe_loop,
        args=(args.wmodel, args.whisperdevice, stop_event),
    )
    t_transcribe.start()

    try:
        # Poll instead of blocking forever: Ctrl+C is then handled promptly,
        # and the program also stops if either worker dies (for example when
        # the model fails to load).
        while t_listen.is_alive() and t_transcribe.is_alive():
            stop_event.wait(timeout=POLL_SECONDS)
    except KeyboardInterrupt:
        print("Main Thread Stopped")
    finally:
        # Signal both workers and wait for them so the process can exit.
        stop_event.set()
        t_listen.join()
        t_transcribe.join()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='Interprete',
        description='Live EN->FR Voice-to-text Translation',
        epilog='Well... go and use it now.')
    parser.add_argument('-w', '--wmodel', default="base")
    parser.add_argument('--whisperdevice', default="cpu")

    args = parser.parse_args()
    main(args)
    print("Terminating.")