Start a new, clean program to combine transcription and translation

2026-02-27 19:51:37 -05:00 · 2026-02-27 19:51:37 -05:00 · 82b86216e9
commit 82b86216e9
parent f026debfa0
1 changed files with 85 additions and 0 deletions
--- a/main.py
+++ b/main.py
@ -0,0 +1,85 @@
+import sounddevice as sd
+import numpy as np
+import queue
+import threading
+import argparse
+from faster_whisper import WhisperModel
+
+# Hand-off between the capture side and the transcription side:
+# audio_callback() puts copied audio blocks here and transcribe_loop() takes
+# them out. Created without a maxsize, so it never blocks the producer.
+audio_queue = queue.Queue()
+
+def audio_callback(indata, frames, time, status):
+    """Queue a copy of each captured audio block for the transcription loop.
+
+    Used as the ``callback`` of the ``sd.InputStream`` opened in ``listen``.
+    ``frames`` and ``time`` are accepted to match the callback signature but
+    are not used here.
+    """
+    # Surface any non-empty status reported by the stream.
+    if status:
+        print(f"Audio Status: {status}")
+
+    # Enqueue a private copy rather than ``indata`` itself.
+    # NOTE(review): sounddevice documents the buffer as valid only for the
+    # duration of the callback, which is presumably why it is copied.
+    block = indata.copy()
+    audio_queue.put(block)
+
+def transcribe_loop(model_name=None, device=None, chunk_seconds=3):
+    """Transcribe queued audio in chunks and print the recognised text.
+
+    Runs forever: takes audio blocks from ``audio_queue``, accumulates them
+    until at least ``chunk_seconds`` of audio is buffered, transcribes that
+    chunk as English with Whisper and prints any non-empty result to stdout.
+
+    Args:
+        model_name: Whisper model to load. Defaults to the module-level
+            ``args.wmodel`` set by the command-line entry point.
+        device: Device passed to ``WhisperModel``. Defaults to the
+            module-level ``args.whisperdevice``.
+        chunk_seconds: Minimum amount of audio, in seconds, collected before
+            each transcription call.
+    """
+    # Must match the ``samplerate`` the input stream is opened with.
+    sample_rate = 16000
+    min_samples = sample_rate * chunk_seconds
+
+    # Fall back to the command-line values only when the caller gave none, so
+    # the function also works when the module-level ``args`` does not exist.
+    if model_name is None:
+        model_name = args.wmodel
+    if device is None:
+        device = args.whisperdevice
+
+    # Setup whisper
+    wmodel = WhisperModel(model_name, device=device, compute_type="int8")
+
+    pending = []         # audio blocks not yet transcribed
+    pending_samples = 0  # running total of rows held in ``pending``
+
+    print(">> Transcribing")
+    while True:
+        # Block until the capture side delivers the next audio block.
+        block = audio_queue.get()
+        pending.append(block)
+        pending_samples += block.shape[0]
+
+        # Keep collecting until there is enough audio for transcription.
+        if pending_samples < min_samples:
+            continue
+
+        # Merge the blocks into one flat float32 array and reset the buffer.
+        audio_data = np.concatenate(pending, axis=0).flatten().astype(np.float32)
+        pending = []
+        pending_samples = 0
+
+        segments, _ = wmodel.transcribe(
+                audio_data,
+                language="en",
+                vad_filter=True
+        )
+
+        text = " ".join(s.text for s in segments).strip()
+        if text:
+            print(text)
+
+def listen(stop_event=None):
+    """Keep the microphone input stream open, feeding ``audio_callback``.
+
+    Opens a mono float32 input stream at 16 kHz on the default input device
+    with 8000-frame blocks and keeps it open until told to stop.
+
+    Args:
+        stop_event: Optional ``threading.Event``. When it is set, the stream
+            is closed and the function returns. When omitted, the function
+            blocks until it is interrupted or the process exits.
+    """
+    if stop_event is None:
+        # Private event that nobody can set: wait indefinitely.
+        stop_event = threading.Event()
+
+    print(">> Listening")
+    with sd.InputStream(
+            samplerate=16000,
+            channels=1,
+            dtype="float32",
+            blocksize=8000,
+            device=None,  # default system device
+            callback=audio_callback):
+        try:
+            # The callback does all the work; this thread only keeps the
+            # stream's context open.
+            stop_event.wait()
+        except KeyboardInterrupt:
+            # Python raises KeyboardInterrupt in the main thread only, so
+            # this is reached only when listen() itself runs there.
+            pass
+        print("\nListening Thread Stopped")
+
+def main(args):
+    """Run the capture and transcription workers until Ctrl+C.
+
+    Args:
+        args: Parsed command-line arguments. Not read here;
+            ``transcribe_loop`` picks up the module-level ``args`` itself.
+    """
+    # Both workers loop forever. As non-daemon threads they would keep the
+    # interpreter alive after Ctrl+C, so the program would never actually
+    # terminate; daemon threads are dropped when the main thread exits.
+
+    # Start listening thread
+    t_listen = threading.Thread(target=listen, daemon=True)
+    t_listen.start()
+
+    # Start transcription thread
+    t_transcribe = threading.Thread(target=transcribe_loop, daemon=True)
+    t_transcribe.start()
+
+    try:
+        # Poll with a timeout instead of blocking indefinitely: the wait
+        # stays responsive to Ctrl+C and also ends if either worker dies
+        # (e.g. no audio device, or the model failed to load).
+        while t_listen.is_alive() and t_transcribe.is_alive():
+            t_listen.join(timeout=0.5)
+    except KeyboardInterrupt:
+        print("Main Thread Stopped")
+
+if __name__ == "__main__":
+    # Command-line entry point.
+    # NOTE: ``args`` has to stay a module-level name, because
+    # transcribe_loop() reads it as a global.
+    parser = argparse.ArgumentParser(
+        prog="Interprete",
+        description="Live EN->FR Voice-to-text Translation",
+        epilog="Well... go and use it now.",
+    )
+    # Whisper model passed to WhisperModel.
+    parser.add_argument("-w", "--wmodel", default="base")
+    # Device the Whisper model runs on.
+    parser.add_argument("--whisperdevice", default="cpu")
+
+    args = parser.parse_args()
+    main(args)
+    print("Terminating.")