diff --git a/cuda_env.sh b/cuda_env.sh
new file mode 100644
index 0000000..3499c32
--- /dev/null
+++ b/cuda_env.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Add NVIDIA CUDA libraries to LD_LIBRARY_PATH for faster-whisper.
+# Usage: source cuda_env.sh   (must be sourced, not executed, so the
+# exported variable persists in the calling shell).
+
+# Ask the venv's own interpreter for its version instead of hard-coding
+# "python3.14", so this keeps working after a Python upgrade.
+PY_VER="$(.venv/bin/python -c 'import sys; print("python%d.%d" % sys.version_info[:2])')"
+NVIDIA_LIB_BASE="$(pwd)/.venv/lib/${PY_VER}/site-packages/nvidia"
+
+export LD_LIBRARY_PATH="\
+$NVIDIA_LIB_BASE/cublas/lib:\
+$NVIDIA_LIB_BASE/cudnn/lib:\
+$NVIDIA_LIB_BASE/cuda_runtime/lib:\
+$LD_LIBRARY_PATH"
+
+echo "CUDA library paths set."
diff --git a/requirements.txt b/requirements.txt
index df3340d..f2f4d6e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,7 @@
 numpy
 sounddevice
 faster-whisper
+transformers
+torch
+sentencepiece
+sacremoses
diff --git a/test_live.py b/test_live.py
index ad2b890..1a256f7 100644
--- a/test_live.py
+++ b/test_live.py
@@ -11,7 +11,7 @@ CHUNK_SECONDS = 3 # record N seconds, then transcribe
 DEVICE = None # None = default mic, or set to device index
 
 # --- Setup ---
-model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
+model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int8")  # NOTE(review): on CUDA, "float16"/"int8_float16" is the usual compute_type — confirm int8 is intended
 audio_queue = queue.Queue()
 
 def audio_callback(indata, frames, time, status):
diff --git a/test_live_translate.py b/test_live_translate.py
new file mode 100644
index 0000000..bc1061f
--- /dev/null
+++ b/test_live_translate.py
@@ -0,0 +1,57 @@
+import time
+
+import torch
+from transformers import MarianMTModel, MarianTokenizer
+
+# English -> French MarianMT model; swap MODEL_NAME for other language pairs.
+MODEL_NAME = "Helsinki-NLP/opus-mt-en-fr"
+tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
+model = MarianMTModel.from_pretrained(MODEL_NAME)
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(DEVICE)
+model.eval()  # disable dropout, slightly faster inference
+
+FILE_PATH = "input.txt"
+OUTPUT_PATH = "output_fr.txt"
+
+def translate(text: str) -> str:
+    """Translate one English string to French and return the result."""
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=512,
+    ).to(DEVICE)
+
+    with torch.no_grad():  # saves memory, faster on GPU
+        translated = model.generate(**inputs)
+
+    return tokenizer.decode(translated[0], skip_special_tokens=True)
+
+def tail_and_translate(filepath: str):
+    """Follow filepath like `tail -f`, translating each new non-empty line."""
+    with open(filepath, "r", encoding="utf-8") as f:
+        f.seek(0, 2)  # jump to end of file
+        print(f"Watching {filepath}...")
+
+        with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
+            while True:
+                line = f.readline()
+                if line:
+                    line = line.strip()
+                    if line:
+                        translated = translate(line)
+                        print(f"EN: {line}")
+                        print(f"FR: {translated}")
+                        out.write(translated + "\n")
+                        out.flush()
+                else:
+                    time.sleep(0.2)
+
+if __name__ == "__main__":  # allow importing translate() without starting the loop
+    try:
+        tail_and_translate(FILE_PATH)
+    except KeyboardInterrupt:
+        print("Stopped.")