Start new clean program to combine transcription and translation
This commit is contained in:
parent
f026debfa0
commit
82b86216e9
1 changed files with 85 additions and 0 deletions
85
main.py
Normal file
85
main.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
import sounddevice as sd
|
||||||
|
import numpy as np
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
import argparse
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
# Hand-off queue between the audio stream callback (which puts captured
# blocks) and the transcription loop (which gets them). Created without a
# maxsize, so it is unbounded.
audio_queue = queue.Queue()
|
||||||
|
|
||||||
|
def audio_callback(indata, frames, time, status):
    """Input-stream callback: report status flags and enqueue the audio block.

    Args:
        indata: Array of captured samples for the current block.
        frames: Frame count for this block (not used here).
        time: Timing information passed by the stream (not used here).
        status: Stream status flags; printed only when truthy.
    """
    if status:
        print(f"Audio Status: {status}")

    # Enqueue a private copy rather than the array itself -- presumably the
    # stream reuses its buffer once the callback returns (confirm in the
    # sounddevice documentation).
    snapshot = indata.copy()
    audio_queue.put(snapshot)
|
||||||
|
|
||||||
|
def transcribe_loop():
    """Pull audio blocks off the queue, transcribe them in windows, print the text.

    Blocks are accumulated until at least ``sample_rate * chunk_seconds``
    samples are pending; the accumulated audio is then flattened to a 1-D
    float32 array, passed to the Whisper model, and any non-empty text is
    printed. The loop never returns.

    The model name and device are read from the module-level ``args``
    (``args.wmodel`` and ``args.whisperdevice``), so that name must exist
    before this function runs.
    """
    sample_rate = 16000
    chunk_seconds = 3
    window_samples = sample_rate * chunk_seconds

    # Setup whisper
    wmodel = WhisperModel(args.wmodel, device=args.whisperdevice, compute_type="int8")

    pending = []
    pending_samples = 0

    print(">> Transcribing")
    while True:
        # Block until the capture side delivers the next chunk.
        block = audio_queue.get()
        pending.append(block)
        pending_samples += block.shape[0]

        # Check if there is enough audio for transcription.
        if pending_samples < window_samples:
            continue

        # Take the accumulated audio and reset the accumulator.
        joined = np.concatenate(pending, axis=0)
        audio_data = joined.flatten().astype(np.float32)
        pending = []
        pending_samples = 0

        segments, _ = wmodel.transcribe(audio_data, language="en", vad_filter=True)

        pieces = [segment.text for segment in segments]
        text = " ".join(pieces).strip()
        if text:
            print(text)
|
||||||
|
|
||||||
|
def listen():
    """Open an input stream that feeds ``audio_callback`` and keep it open.

    The stream is opened at 16000 Hz, one channel, float32 samples, with a
    block size of 8000 frames. The function then waits on an event that is
    never set, so it only leaves the ``with`` block if a KeyboardInterrupt
    is raised in the calling thread.
    """
    print(">> Listening")

    stream_options = dict(
        samplerate=16000,
        channels=1,
        dtype="float32",
        blocksize=8000,
        device=None,  # default system device
        callback=audio_callback,
    )

    with sd.InputStream(**stream_options):
        never_set = threading.Event()
        try:
            # Nothing ever sets this event; the wait just keeps the stream open.
            never_set.wait()
        except KeyboardInterrupt:
            print("\nListening Thread Stopped")
|
||||||
|
|
||||||
|
def main(args):
    """Start the capture and transcription workers and block until Ctrl-C.

    Args:
        args: Parsed command-line arguments. Not read directly in this
            function; it is accepted so the caller can hand over its
            parsed options.
    """
    # Both workers block indefinitely (one on an event that is never set, the
    # other on a queue read). They are started as daemon threads so that the
    # interpreter does not wait for them at shutdown; as non-daemon threads
    # they keep the process alive after Ctrl-C has ended the main thread.

    # Start listening thread
    t_listen = threading.Thread(target=listen, daemon=True)
    t_listen.start()

    # Start transcription thread
    t_transcribe = threading.Thread(target=transcribe_loop, daemon=True)
    t_transcribe.start()

    try:
        # Park the main thread; KeyboardInterrupt is delivered here.
        threading.Event().wait()
    except KeyboardInterrupt:
        print("Main Thread Stopped")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: build the parser, parse, run, then report exit.
    parser = argparse.ArgumentParser(
        prog='Interprete',
        description='Live EN->FR Voice-to-text Translation',
        epilog='Well... go and use it now.',
    )

    # (option strings, default value) for each supported option.
    option_specs = (
        (('-w', '--wmodel'), "base"),
        (('--whisperdevice',), "cpu"),
    )
    for option_strings, default_value in option_specs:
        parser.add_argument(*option_strings, default=default_value)

    # `args` must stay a module-level name: transcribe_loop reads it as a
    # global rather than receiving it as a parameter.
    args = parser.parse_args()

    main(args)
    print("Terminating.")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue