Hello all, I would appreciate your help developing a Python program that converts speech to text. I am using the whisper, torch, and sys modules, but the app does not seem to work correctly yet, and I do not know how to approach or implement a solution to this problem.
This is my code:
import whisper
import torch
import sys
def transcribe_audio(audio_path, language="el"):
    """
    Transcribes the given audio file to text in the specified language using Whisper.

    Args:
        audio_path (str): Path to the audio file.
        language (str): Language code for transcription (default is 'el' for Greek).

    Returns:
        str: The transcribed text.

    Raises:
        FileNotFoundError: If audio_path is a filesystem path that does not
            point to an existing file.
    """
    import os

    # Fail fast with a clear error before paying for the model load; a
    # mistyped path would otherwise only surface once decoding starts.
    # Non-path inputs (e.g. an already-loaded waveform) are passed through.
    if isinstance(audio_path, (str, os.PathLike)) and not os.path.isfile(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")
    model = whisper.load_model("small", device=device)

    print(f"Transcribing {audio_path} in language: {language}...")
    # Request half precision only on GPU: on CPU Whisper cannot use FP16 and
    # would emit a warning before falling back to FP32 anyway.
    result = model.transcribe(
        audio_path,
        language=language,
        task="transcribe",
        fp16=(device == "cuda"),
    )
    return result["text"]
if __name__ == "__main__":
    # The first command-line argument is the audio file path; any extra
    # arguments are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python speech_to_text.py <audio_file_path>")
        sys.exit(1)

    input_path = cli_args[0]
    # The script always transcribes as Greek ("el").
    transcript = transcribe_audio(input_path, language="el")
    print("Transcribed Greek Text:", transcript, sep="\n")
Thank you in advance,
Alex