speech2text.py

This script reverses an audio file (MP3 or WAV) and then converts the reversed speech to text. It can recognize speech in English and Malay.

import argparse
from pydub import AudioSegment
import speech_recognition as sr
import os

def convert_to_wav(input_file, output_file):
    """Decode ``input_file`` with pydub and write it to ``output_file`` as WAV.

    No explicit format is passed to ``AudioSegment.from_file``, so detecting
    the input format (MP3 or WAV) is left to pydub.
    """
    AudioSegment.from_file(input_file).export(output_file, format="wav")

def reverse_wav(input_file, output_file):
    """Write a reversed copy of the WAV at ``input_file`` to ``output_file``."""
    # Load -> reverse -> export, expressed as a single pipeline.
    AudioSegment.from_wav(input_file).reverse().export(output_file, format="wav")

def convert_speech_to_text(wav_file, language='ms'):
    """Transcribe the speech in ``wav_file`` via the Google Web Speech API.

    Args:
        wav_file: Path of the WAV file to transcribe.
        language: Language code handed to ``recognize_google``; defaults
            to ``'ms'``.

    Returns:
        The recognized text. When recognition fails, a human-readable
        message string is returned instead of raising: one for audio that
        could not be understood, one for a failed API request.
    """
    engine = sr.Recognizer()

    with sr.AudioFile(wav_file) as wav_source:
        # Capture the audio from the opened file.
        captured = engine.record(wav_source)

        try:
            transcript = engine.recognize_google(captured, language=language)
        except sr.UnknownValueError:
            return "Speech Recognition could not understand audio"
        except sr.RequestError as err:
            return f"Could not request results; {err}"
        else:
            return transcript

def main():
    """Command-line entry point: reverse an audio file and transcribe it.

    Parses ``input_file`` (WAV or MP3) and an optional ``--language`` code
    (default ``'ms'``) from the command line, converts MP3 input to WAV,
    reverses the audio, sends the reversed audio to
    ``convert_speech_to_text`` and prints the result.

    All intermediate files live in a private temporary directory that is
    removed on every exit path, so files next to the input are never
    overwritten or deleted, and nothing is left behind when a step fails.

    Returns:
        None. Problems (unsupported extension, conversion or reversing
        errors) are reported on stdout and end the run early.
    """
    # Function-scope import: only this entry point needs tempfile.
    import tempfile

    # Set up argument parser
    parser = argparse.ArgumentParser(description="Reverse an audio file (WAV or MP3) and convert Malay speech to text.")
    parser.add_argument('input_file', type=str, help="Path to the input audio file (WAV or MP3)")
    parser.add_argument(
        '-l', '--language',
        default='ms',
        help="Language code passed to the speech recognizer, "
             "e.g. 'ms' (Malay) or 'en-US' (English). Default: ms",
    )
    args = parser.parse_args()

    input_file = args.input_file
    ext = os.path.splitext(input_file)[1].lower()

    if ext not in ('.wav', '.mp3'):
        print("Unsupported file format. Please provide a WAV or MP3 file.")
        return

    # The context manager deletes the directory and everything in it, even
    # when a step below returns early or raises.
    with tempfile.TemporaryDirectory() as work_dir:
        source_wav = input_file

        # Convert MP3 to WAV if needed
        if ext == '.mp3':
            source_wav = os.path.join(work_dir, 'converted.wav')
            try:
                convert_to_wav(input_file, source_wav)
            except Exception as e:
                print(f"Error converting audio file: {e}")
                return

        # Reverse the WAV file
        reversed_wav = os.path.join(work_dir, 'reversed.wav')
        try:
            reverse_wav(source_wav, reversed_wav)
        except Exception as e:
            print(f"Error reversing audio file: {e}")
            return

        # Transcribe while the reversed file still exists.
        text = convert_speech_to_text(reversed_wav, language=args.language)

    print("Transcribed Text:")
    print(text)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Last updated