speech2text.py
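This script reverses an audio file (MP3 or WAV) and then converts the reversed speech to text using the Google Web Speech API. It transcribes Malay by default; the recognition function accepts a language code, so other languages such as English can be used as well.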
import argparse
from pydub import AudioSegment
import speech_recognition as sr
import os
def convert_to_wav(input_file, output_file):
    """Re-encode an audio file as WAV.

    Args:
        input_file: Path of the audio file to load (the script passes MP3
            or WAV files).
        output_file: Path where the WAV-encoded copy is written.
    """
    # Load with pydub, then export immediately in WAV format.
    AudioSegment.from_file(input_file).export(output_file, format="wav")
def reverse_wav(input_file, output_file):
    """Write a time-reversed copy of a WAV file.

    Args:
        input_file: Path of the WAV file to read.
        output_file: Path where the reversed WAV audio is written.
    """
    # Load -> reverse -> export, expressed as one pipeline.
    AudioSegment.from_wav(input_file).reverse().export(output_file, format="wav")
def convert_speech_to_text(wav_file, language='ms'):
    """Transcribe a WAV file using the Google Web Speech API.

    Args:
        wav_file: Path of the WAV file to transcribe.
        language: Language code handed to the recognizer; defaults to 'ms'.

    Returns:
        The recognized text on success. If the audio cannot be understood or
        the request fails, a descriptive message string is returned instead
        (no exception is raised for those two cases).
    """
    engine = sr.Recognizer()

    # Capture the file's audio as recognizer audio data.
    with sr.AudioFile(wav_file) as stream:
        recording = engine.record(stream)

    try:
        transcript = engine.recognize_google(recording, language=language)
    except sr.UnknownValueError:
        return "Speech Recognition could not understand audio"
    except sr.RequestError as err:
        return f"Could not request results; {err}"
    return transcript
def main():
    """Command-line entry point: reverse an audio file and transcribe it.

    Parses the command line, validates that the input is a WAV or MP3 file,
    converts MP3 input to WAV, reverses the audio, sends the reversed audio
    to the speech recognizer and prints the result.

    Command-line arguments:
        input_file: Path to the input audio file (WAV or MP3).
        -l / --language: Language code passed to the recognizer. Defaults to
            'ms' (Malay), which matches the previously hard-coded behavior.

    All intermediate files live in a private temporary directory, so they
    are removed on every exit path (including failures) and can never
    overwrite files that happen to sit next to the input.

    Returns:
        None. Problems are reported on standard output.
    """
    # Function-scope import: only this entry point needs tempfile.
    import tempfile

    parser = argparse.ArgumentParser(
        description="Reverse an audio file (WAV or MP3) and convert Malay speech to text."
    )
    parser.add_argument('input_file', type=str, help="Path to the input audio file (WAV or MP3)")
    parser.add_argument(
        '-l', '--language', type=str, default='ms',
        help="Language code for speech recognition (default: 'ms' for Malay; e.g. 'en-US' for English)",
    )
    args = parser.parse_args()

    input_file = args.input_file
    ext = os.path.splitext(input_file)[1].lower()
    if ext not in ('.wav', '.mp3'):
        print("Unsupported file format. Please provide a WAV or MP3 file.")
        return

    # The directory and everything in it is deleted when the block exits,
    # whether normally, via an early return, or because of an exception.
    with tempfile.TemporaryDirectory() as work_dir:
        reversed_wav = os.path.join(work_dir, 'reversed.wav')

        # Convert MP3 to WAV if needed; WAV input is used as-is.
        if ext == '.mp3':
            source_wav = os.path.join(work_dir, 'converted.wav')
            try:
                convert_to_wav(input_file, source_wav)
            except Exception as e:
                print(f"Error converting audio file to WAV: {e}")
                return
        else:
            source_wav = input_file

        # Reverse the WAV file.
        try:
            reverse_wav(source_wav, reversed_wav)
        except Exception as e:
            print(f"Error reversing audio file: {e}")
            return

        # Transcribe while the reversed file still exists on disk.
        text = convert_speech_to_text(reversed_wav, language=args.language)

    print("Transcribed Text:")
    print(text)
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Last updated