Tuesday, March 4, 2025

 This follows up on a previous article about splitting a large text for use with a text-to-speech API:

import azure.cognitiveservices.speech as speechsdk

import io

import wave

def split_text(text, max_chunk_size=5000):
    """Split *text* into whitespace-delimited chunks of at most ~max_chunk_size characters.

    Words are never cut in half; a single word longer than
    ``max_chunk_size`` becomes a chunk of its own.

    Args:
        text: The input string to split.
        max_chunk_size: Soft upper bound on each chunk's length, in characters.

    Returns:
        A list of non-empty chunk strings (an empty list for empty input).
    """
    words = text.split()
    chunks = []
    current_chunk = []
    current_size = 0
    for word in words:
        # +1 accounts for the space that will join this word to the chunk.
        if current_size + len(word) + 1 > max_chunk_size:
            # Bug fix: only flush a non-empty chunk. Previously, a first
            # word longer than max_chunk_size produced an empty '' chunk.
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_size = len(word)
        else:
            current_chunk.append(word)
            current_size += len(word) + 1
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks

def synthesize_text(speech_synthesizer, text):
    """Synthesize speech for *text*.

    Args:
        speech_synthesizer: A configured ``speechsdk.SpeechSynthesizer``.
        text: The text to speak.

    Returns:
        The synthesized audio bytes, or None if synthesis did not complete.
    """
    result = speech_synthesizer.speak_text_async(text).get()
    # Guard clause: anything other than a completed synthesis is a failure.
    if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"Speech synthesis failed: {result.reason}")
        return None
    return result.audio_data

def combine_audio(audio_chunks):
    """Combine multiple complete WAV byte strings into one WAV file.

    Each element of *audio_chunks* is expected to be a full RIFF/WAV byte
    string (header + PCM frames), as produced by the speech SDK in Riff
    output format. All chunks must share the same sample rate, sample
    width, and channel count.

    Bug fix: the original passed each chunk's raw bytes to ``writeframes``,
    which embedded every chunk's RIFF header into the audio stream as bogus
    samples (audible clicks, wrong length). Each chunk is now parsed with
    the ``wave`` module and only its PCM frames are written.

    Args:
        audio_chunks: Iterable of WAV byte strings.

    Returns:
        Bytes of a single well-formed WAV file.

    Raises:
        wave.Error: If *audio_chunks* is empty (no stream parameters are
            ever set) or a chunk is not a valid WAV file.
    """
    combined = io.BytesIO()
    with wave.open(combined, 'wb') as wav_file:
        params_set = False
        for audio_chunk in audio_chunks:
            with wave.open(io.BytesIO(audio_chunk), 'rb') as chunk_file:
                if not params_set:
                    # Adopt stream parameters from the first chunk.
                    wav_file.setparams(chunk_file.getparams())
                    params_set = True
                # Write only the PCM frames, never the chunk's header bytes.
                wav_file.writeframes(chunk_file.readframes(chunk_file.getnframes()))
    return combined.getvalue()

def process_large_text(text, speech_key, service_region):
    """Process large text by splitting, synthesizing, and combining audio.

    Args:
        text: Full input text of arbitrary length.
        speech_key: Azure Speech resource subscription key.
        service_region: Azure region of the Speech resource.

    Returns:
        The combined audio bytes for the whole text, or None if no chunk
        was synthesized successfully.
    """
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Riff (WAV) output so the per-chunk results can be stitched together.
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm
    )
    # audio_config=None keeps the audio in memory instead of the speakers.
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)

    audio_chunks = []
    for text_chunk in split_text(text):
        audio = synthesize_text(synthesizer, text_chunk)
        if audio:
            audio_chunks.append(audio)

    return combine_audio(audio_chunks) if audio_chunks else None

# Usage example
if __name__ == "__main__":
    speech_key = "YOUR_SPEECH_KEY"
    service_region = "YOUR_SERVICE_REGION"

    # Example of a large text
    large_text = "Your very large text goes here... " * 1000

    result = process_large_text(large_text, speech_key, service_region)
    if not result:
        print("Failed to process the text.")
    else:
        with open("output.wav", "wb") as audio_file:
            audio_file.write(result)
        print("Audio file 'output.wav' has been created.")

A large Word document can be extracted to plain text as shown below:

from docx import Document
import os

input_file = 'Document1.docx'
output_file = 'Text1.txt'


def process_large_file(input_file_path, output_file_path):
    """Extract all paragraph text from a .docx file into a plain-text file.

    Args:
        input_file_path: Path to the source Word document.
        output_file_path: Path to the text file to write (created if missing).

    Errors opening or reading either file are caught and printed rather
    than raised, matching the original best-effort behavior.
    """
    try:
        doc = Document(input_file_path)
        print(f"Number of paragraphs: {len(doc.paragraphs)}")
        # 'a' appends, so repeated runs accumulate text; use 'w' to overwrite.
        with open(output_file_path, 'a', encoding='utf-8') as out:
            for para in doc.paragraphs:
                chunk = para.text
                if chunk:
                    out.write(chunk)
                    out.write("\r\n")
    except Exception as e:
        print(f"An error occurred: {e}")


process_large_file(input_file, output_file)
print(f"Text has been extracted from {input_file} and written to {output_file}")

--

https://ezcloudiac.com/info/index.html


No comments:

Post a Comment