This follows up on a previous article and shows how to split a large text into chunks for use with a text-to-speech API (here, Azure Cognitive Services Speech):
import azure.cognitiveservices.speech as speechsdk
import io
import wave

def split_text(text, max_chunk_size=5000):
    """Split text into chunks of approximately max_chunk_size characters."""
    words = text.split()
    chunks = []
    current_chunk = []
    current_size = 0
    for word in words:
        if current_size + len(word) + 1 > max_chunk_size:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_size = len(word)
        else:
            current_chunk.append(word)
            current_size += len(word) + 1
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
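# Illustrative sanity check (not part of the original post): with a small
# max_chunk_size, every chunk stays within the limit and word order is kept.
# >>> split_text("alpha beta gamma delta", max_chunk_size=11)
# ['alpha beta', 'gamma delta']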
def synthesize_text(speech_synthesizer, text):
    """Synthesize speech from text."""
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        return result.audio_data
    else:
        print(f"Speech synthesis failed: {result.reason}")
        return None
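# Illustrative variant (not part of the original post): when a request fails,
# the result's cancellation_details usually explain why (bad key, wrong region,
# throttling). The attribute names below follow the azure-cognitiveservices-speech
# samples; adjust if your SDK version differs.
def synthesize_text_verbose(speech_synthesizer, text):
    """Synthesize speech from text and print diagnostic details on failure."""
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        return result.audio_data
    if result.reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print(f"Speech synthesis canceled: {details.reason}")
        if details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {details.error_details}")
    return None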
def combine_audio(audio_chunks):
    """Combine multiple WAV audio chunks into a single WAV file."""
    combined = io.BytesIO()
    with wave.open(combined, 'wb') as wav_file:
        for i, audio_chunk in enumerate(audio_chunks):
            # Each chunk is a complete RIFF/WAV payload, so read it back with
            # the wave module and append only the raw PCM frames, not the
            # per-chunk headers.
            with wave.open(io.BytesIO(audio_chunk), 'rb') as chunk_file:
                if i == 0:
                    # Set output parameters from the first chunk
                    wav_file.setparams(chunk_file.getparams())
                wav_file.writeframes(chunk_file.readframes(chunk_file.getnframes()))
    return combined.getvalue()
def process_large_text(text, speech_key, service_region):
    """Process large text by splitting, synthesizing, and combining audio."""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
    chunks = split_text(text)
    audio_chunks = []
    for chunk in chunks:
        audio_data = synthesize_text(speech_synthesizer, chunk)
        if audio_data:
            audio_chunks.append(audio_data)
    if audio_chunks:
        combined_audio = combine_audio(audio_chunks)
        return combined_audio
    else:
        return None
# Usage example
if __name__ == "__main__":
    speech_key = "YOUR_SPEECH_KEY"
    service_region = "YOUR_SERVICE_REGION"
    large_text = "Your very large text goes here... " * 1000  # Example of a large text
    result = process_large_text(large_text, speech_key, service_region)
    if result:
        with open("output.wav", "wb") as audio_file:
            audio_file.write(result)
        print("Audio file 'output.wav' has been created.")
    else:
        print("Failed to process the text.")
A large Word document can be extracted to plain text with python-docx as shown below:
from docx import Document

input_file = 'Document1.docx'
output_file = 'Text1.txt'

def process_large_file(input_file_path, output_file_path):
    try:
        doc = Document(input_file_path)
        print(f"Number of paragraphs: {len(doc.paragraphs)}")
        with open(output_file_path, 'a', encoding='utf-8') as out:
            for para in doc.paragraphs:
                chunk = para.text
                if chunk:
                    out.write(chunk)
                    out.write("\r\n")
    except Exception as e:
        print(f"An error occurred: {e}")

process_large_file(input_file, output_file)
print(f"Text has been extracted from {input_file} and written to {output_file}")
--
https://ezcloudiac.com/info/index.html