Cluster computing

Monday, April 7, 2025

The following scripts can be used to convert the manuscript of a book into its corresponding audio production.

Option 1: individual chapters

import azure.cognitiveservices.speech as speechsdk

import time

def batch_text_to_speech(
    text,
    output_filename,
    speech_key="<use-your-speech-key>",
    service_region="eastus",
    voice_name="en-US-BrianMultilingualNeural",
):
    """Synthesize *text* into an MP3 file using the Azure Speech SDK.

    The outcome is reported on stdout; nothing is returned and no exception
    is raised by this function itself when the service cancels the request.

    Args:
        text: Plain text to be spoken.
        output_filename: Path of the audio file to write.
        speech_key: Azure Speech resource key (placeholder by default).
        service_region: Azure region of the Speech resource.
        voice_name: Name of the neural voice used for synthesis.
    """
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key,
        region=service_region,
    )
    # 48 kHz / 192 kbit/s mono MP3 output.
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
    )
    speech_config.speech_synthesis_voice_name = voice_name

    # Route the synthesized audio straight into the output file.
    audio_config = speechsdk.audio.AudioOutputConfig(filename=output_filename)
    synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    # NOTE: the whole text is sent in one request; splitting it first
    # (e.g. with split_large_text) is optional and left to the caller.
    result = synthesizer.speak_text_async(text).get()

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"Audio synthesized to {output_filename}")
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print(f"Speech synthesis canceled: {cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {cancellation_details.error_details}")

def split_large_text(text, max_length=9000):
    """Split *text* into consecutive chunks of at most *max_length* characters.

    Chunks are cut just after the last whitespace character inside each
    window when one exists, so words are not broken in the middle; a window
    without usable whitespace is cut at exactly *max_length*. Joining the
    returned chunks reproduces *text* unchanged.

    Args:
        text: The string to split.
        max_length: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of chunks in order; empty when *text* is empty.

    Raises:
        ValueError: If *max_length* is not positive.
    """
    if max_length <= 0:
        raise ValueError("max_length must be a positive integer")

    chunks = []
    total = len(text)
    start = 0
    while start < total:
        end = min(start + max_length, total)
        if end < total:
            # Look for the last whitespace in the window to avoid cutting a word.
            cut = max(text.rfind(ws, start, end) for ws in " \n\t")
            # cut > start guarantees the chunk is non-empty and we make progress.
            if cut > start:
                end = cut + 1
        chunks.append(text[start:end])
        start = end
    return chunks

# Convert chapter files 1.txt .. 99.txt into 1.mp3 .. 99.mp3, one synthesis
# request per chapter. The range is only an upper bound, so chapter files
# that do not exist are skipped instead of aborting the whole run.
for i in range(1, 100):
    input_filename = f"{i}.txt"
    print(input_filename)
    try:
        with open(input_filename, "r", encoding="utf-8") as fin:
            large_text = fin.read()
    except FileNotFoundError:
        print(f"Skipping missing file {input_filename}")
        continue
    if not large_text.strip():
        # Nothing to synthesize for an empty chapter.
        print(f"Skipping empty file {input_filename}")
        continue
    # Build the output name from the index rather than replacing "txt"
    # anywhere it happens to occur in the file name.
    output_filename = f"{i}.mp3"
    print(str(len(large_text)) + " " + output_filename)
    batch_text_to_speech(large_text, output_filename)

Option 2: whole manuscript

import json
import os
import time
import uuid

import requests
from docx import Document

# Azure AI Speech batch synthesis configuration.
# "JOBID" in the endpoint is a placeholder that is replaced with a freshly
# generated job identifier when a job is submitted.
endpoint = "https://eastus.api.cognitive.microsoft.com/texttospeech/batchsyntheses/JOBID?api-version=2024-04-01"
api_key = "<your_api_key>"

# Headers sent with every request to the batch synthesis endpoint.
headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

def synthesize_text(inputs):
    """Submit a batch synthesis job and return its job id.

    Args:
        inputs: List of ``{"content": <text>}`` dictionaries, one per piece
            of text to synthesize.

    Returns:
        The ``id`` field of the service response, as a string.

    Raises:
        RuntimeError: If the service answers with an HTTP status >= 400.
    """
    body = {
        "inputKind": "PlainText",  # or SSML
        "synthesisConfig": {
            "voice": "en-US-BrianMultilingualNeural",
        },
        # Replace with your custom voice name and deployment ID if you want to use custom voice.
        # Multiple voices are supported, the mixture of custom voices and platform voices is allowed.
        # Invalid voice name or deployment ID will be rejected.
        "customVoices": {
            # "YOUR_CUSTOM_VOICE_NAME": "YOUR_CUSTOM_VOICE_ID"
        },
        "inputs": inputs,
        "properties": {
            "outputFormat": "audio-48khz-192kbitrate-mono-mp3",
        },
    }

    # Each job is created under a new, randomly generated identifier.
    job_url = endpoint.replace("JOBID", str(uuid.uuid4()))
    response = requests.put(job_url, headers=headers, json=body)
    if response.status_code >= 400:
        raise RuntimeError(f"Failed to start batch synthesis job: {response.text}")
    return str(response.json()["id"])

def get_synthesis(job_id: str):
    """Poll a batch synthesis job until it finishes and return its description.

    The job is queried every 5 seconds while it is still in progress.

    Args:
        job_id: Identifier returned when the job was submitted.

    Returns:
        The decoded JSON response of the job once its status contains
        "Succeeded".

    Raises:
        RuntimeError: If a status request returns an HTTP status >= 400 or
            the job reports a "Failed" status; either case would otherwise
            keep this loop polling forever.
    """
    # URL and headers do not change between polls, so build them once.
    url = f'https://eastus.api.cognitive.microsoft.com/texttospeech/batchsyntheses/{job_id}?api-version=2024-04-01'
    request_headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": api_key,
    }

    while True:
        response = requests.get(url, headers=request_headers)
        if response.status_code >= 400:
            raise RuntimeError(f"Failed to get batch synthesis job: {response.text}")

        job = response.json()
        status = job['status']
        if "Succeeded" in status:
            return job
        if "Failed" in status:
            raise RuntimeError(f"Batch synthesis job [{job_id}] failed: {response.text}")

        print(f'batch synthesis job is still running, status [{status}]')
        time.sleep(5)  # Wait for 5 seconds before checking again

def get_text(file_path):
    """Read a text file, print its length, and return its contents.

    The file is decoded as UTF-8 so the result does not depend on the
    platform's default encoding.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The full contents of the file as a string.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        file_contents = file.read()
    print(f"Length of text: {len(file_contents)}")
    return file_contents

if __name__ == "__main__":
    # Collect every available chapter (1.txt .. 99.txt) into one batch job so
    # the whole manuscript is synthesized by a single request.
    inputs = []
    for i in range(1, 100):
        input_file_name = f"{i}.txt"
        print(input_file_name)
        try:
            document_text = get_text(input_file_name)
        except FileNotFoundError:
            # The range is only an upper bound on the number of chapters.
            print(f"Skipping missing file {input_file_name}")
            continue
        if not document_text.strip():
            print(f"Skipping empty file {input_file_name}")
            continue
        inputs.append({"content": document_text})

    if not inputs:
        raise SystemExit("No input text found; nothing to synthesize.")

    job_id = synthesize_text(inputs)
    print(job_id)

    # Get audio result
    audio = get_synthesis(job_id)
    print("Result:")
    print(audio)

Cluster computing

Monday, April 7, 2025

No comments:

Post a Comment