Saturday, March 8, 2025

 

This is a continuation of a series of articles on the Azure AI text-to-speech services. The earlier article [1] discussed producing mp3 audio for short texts of up to 5000 characters, which can be sent in a single API call. This article discusses converting large texts with the help of the Batch Synthesis API for text to speech. This is helpful for creating audiobooks: even though Speechify ranks high among AI voice generators and ElevenLabs ranks high in AI voice cloning, the result of this approach is good enough for that purpose.

 

import requests

import json

import time

from docx import Document

import os

import uuid

 

# Connection settings for the Azure text-to-speech batch synthesis REST API.
# "JOBID" in the endpoint is a placeholder token: it is substituted with the
# actual job id before a request is sent.
api_key = "<your_api_key>"

endpoint = (
    "https://<your_region>.api.cognitive.microsoft.com"
    "/texttospeech/batchsyntheses/JOBID"
    "?api-version=2024-04-01"
)

# Headers sent with every request; the subscription key authenticates the call.
headers = dict(
    [
        ("Content-Type", "application/json"),
        ("Ocp-Apim-Subscription-Key", api_key),
    ]
)

 

def synthesize_text(inputs, timeout=30):
    """Submit a batch text-to-speech synthesis job and return its id.

    Args:
        inputs: Value sent as the ``"inputs"`` field of the request body.
            The script's caller passes a list of ``{"content": <text>}``
            dicts, one per text to synthesize.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block forever on a stalled connection.

    Returns:
        The ``"id"`` field of the service's JSON response, as a string.

    Raises:
        RuntimeError: If the service answers with an HTTP status >= 400.
        requests.exceptions.RequestException: On connection problems or
            when ``timeout`` elapses.
    """
    body = {
        "inputKind": "PlainText",  # or SSML
        "synthesisConfig": {
            "voice": "en-US-GuyNeural",
        },
        # Replace with your custom voice name and deployment ID if you want to
        # use a custom voice. Multiple voices are supported, and a mixture of
        # custom voices and platform voices is allowed. An invalid voice name
        # or deployment ID will be rejected.
        "customVoices": {
            # "YOUR_CUSTOM_VOICE_NAME": "YOUR_CUSTOM_VOICE_ID"
        },
        "inputs": inputs,
        "properties": {
            "outputFormat": "audio-48khz-192kbitrate-mono-mp3",
        },
    }

    # The job id is chosen by the client: a fresh UUID replaces the "JOBID"
    # placeholder in the endpoint URL.
    job_url = endpoint.replace("JOBID", str(uuid.uuid4()))
    response = requests.put(job_url, headers=headers, json=body, timeout=timeout)

    if response.status_code >= 400:
        raise RuntimeError(
            f"Failed to start batch synthesis job: {response.text}"
        )
    return str(response.json()["id"])

 

def get_synthesis(job_id: str, poll_interval: float = 5, timeout: float = 30):
    """Poll a batch synthesis job until it finishes and return its description.

    Args:
        job_id: Id of the job, as returned when the job was created.
        poll_interval: Seconds to sleep between two status checks.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The decoded JSON description of the job once its status is
        ``"Succeeded"``.

    Raises:
        RuntimeError: If a status request returns an HTTP status >= 400, or
            if the job reports the status ``"Failed"``. Raising here avoids
            retrying in a tight loop on errors and polling a failed job
            forever.
        requests.exceptions.RequestException: On connection problems or
            when ``timeout`` elapses.
    """
    # Same URL template as job creation, with the placeholder filled in.
    url = endpoint.replace("JOBID", job_id)

    while True:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code >= 400:
            raise RuntimeError(
                f"Failed to get batch synthesis job {job_id}: {response.text}"
            )

        job = response.json()
        status = job["status"]
        if status == "Succeeded":
            return job
        if status == "Failed":
            # A failed job never turns into a succeeded one; stop polling.
            raise RuntimeError(
                f"Batch synthesis job {job_id} failed: {response.text}"
            )

        print(f'batch synthesis job is still running, status [{status}]')
        time.sleep(poll_interval)  # Wait before checking again

 

def get_text(file_path):
    """Read a text file, print its length in characters, and return its text.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default locale encoding.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The whole content of the file as a string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        file_contents = file.read()
    print(f"Length of text: {len(file_contents)}")
    return file_contents

 

# Main execution

if __name__ == "__main__":
    # Build one batch input per text file; the files 2.txt .. 6.txt are
    # expected in the current working directory.
    inputs = []
    for i in range(2, 7):
        input_file_name = f"{i}.txt"
        print(input_file_name)
        inputs.append({"content": get_text(input_file_name)})

    # Start the batch synthesis job
    jobId = synthesize_text(inputs)
    print(jobId)

    # Wait for the job to finish and fetch its description. Per the sample
    # result in this article, the audio itself is not downloaded here: the
    # description's outputs.result field carries the URL of a results.zip.
    audio = get_synthesis(jobId)

    print("Result:")
    print(audio)

 

Sample result:

Result:

{'id': '8cdbd29d-43f3-4878-b83c-b5326688e302', 'status': 'Succeeded', 'createdDateTime': '2025-03-08T08:16:42.8628654Z', 'lastActionDateTime': '2025-03-08T08:16:55.1972806', 'inputKind': 'PlainText', 'customVoices': {}, 'properties': {'timeToLiveInHours': 744, 'outputFormat': 'audio-48khz-192kbitrate-mono-mp3', 'concatenateResult': False, 'decompressOutputFiles': False, 'wordBoundaryEnabled': False, 'sentenceBoundaryEnabled': False, 'sizeInBytes': 38024640, 'succeededAudioCount': 5, 'failedAudioCount': 0, 'durationInMilliseconds': 1584360, 'billingDetails': {'neuralCharacters': 27710}}, 'synthesisConfig': {'voice': 'en-US-GuyNeural'}, 'outputs': {'result': 'https://stttssvcproduse.blob.core.windows.net/batchsynthesis-output/da113ddc2b524d9e8b95c6f6b6ab2a61/8cdbd29d-43f3-4878-b83c-b5326688e302/results.zip?skoid=12345678-6c19-4f12-8d9f-57c205aaba10&sktid=33e01921-4d64-4f8c-a055-5bdaffd5e33d&skt=2025-03-07T13%3A02%3A37Z&ske=2025-03-13T13%3A07%3A37Z&sks=b&skv=2023-11-03&sv=2023-11-03&st=2025-03-08T08%3A11%3A59Z&se=2025-03-11T08%3A16%3A59Z&sr=b&sp=rl&sig=s0DIH6g6gryEgmDEHlbd2ilqC5xfuB2J7HJ%2FddOlHcA%3D'}}


No comments:

Post a Comment