Cluster computing

Indexes in an Azure AI Search resource can be shared between accounts and rolled over to a new index when they reach their limits. The following code automates creating such an index:

import requests
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
import os

# Script configuration. Required settings are read with os.environ[...] so a
# missing variable fails immediately with a KeyError naming that variable;
# optional settings use os.getenv with a default.

# Project and agent settings.
project_endpoint = os.environ["AZURE_PROJECT_ENDPOINT"]
project_api_key = os.environ["AZURE_PROJECT_API_KEY"]
agent_model = os.getenv("AZURE_AGENT_MODEL", "gpt-4o-mini")
chat_agent_name = os.getenv("AZURE_CHAT_AGENT_NAME", "chat-agent-in-a-team")
search_agent_name = os.getenv("AZURE_SEARCH_AGENT_NAME", "search-agent-in-a-team")
agent_max_output_tokens = 10000

# Azure AI Search service settings.
search_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
# Previously the environment value was read and then unconditionally
# overwritten by the literal below; the literal is now the default instead.
api_version = os.getenv("AZURE_SEARCH_API_VERSION", "2025-05-01-Preview")
# The admin key is required: AzureKeyCredential cannot be built from None.
search_api_key = os.environ["AZURE_SEARCH_ADMIN_KEY"]
credential = AzureKeyCredential(search_api_key)
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME", "index00")
new_index_name = "index05"

# Azure OpenAI settings used by the vectorizer.
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
# May be None when AZURE_OPENAI_API_KEY is not set.
azure_openai_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_gpt_deployment = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT", "gpt-4o-mini")
azure_openai_gpt_model = os.getenv("AZURE_OPENAI_GPT_MODEL", "gpt-4o-mini")
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-ada-002")
azure_openai_embedding_model = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")

# Names and sizes used when defining the new index.
vectorizer_name = "vectorizer-1748574121417"
semantic_configuration_name = "mysemantic1"
vector_dimension_size = 1536
vector_search_profile_name = "myExhaustiveKnnProfile1"

from azure.search.documents.indexes.models import (
    SearchIndex, SimpleField, SearchableField, SearchField, SearchFieldDataType,
    SimpleField, SearchableField, VectorSearch, VectorSearchAlgorithmConfiguration, VectorSearchProfile,
    HnswParameters, ExhaustiveKnnParameters, VectorSearchAlgorithmMetric, HnswAlgorithmConfiguration, ExhaustiveKnnAlgorithmConfiguration,
    AzureOpenAIVectorizer, AzureOpenAIVectorizerParameters, VectorSearchAlgorithmKind,
    # AzureOpenAIParameters, VectorSearchVectorizer,
    SemanticSearch, SemanticConfiguration, SemanticPrioritizedFields, SemanticField, BM25SimilarityAlgorithm
)

# 1. Create the vectorizer
# vectorizer_url = f"{search_endpoint}/vectorizers/{vectorizer_name}?api-version=2023-11-01-preview"
# vectorizer_payload = {
    # "name": vectorizer_name,
    # "kind": "azureOpenAI",
    # "azureOpenAIParameters": {
        # "resourceUri": azure_openai_endpoint,
        # "deploymentId": azure_openai_embedding_deployment,
        # "modelName": azure_openai_embedding_model,
    # }
# }
# vectorizer_response = requests.put(vectorizer_url, json=vectorizer_payload, headers=headers)
# print("Vectorizer:", vectorizer_response.status_code, vectorizer_response.json())

# Parameters identifying the Azure OpenAI embedding deployment: endpoint,
# API key, deployment name and model name.
vectorizer_parameters = AzureOpenAIVectorizerParameters(
    resource_url=azure_openai_endpoint,
    api_key=azure_openai_api_key,
    deployment_name=azure_openai_embedding_deployment,
    model_name=azure_openai_embedding_model,
)

# Named vectorizer definition that is later attached to the index's
# vector search configuration.
vectorizer_config = AzureOpenAIVectorizer(
    vectorizer_name=vectorizer_name,
    parameters=vectorizer_parameters,
)
print(f"Vectorizer config '{vectorizer_name}' created for index.")

# 2. Create the vector search with the vectorizer
# Algorithm names are held in variables so the profiles below cannot drift
# from the algorithm definitions (the original referenced "MyExhaustiveKnn1"
# while defining "myExhaustiveKnn1").
hnsw_algorithm_name = "myHnsw1"
exhaustive_knn_algorithm_name = "myExhaustiveKnn1"

vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name=hnsw_algorithm_name,
            kind=VectorSearchAlgorithmKind.HNSW,
            parameters=HnswParameters(
                metric=VectorSearchAlgorithmMetric.COSINE,
                m=4,
                ef_construction=400,
                ef_search=1000,
            ),
        ),
        ExhaustiveKnnAlgorithmConfiguration(
            name=exhaustive_knn_algorithm_name,
            kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
            parameters=ExhaustiveKnnParameters(metric=VectorSearchAlgorithmMetric.COSINE),
        ),
    ],
    profiles=[
        # The keyword is `vectorizer_name`; the previous `vectorizer=` keyword
        # was reported as unknown by the SDK and ignored, leaving the profiles
        # without a vectorizer.
        VectorSearchProfile(
            name="myHnswProfile1",
            algorithm_configuration_name=hnsw_algorithm_name,
            vectorizer_name=vectorizer_name,
        ),
        VectorSearchProfile(
            name=vector_search_profile_name,
            algorithm_configuration_name=exhaustive_knn_algorithm_name,
            vectorizer_name=vectorizer_name,
        ),
    ],
    vectorizers=[vectorizer_config],
)
print(f"VectorSearch with '{vectorizer_name}' created for index.")

# Semantic ranking configuration for the index.
# SemanticPrioritizedFields takes `content_fields` and `keywords_fields`; the
# previous `prioritized_content_fields` / `prioritized_keywords_fields`
# keywords were reported as unknown by the SDK and ignored, so only the title
# field was actually configured.
# NOTE(review): the original lists also named "id", which this file declares
# as a non-searchable SimpleField. Semantic fields are expected to be
# searchable strings, so "id" is omitted here to avoid the service rejecting
# the now-effective configuration - confirm against the service documentation.
semantic_search = SemanticSearch(
    default_configuration_name=semantic_configuration_name,
    configurations=[
        SemanticConfiguration(
            name=semantic_configuration_name,
            prioritized_fields=SemanticPrioritizedFields(
                title_field=SemanticField(field_name="description"),
                content_fields=[
                    SemanticField(field_name="description"),
                ],
                keywords_fields=[
                    SemanticField(field_name="description"),
                ],
            ),
            ranking_order="BoostedRerankerScore",
            flighting_opt_in=False,
        )
    ],
)

# BM25 similarity configuration passed to the index definition.
similarity_algorithm = BM25SimilarityAlgorithm()
print(f"Semantic configuration '{semantic_configuration_name}' created for index.")
# Field schema for the new index: a string key, searchable text fields, and
# one vector field bound to the vector search profile.
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True, retrievable=True, stored=True),
    SearchableField(name="accountid", type=SearchFieldDataType.String, searchable=True, filterable=True,
                    retrievable=True, stored=True, sortable=True, facetable=True),
    SearchableField(name="description", type=SearchFieldDataType.String, searchable=True, filterable=True,
                    retrievable=True, stored=True, sortable=True, facetable=True),
    # The type must be the value produced by SearchFieldDataType.Collection(...),
    # i.e. "Collection(Edm.Single)". The original passed the expression as a
    # quoted string, which is not a valid field type.
    SearchField(name="vector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, retrievable=True, stored=True,
                vector_search_dimensions=vector_dimension_size,
                vector_search_profile_name=vector_search_profile_name),
    SearchableField(name="objects", type=SearchFieldDataType.String, analyzer_name="standard.lucene",
                    searchable=True, filterable=True, retrievable=True, stored=True,
                    sortable=True, facetable=True),
    SearchableField(name="tags", type=SearchFieldDataType.String, analyzer_name="standard.lucene",
                    searchable=True, filterable=True, retrievable=True, stored=True,
                    sortable=True, facetable=True),
    SearchableField(name="title", type=SearchFieldDataType.String, analyzer_name="standard.lucene",
                    searchable=True, filterable=True, retrievable=True, stored=True,
                    sortable=True, facetable=True),
]

# 3. create the index with fields, vectorizer and semantic configuration
# Client for index management, authenticated with the admin-key credential
# built from search_api_key earlier in the script.
index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)

# Full index definition: schema plus semantic, vector and similarity settings.
index = SearchIndex(
    name=new_index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
    similarity=similarity_algorithm,
)

# Creates the index, or updates it when one with this name already exists.
index_client.create_or_update_index(index)
print(f"Index '{new_index_name}' created with vector and semantic search.")

# Console output recorded from a run of this script. The "is not a known
# attribute" lines are SDK warnings about keyword arguments that the model
# classes did not recognize and therefore ignored.
"""
Output:
Vectorizer config 'vectorizer-1748574121417' created for index.
vectorizer is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.VectorSearchProfile'> and will be ignored
vectorizer is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.VectorSearchProfile'> and will be ignored
VectorSearch with 'vectorizer-1748574121417' created for index.
prioritized_content_fields is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.SemanticPrioritizedFields'> and will be ignored
prioritized_keywords_fields is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.SemanticPrioritizedFields'> and will be ignored
Semantic configuration 'mysemantic1' created for index.
Index 'index05' created with vector and semantic search.
"""

Cluster computing

Sunday, July 27, 2025

No comments:

Post a Comment