This is a runbook for migrating a Databricks workload from one Azure region to another (region-to-region DR migration).
Step 1. Create the workspace infrastructure to receive the workloads in the destination region.
#!/usr/bin/env bash
# Step 1 of the Databricks region-migration runbook: export the source
# resource group with aztfexport, create the DR resource group, and rewrite
# the generated Terraform for the target region/names.
# Fill in the <PLACEHOLDER> values before running.
set -euo pipefail
# Source subscription / RG
SOURCE_SUBSCRIPTION_ID="<SOURCE_SUBSCRIPTION_ID>"
SOURCE_RG="<SOURCE_DATABRICKS_RG>" # e.g., rg-dbx-prod
SOURCE_LOCATION="<SOURCE_REGION>" # e.g., westus2
# DR subscription / RG / region
TARGET_SUBSCRIPTION_ID="<TARGET_SUBSCRIPTION_ID>"
TARGET_LOCATION="eastus2"
SUFFIX="eus2" # appended to resource names and the target RG name
TARGET_RG="${SOURCE_RG}-${SUFFIX}"
# 1. Set source subscription and export
az account set --subscription "${SOURCE_SUBSCRIPTION_ID}"
# Local directory that receives the aztfexport-generated Terraform files.
EXPORT_DIR="./tfexport-${SOURCE_RG}-${SUFFIX}"
mkdir -p "${EXPORT_DIR}"
echo "Exporting all resources from ${SOURCE_RG} using aztfexport..."
# --append keeps any files already present in EXPORT_DIR from a prior run.
# NOTE(review): flag spelling varies across aztfexport releases (some use
# --output-dir); confirm against the installed version before running.
aztfexport group \
--resource-group "${SOURCE_RG}" \
--output-directory "${EXPORT_DIR}" \
--append
echo "Export complete. Files in ${EXPORT_DIR}"
# 2. Provision the DR resource group under the target subscription.
az account set --subscription "${TARGET_SUBSCRIPTION_ID}"
echo "Creating target resource group ${TARGET_RG} in ${TARGET_LOCATION}..."
az group create --name "${TARGET_RG}" --location "${TARGET_LOCATION}" --output none
# 3. Rewrite names and locations in the exported Terraform files:
#    - change every quoted source-location literal to the DR region
#    - append the DR suffix to `name = "..."` values
#    - retarget hard-coded resource_group_name references
echo "Rewriting Terraform for DR region and names..."
# NUL-delimited find + read so file paths containing spaces or newlines
# survive; `find | while read` word-splits and silently corrupts such paths.
while IFS= read -r -d '' FILE; do
  # Change location.
  # NOTE(review): the shell values are interpolated into sed patterns
  # verbatim — they must not contain sed metacharacters ('/', '&', ...).
  sed -i "s/\"${SOURCE_LOCATION}\"/\"${TARGET_LOCATION}\"/g" "${FILE}"
  # Append suffix to resource names (name = "xyz" → "xyz-eus2").
  # Deliberately simplistic: it touches every `name = "..."` attribute,
  # so review the diff for SKU names etc. before applying.
  sed -i -E "s/(name *= *\"[a-zA-Z0-9_-]+)\"/\1-${SUFFIX}\"/g" "${FILE}"
  # If resource_group_name is hard-coded, retarget it to the DR RG.
  sed -i "s/\"${SOURCE_RG}\"/\"${TARGET_RG}\"/g" "${FILE}"
done < <(find "${EXPORT_DIR}" -type f -name "*.tf" -print0)
echo "Terraform rewrite done. Review ${EXPORT_DIR} before applying."
# 4. (Optional) Initialize and apply Terraform in target subscription
# cd "${EXPORT_DIR}"
# terraform init
# terraform apply
Step 2. Copy all the workloads (notebooks and job definitions) into the new workspace.
#!/usr/bin/env bash
# Step 2 of the runbook: copy notebooks and job definitions from the source
# Databricks workspace to the DR workspace, using two pre-configured
# databricks CLI profiles. Requires the databricks CLI and jq.
set -euo pipefail
# Databricks CLI profiles
SOURCE_PROFILE="src-dbx"
TARGET_PROFILE="dr-dbx"
# Temp export directory
EXPORT_DIR="./dbx-migration-eus2"
NOTEBOOKS_DIR="${EXPORT_DIR}/notebooks"
JOBS_FILE="${EXPORT_DIR}/jobs.json"
mkdir -p "${NOTEBOOKS_DIR}"
echo "Using Databricks profiles:"
echo " Source: ${SOURCE_PROFILE}"
echo " Target: ${TARGET_PROFILE}"
echo ""
# 1. Export all notebooks from source workspace.
# `workspace export_dir` is itself recursive, so a single call on the root
# is sufficient. The original per-path loop had two defects: `read -r PATH`
# clobbered $PATH (breaking command lookup for every command inside the
# loop), and `awk '{print $1}'` truncated workspace paths containing spaces.
# NOTE(review): the legacy databricks CLI spells this `export_dir`; the
# unified CLI uses `export-dir` — confirm against the installed version.
echo "Exporting notebooks from source workspace..."
databricks --profile "${SOURCE_PROFILE}" workspace export_dir \
  / \
  "${NOTEBOOKS_DIR}" \
  --overwrite
echo "Notebook export complete."
# 2. Import notebooks into target workspace.
# Recreate the directory tree first, then import files one by one.
# NUL-delimited find output so local paths with spaces/newlines survive
# (the original `find | while read` word-split them).
echo "Importing notebooks into target workspace..."
while IFS= read -r -d '' DIR; do
  # Strip the local prefix to get the workspace-relative path.
  REL_PATH="${DIR#"${NOTEBOOKS_DIR}"}"
  if [[ -n "${REL_PATH}" ]]; then
    databricks --profile "${TARGET_PROFILE}" workspace mkdirs "${REL_PATH}"
  fi
done < <(find "${NOTEBOOKS_DIR}" -type d -print0)
while IFS= read -r -d '' FILE; do
  REL_PATH="${FILE#"${NOTEBOOKS_DIR}"}"
  # NOTE(review): exported files carry a source-format extension (.py, .sql,
  # ...); confirm whether the extension should be stripped from the target
  # workspace path for your CLI version.
  TARGET_PATH="${REL_PATH}"
  echo "Importing ${TARGET_PATH}"
  databricks --profile "${TARGET_PROFILE}" workspace import \
    --format AUTO \
    --language AUTO \
    --overwrite \
    "${FILE}" \
    "${TARGET_PATH}"
done < <(find "${NOTEBOOKS_DIR}" -type f -print0)
echo "Notebook import complete."
# 3. Export jobs from source workspace
echo "Exporting jobs from source workspace..."
databricks --profile "${SOURCE_PROFILE}" jobs list --output JSON > "${JOBS_FILE}"
# 4. Recreate jobs in target workspace.
# The Jobs create API takes the job *settings* object itself (name, tasks,
# schedule, ... at top level), not a {name, settings} wrapper — the original
# wrapper produced requests the API rejects. Submit .settings directly after
# stripping per-instance fields.
# NOTE(review): settings referencing existing_cluster_id or instance pools
# point at source-workspace resources; remap them for the DR workspace.
echo "Recreating jobs in target workspace..."
jq -c '.jobs[]' "${JOBS_FILE}" | while IFS= read -r JOB; do
  CLEANED=$(jq -c '.settings | del(.schedule_status, .created_time, .modified_time)' <<<"${JOB}")
  echo "Creating job: $(jq -r '.name' <<<"${CLEANED}")"
  databricks --profile "${TARGET_PROFILE}" jobs create --json "${CLEANED}"
done
echo "Job migration complete."
Step 3. Find the storage accounts reachable from the workspace subnets and copy their data to eastus2.
#!/usr/bin/env bash
# Step 3 of the runbook: discover storage accounts whose firewall rules
# allow the source Databricks workspace subnets, then copy their blob data
# to the matching DR storage accounts. Requires az CLI, jq, and azcopy.
set -euo pipefail
SOURCE_SUBSCRIPTION_ID="<SOURCE_SUBSCRIPTION_ID>"
TARGET_SUBSCRIPTION_ID="<TARGET_SUBSCRIPTION_ID>"
SOURCE_RG="<SOURCE_DATABRICKS_RG>"
SUFFIX="eus2" # naming suffix expected on the DR storage accounts
# Databricks workspace info (source)
DATABRICKS_WS_NAME="<SOURCE_DATABRICKS_WORKSPACE_NAME>"
DATABRICKS_WS_RG="${SOURCE_RG}"
# 1. Get workspace VNet/subnets (assuming VNet injection).
az account set --subscription "${SOURCE_SUBSCRIPTION_ID}"
WS_INFO=$(az databricks workspace show -g "${DATABRICKS_WS_RG}" -n "${DATABRICKS_WS_NAME}")
VNET_ID=$(jq -r '.parameters.customVirtualNetworkId.value // empty' <<<"${WS_INFO}")
# jq's `//` binds tighter than `,`: in the original expression `// empty`
# applied only to the private subnet, so a missing public subnet printed the
# literal string "null". Parenthesize each alternative so both fall back.
SUBNET_IDS=$(jq -r '(.parameters.customPublicSubnetName.value // empty), (.parameters.customPrivateSubnetName.value // empty)' <<<"${WS_INFO}" | sed "/^$/d")
echo "Workspace VNet: ${VNET_ID}"
echo "Workspace subnets (names):"
echo "${SUBNET_IDS}"
echo ""
# 2. Find storage accounts whose network rules allow these subnets.
echo "Finding storage accounts with network rules allowing workspace subnets..."
STORAGE_ACCOUNTS=$(az storage account list --query "[].id" -o tsv)
MATCHED_SA=()
for SA_ID in ${STORAGE_ACCOUNTS}; do
  # Resource ID layout: /subscriptions/<id>/resourceGroups/<rg>/providers/...
  # so the last segment is the account name and field 5 is the RG.
  SA_NAME="${SA_ID##*/}"
  SA_RG=$(awk -F/ '{print $5}' <<<"${SA_ID}")
  RULES=$(az storage account network-rule list --account-name "${SA_NAME}" --resource-group "${SA_RG}" 2>/dev/null || echo "")
  if [[ -z "${RULES}" ]]; then
    continue
  fi
  for SUBNET in ${SUBNET_IDS}; do
    # `jq -e`'s exit status reflects only the LAST value emitted; the
    # original per-rule `contains` stream therefore missed matches that were
    # not in the last network rule. Fold all rules with any() instead.
    if jq -e --arg sn "${SUBNET}" 'any(.virtualNetworkRules[]?; .virtualNetworkResourceId | contains($sn))' <<<"${RULES}" >/dev/null 2>&1; then
      echo "Matched storage account: ${SA_ID} for subnet: ${SUBNET}"
      MATCHED_SA+=("${SA_ID}")
      break
    fi
  done
done
# Deduplicate. Guard the empty case: expanding "${MATCHED_SA[@]}" on an
# empty array trips `set -u` on bash < 4.4, and piping nothing through
# sort -u would otherwise inject a blank entry.
MATCHED_SA_UNIQ=()
if (( ${#MATCHED_SA[@]} > 0 )); then
  mapfile -t MATCHED_SA_UNIQ < <(printf "%s\n" "${MATCHED_SA[@]}" | sort -u)
fi
echo ""
echo "Matched storage accounts:"
printf "%s\n" ${MATCHED_SA_UNIQ[@]+"${MATCHED_SA_UNIQ[@]}"}
echo ""
# 3. For each matched storage account, copy data to corresponding DR storage account with eus2 suffix
# NOTE(review): the target name is "<source name><suffix>"; Azure storage
# account names are limited to 24 lowercase alphanumeric characters, so
# appending the suffix can exceed the limit — confirm DR naming first.
for SA_ID in "${MATCHED_SA_UNIQ[@]}"; do
SA_NAME=$(basename "${SA_ID}")
# Field 5 of the resource ID (/subscriptions/<id>/resourceGroups/<rg>/...) is the RG name.
SA_RG=$(echo "${SA_ID}" | awk -F/ '{print $5}')
TARGET_SA_NAME="${SA_NAME}${SUFFIX}"
echo "Processing storage account:"
echo " Source: ${SA_NAME} (RG: ${SA_RG})"
echo " Target: ${TARGET_SA_NAME}"
echo ""
# Get source key
SRC_KEY=$(az storage account keys list \
--account-name "${SA_NAME}" \
--resource-group "${SA_RG}" \
--query "[0].value" -o tsv)
# Switch to target subscription and get target key
az account set --subscription "${TARGET_SUBSCRIPTION_ID}"
TARGET_SA_RG="<TARGET_SA_RG_FOR_${TARGET_SA_NAME}>" # or derive if same naming pattern
TGT_KEY=$(az storage account keys list \
--account-name "${TARGET_SA_NAME}" \
--resource-group "${TARGET_SA_RG}" \
--query "[0].value" -o tsv)
# Build connection strings
# NOTE(review): only SRC_CONN is consumed below (for the container listing);
# TGT_CONN is currently built but never used. Both embed account keys —
# keep them out of logs and avoid `set -x` around this section.
SRC_CONN="DefaultEndpointsProtocol=https;AccountName=${SA_NAME};AccountKey=${SRC_KEY};EndpointSuffix=core.windows.net"
TGT_CONN="DefaultEndpointsProtocol=https;AccountName=${TARGET_SA_NAME};AccountKey=${TGT_KEY};EndpointSuffix=core.windows.net"
# List containers in source
az account set --subscription "${SOURCE_SUBSCRIPTION_ID}"
CONTAINERS=$(az storage container list \
--connection-string "${SRC_CONN}" \
--query "[].name" -o tsv)
for CONT in ${CONTAINERS}; do
echo "Copying container: ${CONT}"
SRC_URL="https://${SA_NAME}.blob.core.windows.net/${CONT}"
TGT_URL="https://${TARGET_SA_NAME}.blob.core.windows.net/${CONT}"
# Generate SAS tokens or use connection string; here we use account key via azcopy env vars
# NOTE(review): AAD auto-login requires the caller to hold Storage Blob Data
# Reader on the source and Storage Blob Data Contributor on the target, and
# the target container must already exist — confirm both before running.
export AZCOPY_AUTO_LOGIN_TYPE=AZURE_AD # or use SAS if preferred
# If using key-based auth:
# export AZCOPY_ACCOUNT_KEY="${SRC_KEY}" for source and then TGT_KEY for target in separate runs
# For simplicity, assume managed identity / AAD with proper RBAC.
azcopy copy "${SRC_URL}" "${TGT_URL}" \
--recursive=true
echo "Completed copy for container ${CONT}"
done
# Reset to source subscription for next iteration
az account set --subscription "${SOURCE_SUBSCRIPTION_ID}"
done
echo "All matched storage accounts copied to DR counterparts."
#codingexercises: CodingExercise-04-12-2026.docx