Sunday, April 30, 2023

A script to copy attributes and tags for objects from an on-premises S3 store to the Azure public cloud, for cases where the preserve-metadata option of the ADF Copy activity does not suffice. This article follows the one on customizing ADF with a WebHook activity to include functionality from external services.
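The script assumes an rclone remote has already been configured for the on-premises S3 store and is passed in via the -x option. A minimal sketch of creating such a remote (the remote name, endpoint and credentials below are placeholders):

rclone config create onprem-s3 s3 \
  provider=Other \
  endpoint=https://s3.onprem.example.com \
  access_key_id=EXAMPLE_ACCESS_KEY \
  secret_access_key=EXAMPLE_SECRET_KEY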

#! /usr/bin/bash 

#------------------- 

# This script is equally applicable to Windows (e.g. under WSL or Git Bash) 

#------------------- 
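# throw: write an error message to stderr and abort with a non-zero status 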

throw() { 

  echo "$*" >&2 

  exit 33 

} 

  

STORAGE_ACCOUNT_NAME= 

STORAGE_ACCOUNT_KEY= 

CONTAINER_NAME= 

LOCAL_FOLDER_PATH= 

REMOTE_FOLDER_PREFIX= 

ARM_TENANT_ID=f66b7197-eb94-49fa-80fb-6df9fa346b46 

RCLONE_CONNECTION_NAME= 
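# The variables above are populated from the command-line options parsed below. 
# ARM_TENANT_ID is only needed if you sign in with 'az login --tenant' (see the commented 'az login' further down). 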

  

usage() { 

  echo 

  echo "Usage: $(basename $0) -b arg -c arg -l arg -x arg -r arg [-h]" 

  echo 

  echo "-b - The name of the blob storage account." 

  echo "-c - The name of the container." 

  echo "-l - The name of the local folder path." 

  echo "-r - The name of the remote folder path." 

  echo "-x - The name of the rclone connection." 

  echo "-k - The key for the storage account." 

  echo "-h - This help text." 

  echo 

} 

  

parse_options() { 

while getopts ':b:l:c:r:x:k:h' opt; do 

  case "$opt" in 

    b) 

      STORAGE_ACCOUNT_NAME="$OPTARG" 

      ;; 

  

    k) 

      STORAGE_ACCOUNT_KEY="$OPTARG" 

      ;; 

  

    l) 

      LOCAL_FOLDER_PATH="$OPTARG" 

      ;; 

  

    r) 

      REMOTE_FOLDER_PREFIX="$OPTARG" 

      ;; 

  

    c) 

      CONTAINER_NAME="$OPTARG" 

      ;; 

  

    x) 

      RCLONE_CONNECTION_NAME="$OPTARG" 

      ;; 

  

    h) 

      echo "Processing option 'h'" 

      usage 

      exit 0 

      ;; 

  

    :) 

      echo "option requires an argument.\n" 

      usage 

      exit 33 

      ;; 

  

    ?) 

      echo "Invalid command option.\n" 

      usage 

      exit 33 

      ;; 

  esac 

done 

shift "$(($OPTIND -1))" 

} 

  

  

parse_options "$@" 

if [ -z "$LOCAL_FOLDER_PATH" ] || [ -z "$REMOTE_FOLDER_PREFIX" ] || [ -z "$STORAGE_ACCOUNT_NAME" ] || [ -z "$CONTAINER_NAME" ] || [ -z "$RCLONE_CONNECTION_NAME" ] || [ -z "$STORAGE_ACCOUNT_KEY" ]; 

then  

  echo "Invalid command.\n" 

  usage 

  exit 33 

fi 

# az login 

key="$STORAGE_ACCOUNT_KEY" 

items=($(rclone lsf "$RCLONE_CONNECTION_NAME":"$LOCAL_FOLDER_PATH" --recursive)) 
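# Lists every object under the local folder; the unquoted expansion splits on whitespace, so object names are assumed to contain no spaces 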

echo LENGTH=${#items[@]} 

for item in "${items[@]}" 

do 

  [[ "$item" == */ ]] && continue 

  tagsJson=$(rclone lsf --format M $(eval echo $RCLONE_CONNECTION_NAME:$LOCAL_FOLDER_PATH/$item)) 

  [[ -z ${tagsJson} ]] && continue  

  #{"btime":"2023-03-30T15:57:08.66Z","content-type":"application/octet-stream","owner":"you","test-dataset":""} 

  keyValues=`echo "$tagsJson" | jq -r '[to_entries|map("\(.key)=\(.value|tostring)")|.[]]|join(" ")'` 
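  # e.g. btime=2023-03-30T15:57:08.66Z content-type=application/octet-stream owner=you test-dataset= 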

  [[ -z ${keyValues} ]] && continue 

  agreeableKeyValues="${keyValues//-/_}" 
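  # Azure blob metadata keys must be valid C# identifiers, so hyphens are swapped for underscores (this also touches hyphens inside values) 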

  [[ -z ${agreeableKeyValues} ]] && continue 

  existsJson=`az storage blob exists --account-name "$STORAGE_ACCOUNT_NAME" --account-key $key --container-name "$CONTAINER_NAME" --name $(eval echo $REMOTE_FOLDER_PREFIX/$LOCAL_FOLDER_PATH/$item)` 

  exists=`echo "$existsJson" | jq .exists` 

  #echo $exists 

  #{ 

  #  "exists": true 

  #} 

  if [[ $exists == *"true"* ]]; then 

     az storage blob metadata update --account-name "$STORAGE_ACCOUNT_NAME" --account-key $key  --container-name "$CONTAINER_NAME" --name $(eval echo $REMOTE_FOLDER_PREFIX/$LOCAL_FOLDER_PATH/$item) --metadata $(eval echo $agreeableKeyValues) 

#{ 

#  "client_request_id": "819eed5c-e557-11ed-9b75-8ef5922a9146", 

#  "date": "2023-04-27T23:59:11+00:00", 

#  "encryption_key_sha256": null, 

#  "encryption_scope": null, 

#  "etag": "\"0x8DB477B663E0F10\"", 

#  "last_modified": "2023-04-27T23:59:12+00:00", 

#  "request_id": "8fa92f3a-e01e-0014-6564-79ae0b000000", 

#  "request_server_encrypted": true, 

#  "version": "2021-06-08", 

#  "version_id": null 

#} 

    newMetadata=`az storage blob metadata show --account-name "$STORAGE_ACCOUNT_NAME" --account-key $key --container-name "$CONTAINER_NAME" --name $(eval echo $REMOTE_FOLDER_PREFIX/$LOCAL_FOLDER_PATH/$item)` 

    echo $newMetadata 

#{ 

#  "btime": "2023-03-30T15:56:48.161Z", 

#  "content_type": "application/octet-stream", 

#  "owner": "you", 

#  "test_dataset": "" 

#} 

  else 

    echo "$item not found" 

  fi 

done 

# crontab -e 

# */5 * * * * bash /path/to/this_script.sh
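
A sample invocation, using hypothetical account, container and remote names (the script file name is also a placeholder):

bash copy_blob_metadata.sh -b mystorageaccount -k "$STORAGE_ACCOUNT_KEY" \
  -c backups -l data/exports -r landing -x onprem-s3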