# A script to copy attributes and tags for objects from an on-premises s3 store to the Azure public cloud, when the preserve-metadata option in the ADF CopyActivity does not suffice. This article follows the one on customizing ADF with the WebHook activity to include functionality from external services.
#!/usr/bin/env bash
#-------------------
# Uses bash-only features (arrays, [[ ]]); on Windows run it under WSL,
# Git Bash, or Cygwin — presumably what "applicable to windows" meant; confirm.
#-------------------
#######################################
# Print an error message to stderr and abort with status 33.
# The original body used '(exit 33) && true', which set $? inside a
# subshell and then immediately discarded it, so callers never saw a
# failure and the script carried on.
# Arguments: $* - message to print
#######################################
throw() {
  echo "$*" >&2
  exit 33
}
# Script-level configuration; populated by parse_options from the flags below.
STORAGE_ACCOUNT_NAME=     # -b: target Azure blob storage account
STORAGE_ACCOUNT_KEY=      # -k: access key for that storage account (secret)
CONTAINER_NAME=           # -c: blob container inside the storage account
LOCAL_FOLDER_PATH=        # -l: source folder on the rclone remote
REMOTE_FOLDER_PREFIX=     # -r: destination path prefix inside the container
# NOTE(review): hard-coded tenant ID; no visible code reads it — presumably
# consumed by 'az login' (commented out below). Confirm before removing.
ARM_TENANT_ID=f66b7197-eb94-49fa-80fb-6df9fa346b46
RCLONE_CONNECTION_NAME=   # -x: name of the configured rclone remote
#######################################
# Print the usage/help text for this script to stdout.
# Globals:   none
# Arguments: none
#######################################
usage() {
  echo
  # Quote "$0" so a script path containing spaces still prints correctly.
  echo "Usage: $(basename "$0") -b arg -c arg -l arg -x arg -r arg [-h]"
  echo
  echo "-b - The name of the blob storage account."
  echo "-c - The name of the container."
  echo "-l - The name of the local folder path."
  echo "-r - The name of the remote folder path."
  echo "-x - The name of the rclone connection."
  echo "-k - The key for the storage account."
  echo "-h - This help text."
  echo
}
#######################################
# Parse command-line flags into the script-level configuration globals.
# Globals:   STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY, CONTAINER_NAME,
#            LOCAL_FOLDER_PATH, REMOTE_FOLDER_PREFIX, RCLONE_CONNECTION_NAME
#            (all written)
# Arguments: the script's "$@"
# Exits:     0 after printing help (-h); 33 on a missing option argument or
#            an unknown flag. The original arms used '(exit 33) && true',
#            which discarded the status and let the script continue with a
#            partial configuration.
#######################################
parse_options() {
  while getopts ':b:l:c:r:x:k:h' opt; do
    case "$opt" in
      b) STORAGE_ACCOUNT_NAME="$OPTARG" ;;
      k) STORAGE_ACCOUNT_KEY="$OPTARG" ;;
      l) LOCAL_FOLDER_PATH="$OPTARG" ;;
      r) REMOTE_FOLDER_PREFIX="$OPTARG" ;;
      c) CONTAINER_NAME="$OPTARG" ;;
      x) RCLONE_CONNECTION_NAME="$OPTARG" ;;
      h)
        usage
        exit 0
        ;;
      :)
        # echo "...\n" printed a literal backslash-n; use >&2 for diagnostics.
        echo "option -$OPTARG requires an argument." >&2
        usage
        exit 33
        ;;
      ?)
        echo "Invalid command option -$OPTARG." >&2
        usage
        exit 33
        ;;
    esac
  done
  # NOTE: this shifts only the function's own positional parameters, not the
  # caller's "$@" — kept for compatibility with the original.
  shift "$((OPTIND - 1))"
}
parse_options "$@"

# Every option is mandatory; abort with the usage text when any is missing.
# The original branch ran '(exit 33) && true' and then fell through into the
# copy loop with empty credentials; the '( … )' around the test also spawned
# a pointless subshell.
if [ -z "$LOCAL_FOLDER_PATH" ] || [ -z "$REMOTE_FOLDER_PREFIX" ] || [ -z "$STORAGE_ACCOUNT_NAME" ] || [ -z "$CONTAINER_NAME" ] || [ -z "$RCLONE_CONNECTION_NAME" ] || [ -z "$STORAGE_ACCOUNT_KEY" ]
then
  echo "Invalid command." >&2
  usage
  exit 33
fi
# az login   # uncomment to authenticate interactively instead of via account key
key="$STORAGE_ACCOUNT_KEY"

# List every object under the source folder, one path per line.
# mapfile keeps paths with spaces intact; the original 'items=($(rclone …))'
# word-split the output, and 'for item in $items' then visited only the
# FIRST array element, silently skipping every other object.
mapfile -t items < <(rclone lsf "$RCLONE_CONNECTION_NAME:$LOCAL_FOLDER_PATH" --recursive)
echo "LENGTH=${#items[@]}"

for item in "${items[@]}"
do
  # rclone lsf marks directories with a trailing slash; skip them.
  [[ "$item" == */ ]] && continue

  # '--format M' prints the object's metadata as a JSON document, e.g.
  # {"btime":"2023-03-30T15:57:08.66Z","content-type":"application/octet-stream","owner":"you","test-dataset":""}
  # The paths are passed quoted; the original 'eval echo' re-expansion added
  # nothing and exposed the paths to globbing and word-splitting.
  tagsJson=$(rclone lsf --format M "$RCLONE_CONNECTION_NAME:$LOCAL_FOLDER_PATH/$item")
  [[ -z "$tagsJson" ]] && continue

  # Flatten the JSON object into "key=value key2=value2 …" for az --metadata.
  keyValues=$(echo "$tagsJson" | jq -r '[to_entries|map("\(.key)=\(.value|tostring)")|.[]]|join(" ")')
  [[ -z "$keyValues" ]] && continue

  # Replace '-' with '_': presumably because Azure metadata names must be
  # valid C# identifiers, which forbid hyphens — confirm.
  agreeableKeyValues="${keyValues//-/_}"
  [[ -z "$agreeableKeyValues" ]] && continue

  blobName="$REMOTE_FOLDER_PREFIX/$LOCAL_FOLDER_PATH/$item"

  # az returns e.g. { "exists": true }
  existsJson=$(az storage blob exists --account-name "$STORAGE_ACCOUNT_NAME" --account-key "$key" --container-name "$CONTAINER_NAME" --name "$blobName")
  exists=$(echo "$existsJson" | jq .exists)
  if [[ "$exists" == *"true"* ]]; then
    # shellcheck disable=SC2086 — $agreeableKeyValues must word-split into
    # separate key=value arguments for --metadata.
    az storage blob metadata update --account-name "$STORAGE_ACCOUNT_NAME" --account-key "$key" --container-name "$CONTAINER_NAME" --name "$blobName" --metadata $agreeableKeyValues
    # Read the metadata back and log it, e.g.
    # { "btime": "2023-03-30T15:56:48.161Z", "content_type": "application/octet-stream", "owner": "you", "test_dataset": "" }
    newMetadata=$(az storage blob metadata show --account-name "$STORAGE_ACCOUNT_NAME" --account-key "$key" --container-name "$CONTAINER_NAME" --name "$blobName")
    echo "$newMetadata"
  else
    echo "$item not found"
  fi
done
# To run every 5 minutes, add to the crontab (crontab -e):
# */5 * * * * bash /path/to/this_script.sh
# (invoke with bash, not sh: the script relies on bash arrays and [[ ]])