Files
azcopy_sync/run_azcopy-single-dir.sh
2025-02-21 12:23:18 +00:00

138 lines
4.1 KiB
Bash
Executable File

#!/bin/bash
# Configurable variables.
# Each of the four settings below may be overridden from the environment, so
# the storage URL and SAS token do not have to be written into this file. The
# value after ":-" is used when the variable is unset or empty.
BUSINESS_HOURS_START="${BUSINESS_HOURS_START:-7}" # Hour (date +%H) at which the no-transfer window begins
BUSINESS_HOURS_END="${BUSINESS_HOURS_END:-15}" # Hour at which the window ends (exclusive)
AZURE_URL="${AZURE_URL:-https://<>.core.windows.net/<container>}"
AZURE_SAS="${AZURE_SAS:-<add key here>}" # Appended to AZURE_URL after a "?"
# Arguments
SOURCE_DIR="$1" # Directory to sync (required)
LOGGING="${2:-false}" # Default to no logs
BANDWIDTH_CAP="${3:-0}" # Default is 0 (no cap)
# Report files (created in the current working directory; the shared
# timestamp ties the three files of one run together)
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
FILE_LIST="file_list_$TIMESTAMP.txt"
COMPLETION_REPORT="completion_report_$TIMESTAMP.txt"
LOG_FILE="azcopy_log_$TIMESTAMP.txt"
# Lock file to prevent multiple instances
LOCK_FILE="/tmp/run_azcopy.lock"
# Single-instance guard. The lock file holds the PID of the instance that
# created it.
if [[ -f "$LOCK_FILE" ]]; then
  PID=$(cat "$LOCK_FILE")
  # Only probe values that look like a PID; an empty or corrupt lock file is
  # treated as stale instead of being handed to kill.
  if [[ "$PID" =~ ^[0-9]+$ ]] && kill -0 "$PID" 2>/dev/null; then
    echo "Another instance (PID $PID) is already running. Exiting..."
    exit 1
  else
    echo "Stale lock file found. Removing..."
    rm -f "$LOCK_FILE"
  fi
fi
# Create lock file with current PID. With noclobber the redirection fails if
# the file already exists, so two instances started at the same moment cannot
# both take the lock. The subshell keeps noclobber from affecting the rest of
# the script; $$ still expands to this script's PID inside it.
if ! (set -o noclobber; echo "$$" > "$LOCK_FILE") 2>/dev/null; then
  echo "Could not create lock file $LOCK_FILE. Exiting..."
  exit 1
fi
# From here on this instance owns the lock: release it whenever the script
# exits, including through the `exit 1` branches further down.
trap 'rm -f "$LOCK_FILE"' EXIT
# Function to check business hours.
# Globals:   BUSINESS_HOURS_START, BUSINESS_HOURS_END (read)
# Arguments: none
# Returns:   0 when the hour printed by `date +%H` is >= BUSINESS_HOURS_START
#            and < BUSINESS_HOURS_END, non-zero otherwise.
is_business_hours() {
  local hour
  hour=$(date +%H)
  # %H is zero-padded. The 10# prefix forces base 10 so that "08" and "09"
  # are not rejected as invalid octal and "00" is read as 0.
  (( 10#$hour >= BUSINESS_HOURS_START && 10#$hour < BUSINESS_HOURS_END ))
}
# Ensure source directory is provided and is an existing directory
if [[ -z "$SOURCE_DIR" || ! -d "$SOURCE_DIR" ]]; then
  # Usage text is a diagnostic, so it goes to stderr.
  echo "Usage: $0 <directory> [log=true|false] [bandwidth_mbps]" >&2
  # The lock file was already written above; remove it before bailing out so
  # the next run does not have to go through stale-lock recovery.
  rm -f "$LOCK_FILE"
  exit 1
fi
# Check if already within business hours before starting
if is_business_hours; then
  echo "Business hours detected ($BUSINESS_HOURS_START:00 - $BUSINESS_HOURS_END:00). Exiting..."
  # Same as above: do not leave the lock behind on this early exit.
  rm -f "$LOCK_FILE"
  exit 1
fi
# Snapshot every regular file under the source directory, one path per line,
# then count the lines of that snapshot to report how many files were found.
find "$SOURCE_DIR" -type f > "$FILE_LIST"
TOTAL_FILES=$(wc -l < "$FILE_LIST")
printf 'Initial file list created: %s files found.\n' "$TOTAL_FILES"
# Check for incomplete azcopy jobs: take the first JobId that azcopy lists as
# InProgress, if any.
PENDING_JOB=$(azcopy jobs list --with-status=InProgress | awk '/JobId:/ {print $2; exit}')
if [[ -n "$PENDING_JOB" ]]; then
  echo "Resuming previous job: $PENDING_JOB"
  # The destination SAS has to be supplied again on resume (see the
  # `azcopy jobs resume` documentation for --destination-sas).
  azcopy jobs resume "$PENDING_JOB" --destination-sas "$AZURE_SAS" &
  AZCOPY_PID=$!
else
  # Run azcopy in the background for a new sync
  if [[ "$LOGGING" == "true" ]]; then
    # The SAS token is a credential, so it is masked in the logged command line.
    echo "Running: azcopy sync \"$SOURCE_DIR\" \"$AZURE_URL?<SAS redacted>\" --recursive --cap-mbps $BANDWIDTH_CAP" | tee -a "$LOG_FILE"
    # Output is sent to tee through process substitution rather than a pipe.
    # After `azcopy ... | tee ... &`, $! is the PID of tee (the last command
    # of the pipeline), so the later `kill` would stop tee and leave azcopy
    # running. With the redirection form, $! is azcopy itself.
    azcopy sync "$SOURCE_DIR" "$AZURE_URL?$AZURE_SAS" --recursive --cap-mbps "$BANDWIDTH_CAP" > >(tee -a "$LOG_FILE") &
  else
    azcopy sync "$SOURCE_DIR" "$AZURE_URL?$AZURE_SAS" --recursive --cap-mbps "$BANDWIDTH_CAP" > /dev/null 2>&1 &
  fi
  AZCOPY_PID=$!
fi
echo "azcopy started with PID $AZCOPY_PID"
# Monitor the process: poll until azcopy exits on its own or business hours
# begin, whichever comes first.
INTERRUPTED=false # Set explicitly so a value inherited from the environment cannot leak in
AZCOPY_EXIT=0
while kill -0 "$AZCOPY_PID" 2>/dev/null; do
  if is_business_hours; then
    echo -e "\nBusiness hours started! Stopping azcopy..."
    # The process may exit between the liveness probe and this kill; that is
    # not an error worth printing.
    kill "$AZCOPY_PID" 2>/dev/null
    wait "$AZCOPY_PID" 2>/dev/null # Ensure the process is fully stopped
    INTERRUPTED=true
    break
  fi
  sleep 30 # Check every 30 seconds
done
# When the loop ended because the process is gone, collect its exit status so
# a failed transfer is not reported as a success.
if [[ "$INTERRUPTED" != "true" ]]; then
  wait "$AZCOPY_PID" 2>/dev/null
  AZCOPY_EXIT=$?
fi
# Generate completion report status line
if [[ "$INTERRUPTED" == "true" ]]; then
  STATUS="Interrupted due to business hours, can be resumed"
elif (( AZCOPY_EXIT != 0 )); then
  STATUS="Failed (azcopy exit code $AZCOPY_EXIT)"
else
  STATUS="Completed normally"
fi
# Job statistics for the report. They can only be read when logging is
# enabled and the log file exists; otherwise every figure is zero.
PERCENT_COMPLETE=0
if [[ "$LOGGING" != "true" || ! -f "$LOG_FILE" ]]; then
  COMPLETED_FILES=0
  FAILED_FILES=0
  TOTAL_FILES=0
else
  # For each counter, keep the number from the last matching line in the log.
  COMPLETED_FILES=$(grep -oP 'Number of Copy Transfers Completed:\s+\K\d+' "$LOG_FILE" | tail -n1)
  FAILED_FILES=$(grep -oP 'Number of Copy Transfers Failed:\s+\K\d+' "$LOG_FILE" | tail -n1)
  TOTAL_FILES=$(grep -oP 'Total Number of Copy Transfers:\s+\K\d+' "$LOG_FILE" | tail -n1)
  # Counters missing from the log become 0; a missing total falls back to
  # the completed count.
  : "${COMPLETED_FILES:=0}" "${FAILED_FILES:=0}"
  : "${TOTAL_FILES:=$COMPLETED_FILES}"
  # Integer percentage; left at 0 when there is nothing to divide by.
  if (( TOTAL_FILES != 0 )); then
    PERCENT_COMPLETE=$(( COMPLETED_FILES * 100 / TOTAL_FILES ))
  fi
fi
# Write the completion report with a single redirection. The failed-transfer
# count is appended as the last line so the positions of the existing lines
# stay the same.
{
  echo "Sync Status: $STATUS"
  echo "Total Files: $TOTAL_FILES"
  echo "Completed Files: $COMPLETED_FILES"
  echo "Percentage Completed: $PERCENT_COMPLETE%"
  echo "Failed Files: $FAILED_FILES"
} > "$COMPLETION_REPORT"
echo "Completion report generated: $COMPLETION_REPORT"
# Remove lock file when done
rm -f "$LOCK_FILE"
exit 0