commit 4466d9e85079cbe8657c18cfeae3bad29af8a050 Author: Radek Date: Thu Feb 20 14:41:24 2025 +0000 new diff --git a/howto.md b/howto.md new file mode 100644 index 0000000..07e96d4 --- /dev/null +++ b/howto.md @@ -0,0 +1,90 @@ +# **Azure Blob Sync Script - How To Use** + +This script syncs a directory to **Azure Blob Storage** using `azcopy` while: +✅ **Avoiding business hours** (default: 9 AM - 5 PM, configurable). +✅ **Preventing duplicate instances** (via a lock file). +✅ **Automatically resuming** unfinished jobs. +✅ **Logging progress & generating reports**. + +--- + +## **📌 1. Script Usage** +Run the script manually: +```bash +./run_azcopy.sh <source_dir> [log=true|false] [bandwidth_mbps] +``` +### **Example Commands** +- **Basic sync with no bandwidth limit:** + ```bash + ./run_azcopy.sh /opt/AZURE/ false + ``` +- **Enable logging & limit bandwidth to 10 Mbps:** + ```bash + ./run_azcopy.sh /opt/AZURE/ true 10 + ``` +- **Run in the background & detach from terminal:** + ```bash + nohup ./run_azcopy.sh /opt/AZURE/ true 10 & disown + ``` + +--- + +## **⏲️ 2. Automating with Cron** +Schedule the script to **run every hour**: +```bash +crontab -e +``` +Add this line: +```bash +0 * * * * /path/to/run_azcopy.sh /opt/AZURE/ true 10 +``` +### **How It Works** +- Runs at **00 minutes past every hour** (e.g., 1:00, 2:00, 3:00, etc.). +- If already running, the **next cron execution exits** to prevent duplicates. +- If interrupted (e.g., business hours), the **next run resumes**. + +--- + +## **🔍 3. Checking If the Script Is Running** +Check if `azcopy` or the script is running: +```bash +pgrep -fl run_azcopy.sh +pgrep -fl azcopy +``` +To **stop it manually**: +```bash +pkill -f azcopy +pkill -f run_azcopy.sh +``` + +--- + +## **📄 4. Checking Logs & Reports** +- **Sync Log (if enabled)**: + ```bash + tail -f azcopy_log_*.txt + ``` +- **Completion Report**: + ```bash + cat completion_report_*.txt + ``` + +--- + +## **⚙️ 5. 
Customizing Business Hours**
+Modify the script to change business hours:
+```bash
+BUSINESS_HOURS_START=9
+BUSINESS_HOURS_END=17
+```
+
+---
+
+## **✅ 6. Expected Behavior**
+| Scenario | What Happens? |
+|----------|--------------|
+| **Cron runs script inside business hours** | Script exits immediately. |
+| **Script is running when cron starts again** | Second instance exits to prevent duplicates. |
+| **Sync job interrupted by business hours** | Next run resumes automatically. |
+| **Sync completes normally** | Report logs all transferred files. |
+
diff --git a/run_azcopy.sh b/run_azcopy.sh
new file mode 100755
index 0000000..fc34c7a
--- /dev/null
+++ b/run_azcopy.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+#
+# run_azcopy.sh - sync a local directory to Azure Blob Storage with azcopy,
+# but only outside business hours.
+#
+# Usage: run_azcopy.sh <source_dir> [log=true|false] [bandwidth_mbps]
+#
+# Flow: validate the arguments, refuse to start during business hours or
+# while another instance holds the lock, resume the first azcopy job listed
+# as InProgress (or start a new "azcopy sync"), stop azcopy as soon as
+# business hours begin, then write a completion report.
+
+# Configurable variables
+# NOTE(review): howto.md documents 9-17 as the default window; confirm which
+# values are intended.
+BUSINESS_HOURS_START=7
+BUSINESS_HOURS_END=15
+AZURE_URL="https://<>.core.windows.net/"
+AZURE_SAS=""
+
+# Arguments
+SOURCE_DIR="$1"
+LOGGING="${2:-false}"   # Default to no logs
+BANDWIDTH_CAP="${3:-0}" # Default is 0 (no cap)
+
+# Report files (relative names, so they are created in the current directory)
+TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
+FILE_LIST="file_list_$TIMESTAMP.txt"
+COMPLETION_REPORT="completion_report_$TIMESTAMP.txt"
+LOG_FILE="azcopy_log_$TIMESTAMP.txt"
+
+# Lock file to prevent multiple instances
+LOCK_FILE="/tmp/run_azcopy.lock"
+
+# State shared by the launcher, the monitor loop and the report
+AZCOPY_PID=""
+AZCOPY_STATUS=0
+INTERRUPTED=false
+
+# Returns 0 when the current local hour lies in
+# [BUSINESS_HOURS_START, BUSINESS_HOURS_END), non-zero otherwise.
+is_business_hours() {
+    local hour
+    hour=$(date +%H)
+    # date prints a zero-padded hour; force base 10 so that "08" and "09" are
+    # not rejected as invalid octal constants by the arithmetic evaluation.
+    (( 10#$hour >= BUSINESS_HOURS_START && 10#$hour < BUSINESS_HOURS_END ))
+}
+
+# Removes the lock file. Installed as the EXIT trap once this instance owns
+# the lock, so early exits do not leave the lock behind.
+release_lock() {
+    rm -f "$LOCK_FILE"
+}
+
+# Runs the given command line in the background and stores its PID in
+# AZCOPY_PID. With logging enabled, stdout is shown and appended to LOG_FILE;
+# otherwise all output is discarded.
+start_azcopy() {
+    if [[ "$LOGGING" == "true" ]]; then
+        # Process substitution rather than "cmd | tee &": after a background
+        # pipeline, $! is the PID of tee, so the monitor loop would watch and
+        # kill tee instead of azcopy.
+        "$@" > >(tee -a "$LOG_FILE") &
+    else
+        "$@" > /dev/null 2>&1 &
+    fi
+    AZCOPY_PID=$!
+}
+
+# Ensure source directory is provided
+if [[ -z "$SOURCE_DIR" || ! -d "$SOURCE_DIR" ]]; then
+    echo "Usage: $0 <source_dir> [log=true|false] [bandwidth_mbps]" >&2
+    exit 1
+fi
+
+# Check if already within business hours before starting
+if is_business_hours; then
+    echo "Business hours detected ($BUSINESS_HOURS_START:00 - $BUSINESS_HOURS_END:00). Exiting..."
+    exit 1
+fi
+
+# Check if another instance is running
+if [[ -f "$LOCK_FILE" ]]; then
+    PID=$(cat "$LOCK_FILE")
+    if [[ -n "$PID" ]] && kill -0 "$PID" 2>/dev/null; then
+        echo "Another instance (PID $PID) is already running. Exiting..."
+        exit 1
+    fi
+    echo "Stale lock file found. Removing..."
+    rm -f "$LOCK_FILE"
+fi
+
+# Create lock file with current PID. The trap is installed only now, so the
+# exits above never delete a lock that belongs to another instance.
+echo "$$" > "$LOCK_FILE"
+trap release_lock EXIT
+
+# Create initial file list
+find "$SOURCE_DIR" -type f > "$FILE_LIST"
+TOTAL_FILES=$(wc -l < "$FILE_LIST")
+
+echo "Initial file list created: $TOTAL_FILES files found."
+
+# Check for incomplete azcopy jobs
+PENDING_JOB=$(azcopy jobs list --with-status=InProgress | awk '/JobId:/ {print $2; exit}')
+
+if [[ -n "$PENDING_JOB" ]]; then
+    echo "Resuming previous job: $PENDING_JOB"
+    RESUME_CMD=(azcopy jobs resume "$PENDING_JOB")
+    # NOTE(review): azcopy is documented as not keeping SAS tokens with the
+    # job, so the destination SAS is passed again on resume - confirm.
+    if [[ -n "$AZURE_SAS" ]]; then
+        RESUME_CMD+=(--destination-sas="$AZURE_SAS")
+    fi
+    start_azcopy "${RESUME_CMD[@]}"
+else
+    # Run azcopy in the background for a new sync
+    if [[ "$LOGGING" == "true" ]]; then
+        # The SAS token is a credential: keep it out of the terminal and log.
+        echo "Running: azcopy sync \"$SOURCE_DIR\" \"$AZURE_URL?<SAS redacted>\" --recursive --cap-mbps $BANDWIDTH_CAP" | tee -a "$LOG_FILE"
+    fi
+    start_azcopy azcopy sync "$SOURCE_DIR" "$AZURE_URL?$AZURE_SAS" --recursive --cap-mbps "$BANDWIDTH_CAP"
+fi
+
+echo "azcopy started with PID $AZCOPY_PID"
+
+# Monitor the process
+while kill -0 "$AZCOPY_PID" 2>/dev/null; do
+    if is_business_hours; then
+        printf '\nBusiness hours started! Stopping azcopy...\n'
+        kill "$AZCOPY_PID"
+        wait "$AZCOPY_PID" 2>/dev/null # Ensure the process is fully stopped
+        INTERRUPTED=true
+        break
+    fi
+    sleep 30 # Check every 30 seconds
+done
+
+# Collect the exit status of a run that ended on its own
+if [[ "$INTERRUPTED" != "true" ]]; then
+    wait "$AZCOPY_PID" 2>/dev/null
+    AZCOPY_STATUS=$?
+fi
+
+# Generate completion report
+if [[ "$INTERRUPTED" == "true" ]]; then
+    STATUS="Interrupted due to business hours, can be resumed"
+elif (( AZCOPY_STATUS != 0 )); then
+    STATUS="Failed (azcopy exit code $AZCOPY_STATUS)"
+else
+    STATUS="Completed normally"
+fi
+
+# Extract job summary from log (only if logging is enabled)
+if [[ "$LOGGING" == "true" && -f "$LOG_FILE" ]]; then
+    COMPLETED_FILES=$(grep -oP 'Number of Copy Transfers Completed:\s+\K\d+' "$LOG_FILE" | tail -n1)
+    FAILED_FILES=$(grep -oP 'Number of Copy Transfers Failed:\s+\K\d+' "$LOG_FILE" | tail -n1)
+    TOTAL_FILES=$(grep -oP 'Total Number of Copy Transfers:\s+\K\d+' "$LOG_FILE" | tail -n1)
+
+    # If values are empty, fallback to 0
+    COMPLETED_FILES=${COMPLETED_FILES:-0}
+    FAILED_FILES=${FAILED_FILES:-0}
+    TOTAL_FILES=${TOTAL_FILES:-$COMPLETED_FILES}
+
+    # Calculate percentage (guard against division by zero)
+    if [[ "$TOTAL_FILES" -eq 0 ]]; then
+        PERCENT_COMPLETE=0
+    else
+        PERCENT_COMPLETE=$(( COMPLETED_FILES * 100 / TOTAL_FILES ))
+    fi
+else
+    PERCENT_COMPLETE=0
+    COMPLETED_FILES=0
+    FAILED_FILES=0
+    TOTAL_FILES=0
+fi
+
+{
+    echo "Sync Status: $STATUS"
+    echo "Total Files: $TOTAL_FILES"
+    echo "Completed Files: $COMPLETED_FILES"
+    echo "Failed Files: $FAILED_FILES"
+    echo "Percentage Completed: $PERCENT_COMPLETE%"
+} > "$COMPLETION_REPORT"
+
+echo "Completion report generated: $COMPLETION_REPORT"
+
+# The lock file is removed by the EXIT trap
+exit 0
+
diff --git a/tests.md b/tests.md
new file mode 100644
index 0000000..77f8253
--- /dev/null
+++ b/tests.md
@@ -0,0 +1,83 @@
+#1st test.
+
+Just run.
+
+Detected business hours and stopped.
+
+---
+[test@alma-azure-test AZURE]$ ./run_azcopy.sh /opt/AZURE/ true 20
+Business hours detected (9:00 - 17:00). Exiting...
+
+#2nd test.
+
+Changed business hours to something outside the time of test. To see if it operates as expected. 
13 to 16 + +Started the sync just before 13 to see if a file gets copied to the blob, if the bandwidth limit works, and that the script gets stopped before finishing because it runs on past 13 + +--- +[test@alma-azure-test AZURE]$ ./run_azcopy.sh /opt/AZURE/ true 10 & +[2] 8109 +[test@alma-azure-test AZURE]$ Initial file list created: 8 files found. +Running: azcopy sync "/opt/AZURE/" "https://115.blob.core.windows.net/115?sv=2022-11-02&ss=bfqt&srt=sco&sp=rwlacupitfx&se=2025-02-20T18:39:49Z&st=2025-02-19T10:39:49Z&spr=https&sig=xxxx" --recursive --cap-mbps 10 & +azcopy started with PID 8128 +Error: 2 arguments source and destination are required for this command. Number of commands passed 3 + +Issues with going to the background. + +#3rd test. + +Amended the way the function calls azcopy to better handle background tasks. Moved some files out of the directory to sync in order to do a basic sync test. + +--- +[test@alma-azure-test AZURE]$ ./run_azcopy.sh /opt/AZURE/BLOB/ true 20 +Initial file list created: 1 files found. 
+Running: azcopy sync "/opt/AZURE/BLOB/" "https://115.blob.core.windows.net/115?sv=2022-11-02&ss=bfqt&srt=sco&sp=rwlacupitfx&se=2025-02-20T18:39:49Z&st=2025-02-19T10:39:49Z&spr=https&sig=xxxx" --recursive --cap-mbps 20 +azcopy started with PID 8493 +INFO: Any empty folders will not be processed, because source and/or destination doesn't have full folder support + +Job 9692ef9e-3872-f64c-5dbb-6f8fd8ad220e has started +Log file is located at: /home/test/.azcopy/9692ef9e-3872-f64c-5dbb-6f8fd8ad220e.log + +100.0 %, 1 Done, 0 Failed, 0 Pending, 1 Total, 2-sec Throughput (Mb/s): 9.1727 + +Job 9692ef9e-3872-f64c-5dbb-6f8fd8ad220e Summary +Files Scanned at Source: 1 +Files Scanned at Destination: 2 +Elapsed Time (Minutes): 4.4351 +Number of Copy Transfers for Files: 1 +Number of Copy Transfers for Folder Properties: 0 +Total Number of Copy Transfers: 1 +Number of Copy Transfers Completed: 1 +Number of Copy Transfers Failed: 0 +Number of Deletions at Destination: 0 +Total Number of Bytes Transferred: 662880628 +Total Number of Bytes Enumerated: 662880628 +Final Job Status: Completed + +Completion report generated: completion_report_20250220_132836.txt + +#4th test. + +Changed business hours to test locally a second time, going into the restricted window to check if it will stop the process. Window 14-15 so it should stop at 14 and then resume at 15. +We will use local execution to test if it stops and then will test cron to see if it resumes as it should. + +--- +[test@alma-azure-test AZURE]$ ./run_azcopy.sh /opt/AZURE/BLOB/ true 20 +Initial file list created: 4 files found. 
+Running: azcopy sync "/opt/AZURE/BLOB/" "https://115.blob.core.windows.net/115?sv=2022-11-02&ss=bfqt&srt=sco&sp=rwlacupitfx&se=2025-02-20T18:39:49Z&st=2025-02-19T10:39:49Z&spr=https&sig=xxxx" --recursive --cap-mbps 20 +azcopy started with PID 8654 +INFO: Any empty folders will not be processed, because source and/or destination doesn't have full folder support + +Job 13c1945e-3c6e-dd42-49c0-2c6722ced7ec has started +Log file is located at: /home/test/.azcopy/13c1945e-3c6e-dd42-49c0-2c6722ced7ec.log + +54.7 %, 1 Done, 0 Failed, 2 Pending, 3 Total, 2-sec Throughput (Mb/s): 20.1759Business hours started! Stopping azcopy... +./run_azcopy.sh: line 95: COMPLETED_FILES * 100 / TOTAL_FILES : division by 0 (error token is "TOTAL_FILES ") +Completion report generated: completion_report_20250220_135033.txt + +#5th test. + +Handled the edge case where 0 files were transferred or incorrectly parsed from the log, creating a division by 0 error. Added a check for whether it is still running to ensure we do not spawn many processes. +It should start via cron at 15 and then be re-run every hour until 7 AM, where it should detect the restricted business hours window and do nothing. + +---