diff --git a/README.MD b/README.MD index 7e76611..4f0f943 100644 --- a/README.MD +++ b/README.MD @@ -57,7 +57,7 @@ tail -f /var/log/hdd_temp_monitor.log |-----------------------|-----------------------------------------------------------------|-----------------------------------------------| | `MAX_TEMP` | Maximum allowed temperature (°C) before starting shutdown count | `60` | | `HOT_DURATION` | Consecutive minutes above `MAX_TEMP` before shutdown | `5` | -| `COOL_DURATION` | Consecutive minutes below `MAX_TEMP` required to reset counter | `5` | +| `COOL_RESET_DURATION` | Consecutive minutes below `MAX_TEMP` to reset all counters | `5` | | `LOG_FILE` | Path to the main log file | `/var/log/hdd_temp_monitor.log` | | `LOG_ROTATE_COUNT` | Number of log files to keep | `7` | | `LOG_ROTATE_PERIOD` | Rotation period for logs (`daily` or `weekly`) | `daily` | diff --git a/sh/hotdisk.sh b/sh/hotdisk.sh index d19960e..9c6795a 100644 --- a/sh/hotdisk.sh +++ b/sh/hotdisk.sh @@ -1,36 +1,74 @@ #!/bin/bash # HotDisk: Monitor SATA disk temperature and notify via Discord +set -euo pipefail + CONF_FILE="/etc/hdd_temp_monitor.conf" -STATE_FILE="/tmp/hdd_temp_state.txt" +STATE_FILE="/tmp/hdd_temp_state" + +# Check if configuration file exists +if [[ ! -f "$CONF_FILE" ]]; then + echo "ERROR: Configuration file $CONF_FILE not found!" >&2 + exit 1 +fi + source "$CONF_FILE" + +# Validate required variables +for var in MAX_TEMP HOT_DURATION COOL_RESET_DURATION LOG_FILE DISCORD_WEBHOOK; do + if [[ -z "${!var:-}" ]]; then + echo "ERROR: Required variable $var not set in $CONF_FILE" >&2 + exit 1 + fi +done DISKS=$(lsblk -dno NAME,TYPE | awk '$2=="disk"{print $1}' | grep -v '^nvme') +if [[ -z "$DISKS" ]]; then + echo "WARNING: No SATA disks found to monitor" >&2 + exit 0 +fi if [ ! -f "$STATE_FILE" ]; then touch "$STATE_FILE"; fi declare -A HOT_COUNTERS declare -A COOL_COUNTERS while read -r line; do - disk=$(echo "$line" | cut -d= -f1) - val=$(echo "$line" | cut -d= -f2) - HOT_COUNTERS[$disk]=$val + [[ -z "$line" || "$line" =~ ^# ]] && continue + if [[ "$line" =~ ^(.+)_HOT=(.+)$ ]]; then + HOT_COUNTERS[${BASH_REMATCH[1]}]=${BASH_REMATCH[2]} + elif [[ "$line" =~ ^(.+)_COOL=(.+)$ ]]; then + COOL_COUNTERS[${BASH_REMATCH[1]}]=${BASH_REMATCH[2]} + fi done < "$STATE_FILE" for disk in $DISKS; do - temp=$(smartctl -A /dev/$disk | awk '/Temperature_Celsius/ {print $10; exit}') - [ -z "$temp" ] && continue + # Get temperature with error handling + if ! temp=$(smartctl -A /dev/$disk 2>/dev/null | awk '/Temperature_Celsius/ {print $10; exit}'); then + echo "WARNING: Failed to read temperature for $disk" >&2 + continue + fi + + # Skip if temperature is empty or not numeric + if [[ -z "$temp" ]] || ! [[ "$temp" =~ ^[0-9]+$ ]]; then + continue + fi hot=${HOT_COUNTERS[$disk]:-0} cool=${COOL_COUNTERS[$disk]:-0} if [ "$temp" -ge "$MAX_TEMP" ]; then hot=$((hot+1)) cool=0 - curl -s -X POST -H "Content-Type: application/json" -d "{\"content\":\"🔥 Warning: $disk is above $MAX_TEMP°C for $hot minute(s)\"}" "$DISCORD_WEBHOOK" + if ! curl -s -X POST -H "Content-Type: application/json" -d "{\"content\":\"🔥 Warning: $disk is above $MAX_TEMP°C for $hot minute(s)\"}" "$DISCORD_WEBHOOK" >/dev/null 2>&1; then + echo "WARNING: Failed to send Discord notification for $disk" >&2 + fi if [ "$hot" -ge "$HOT_DURATION" ]; then - curl -s -X POST -H "Content-Type: application/json" -d "{\"content\":\"⚠️ Critical: $disk has been above $MAX_TEMP°C for $HOT_DURATION minutes. Shutting down...\"}" "$DISCORD_WEBHOOK" + if ! curl -s -X POST -H "Content-Type: application/json" -d "{\"content\":\"⚠️ Critical: $disk has been above $MAX_TEMP°C for $HOT_DURATION minutes. Shutting down...\"}" "$DISCORD_WEBHOOK" >/dev/null 2>&1; then + echo "WARNING: Failed to send critical Discord notification for $disk" >&2 + fi sleep 5 shutdown -h now fi else if [ "$hot" -gt 0 ]; then cool=$((cool+1)) - curl -s -X POST -H "Content-Type: application/json" -d "{\"content\":\"❄️ Notice: $disk is under $MAX_TEMP°C for $cool minute(s)\"}" "$DISCORD_WEBHOOK" - if [ "$cool" -ge "$COOL_DURATION" ]; then + if ! curl -s -X POST -H "Content-Type: application/json" -d "{\"content\":\"❄️ Notice: $disk is under $MAX_TEMP°C for $cool minute(s)\"}" "$DISCORD_WEBHOOK" >/dev/null 2>&1; then + echo "WARNING: Failed to send cool-down Discord notification for $disk" >&2 + fi + if [ "$cool" -ge "$COOL_RESET_DURATION" ]; then hot=0 cool=0 fi @@ -38,9 +76,31 @@ for disk in $DISKS; do fi HOT_COUNTERS[$disk]=$hot COOL_COUNTERS[$disk]=$cool - echo "$(date '+%Y-%m-%d %H:%M:%S') $disk $temp°C" >> "$LOG_FILE" -done -> "$STATE_FILE" -for disk in "${!HOT_COUNTERS[@]}"; do - echo "$disk=${HOT_COUNTERS[$disk]}" >> "$STATE_FILE" + + # Ensure log directory exists and log the temperature + LOG_DIR=$(dirname "$LOG_FILE") + if [[ ! -d "$LOG_DIR" ]]; then + mkdir -p "$LOG_DIR" 2>/dev/null || echo "WARNING: Cannot create log directory $LOG_DIR" >&2 + fi + echo "$(date '+%Y-%m-%d %H:%M:%S') $disk $temp°C" >> "$LOG_FILE" 2>/dev/null || echo "WARNING: Cannot write to log file $LOG_FILE" >&2 done + +# Atomic state file update - write to temp file then move +TEMP_STATE_FILE="${STATE_FILE}.tmp.$$" +{ + for disk in "${!HOT_COUNTERS[@]}"; do + echo "${disk}_HOT=${HOT_COUNTERS[$disk]}" + done + for disk in "${!COOL_COUNTERS[@]}"; do + echo "${disk}_COOL=${COOL_COUNTERS[$disk]}" + done +} > "$TEMP_STATE_FILE" + +# Atomic move - this operation is atomic on most filesystems +if mv "$TEMP_STATE_FILE" "$STATE_FILE" 2>/dev/null; then + : # Success - do nothing +else + echo "WARNING: Failed to update state file atomically" >&2 + # Cleanup temp file if move failed + rm -f "$TEMP_STATE_FILE" 2>/dev/null || true +fi diff --git a/sh/hotdisk_curl_install.sh b/sh/hotdisk_curl_install.sh index a430639..acc836d 100644 --- a/sh/hotdisk_curl_install.sh +++ b/sh/hotdisk_curl_install.sh @@ -1,11 +1,24 @@ #!/bin/bash +set -euo pipefail + +# Function to run commands with sudo only if not root +run_as_root() { + if [[ $EUID -eq 0 ]]; then + "$@" + else + sudo "$@" + fi +} + BASE_URL="https://git.djeex.fr/Djeex/hotdisk/raw/branch/main/sh" SCRIPTS=("hotdisk.sh" "hotdisk_logger.sh" "install_hotdisk.sh") -sudo apt update -sudo apt install -y smartmontools curl -sudo mkdir -p /usr/local/bin +run_as_root mkdir -p /usr/local/bin for script in "${SCRIPTS[@]}"; do - sudo curl -fsSL "$BASE_URL/$script" -o "/usr/local/bin/$script" - sudo chmod +x "/usr/local/bin/$script" + echo "Downloading $script..." + if ! run_as_root curl -fsSL "$BASE_URL/$script" -o "/usr/local/bin/$script"; then + echo "ERROR: Failed to download $script" >&2 + exit 1 + fi + run_as_root chmod +x "/usr/local/bin/$script" done -sudo /usr/local/bin/install_hotdisk.sh +run_as_root /usr/local/bin/install_hotdisk.sh diff --git a/sh/hotdisk_logger.sh b/sh/hotdisk_logger.sh index d434d51..328a572 100644 --- a/sh/hotdisk_logger.sh +++ b/sh/hotdisk_logger.sh @@ -1,8 +1,32 @@ #!/bin/bash +set -euo pipefail + +# Function to run commands with sudo only if not root +run_as_root() { + if [[ $EUID -eq 0 ]]; then + "$@" + else + sudo "$@" + fi +} + CONF_FILE="/etc/hdd_temp_monitor.conf" +if [[ ! -f "$CONF_FILE" ]]; then + echo "ERROR: Configuration file $CONF_FILE not found!" >&2 + exit 1 +fi source "$CONF_FILE" + +# Validate required variables +for var in LOG_FILE LOG_ROTATE_PERIOD LOG_ROTATE_COUNT; do + if [[ -z "${!var:-}" ]]; then + echo "ERROR: Required variable $var not set in $CONF_FILE" >&2 + exit 1 + fi +done + LOGROTATE_FILE="/etc/logrotate.d/hotdisk" -sudo tee "$LOGROTATE_FILE" > /dev/null < /dev/null </dev/null 2>&1; then MISSING+=("$cmd"); fi done + +# Only check for sudo if not running as root +if [[ $EUID -ne 0 ]] && ! command -v sudo >/dev/null 2>&1; then + MISSING+=("sudo") +fi + if [ ${#MISSING[@]} -ne 0 ]; then echo "❌ Missing dependencies:" for cmd in "${MISSING[@]}"; do echo " - $cmd"; done @@ -17,49 +36,81 @@ fi echo "✅ All dependencies are installed." read -p "Maximum temperature (°C) before shutdown [60]: " MAX_TEMP MAX_TEMP=${MAX_TEMP:-60} +if ! [[ "$MAX_TEMP" =~ ^[0-9]+$ ]] || [[ $MAX_TEMP -lt 1 || $MAX_TEMP -gt 100 ]]; then + echo "ERROR: MAX_TEMP must be a number between 1-100" >&2 + exit 1 +fi + read -p "Consecutive minutes above MAX_TEMP before shutdown [5]: " HOT_DURATION HOT_DURATION=${HOT_DURATION:-5} -read -p "Consecutive minutes below MAX_TEMP to reset counter [5]: " COOL_DURATION -COOL_DURATION=${COOL_DURATION:-5} +if ! [[ "$HOT_DURATION" =~ ^[0-9]+$ ]] || [[ $HOT_DURATION -lt 1 ]]; then + echo "ERROR: HOT_DURATION must be a positive number" >&2 + exit 1 +fi + +read -p "Minutes below MAX_TEMP to reset all counters [5]: " COOL_RESET_DURATION +COOL_RESET_DURATION=${COOL_RESET_DURATION:-5} +if ! [[ "$COOL_RESET_DURATION" =~ ^[0-9]+$ ]] || [[ $COOL_RESET_DURATION -lt 1 ]]; then + echo "ERROR: COOL_RESET_DURATION must be a positive number" >&2 + exit 1 +fi read -p "Log file path [/var/log/hdd_temp_monitor.log]: " LOG_FILE LOG_FILE=${LOG_FILE:-/var/log/hdd_temp_monitor.log} read -p "Logrotate: number of files to keep [7]: " LOG_ROTATE_COUNT LOG_ROTATE_COUNT=${LOG_ROTATE_COUNT:-7} +if ! [[ "$LOG_ROTATE_COUNT" =~ ^[0-9]+$ ]] || [[ $LOG_ROTATE_COUNT -lt 1 ]]; then + echo "ERROR: LOG_ROTATE_COUNT must be a positive number" >&2 + exit 1 +fi + read -p "Logrotate: rotation period (daily/weekly) [daily]: " LOG_ROTATE_PERIOD LOG_ROTATE_PERIOD=${LOG_ROTATE_PERIOD:-daily} +if [[ ! "$LOG_ROTATE_PERIOD" =~ ^(daily|weekly)$ ]]; then + echo "ERROR: LOG_ROTATE_PERIOD must be 'daily' or 'weekly'" >&2 + exit 1 +fi echo "Paste your Discord Webhook URL here." read -p "Discord Webhook URL: " DISCORD_WEBHOOK -[ -z "$DISCORD_WEBHOOK" ] && { echo "Discord Webhook cannot be empty"; exit 1; } +if [[ -z "$DISCORD_WEBHOOK" ]]; then + echo "ERROR: Discord Webhook cannot be empty" >&2 + exit 1 +fi + +# Validate Discord webhook URL format +if [[ ! "$DISCORD_WEBHOOK" =~ ^https://discord(app)?\.com/api/webhooks/ ]]; then + echo "ERROR: Invalid Discord webhook URL format" >&2 + exit 1 +fi echo "" echo "Please confirm:" echo "MAX_TEMP=$MAX_TEMP" echo "HOT_DURATION=$HOT_DURATION" -echo "COOL_DURATION=$COOL_DURATION" +echo "COOL_RESET_DURATION=$COOL_RESET_DURATION" echo "LOG_FILE=$LOG_FILE" echo "LOG_ROTATE_COUNT=$LOG_ROTATE_COUNT" echo "LOG_ROTATE_PERIOD=$LOG_ROTATE_PERIOD" echo "DISCORD_WEBHOOK=$DISCORD_WEBHOOK" read -p "Is this correct? (y/n): " CONFIRM [[ ! "$CONFIRM" =~ ^[Yy]$ ]] && { echo "Aborted"; exit 1; } -sudo tee "$CONFIG_FILE" > /dev/null < /dev/null < /dev/null < /dev/null < /dev/null < /dev/null <