#!/bin/bash
# System Health Monitor - periodically records temperatures, memory usage,
# load average, EDAC memory-error counters and the top CPU/memory processes,
# so that the cause of a crash can be traced from the log afterwards.
#
# Usage:  run without arguments; stop with Ctrl+C.
# Output: one compact status line per sample on stdout; full details are
#         appended to $HOME/system_health_<YYYYmmdd_HHMMSS>.log

# No `set -e`: a missing tool (sensors, notify-send) must not stop monitoring.
set -u

readonly TEMP_LIMIT_C=85        # alarm threshold, degrees Celsius
readonly SAMPLE_INTERVAL_S=5    # seconds between two samples
readonly EDAC_MC_DIR=/sys/devices/system/edac/mc

LOG_FILE="$HOME/system_health_$(date +%Y%m%d_%H%M%S).log"
readonly LOG_FILE

echo "========================================"
echo "System Health Monitoring Started"
echo "Log file: $LOG_FILE"
echo "Press Ctrl+C to stop"
echo "========================================"
echo ""

# Log header; abort early if the log file cannot be created.
if ! {
  echo "System Health Monitor - Started at $(date)"
  echo "=========================================="
  echo ""
} > "$LOG_FILE"; then
  printf 'error: cannot write log file %s\n' "$LOG_FILE" >&2
  exit 1
fi

#######################################
# Print the highest current temperature (integer degrees) found on stdin.
# Only the FIRST "+NN" on each line is used: `sensors` appends the high/crit
# thresholds on the same line, and those must not count as readings.
# Outputs: the maximum on stdout, or nothing when no reading is found.
#######################################
max_temperature() {
  awk '
    match($0, /[+][0-9]+/) {
      t = substr($0, RSTART + 1, RLENGTH - 1) + 0
      if (!seen || t > max) { max = t; seen = 1 }
    }
    END { if (seen) print max }
  '
}

#######################################
# Sum one EDAC counter over every memory controller (mc0, mc1, ...).
# Arguments: $1 - counter file name (ce_count or ue_count)
# Outputs:   the total on stdout, or "N/A" when no counter is readable.
#######################################
edac_total() {
  local counter=$1
  local file value
  local total=0 found=0
  for file in "$EDAC_MC_DIR"/mc*/"$counter"; do
    # An unmatched glob stays literal and is skipped by the -r test.
    [[ -r "$file" ]] || continue
    value=$(<"$file")
    [[ "$value" =~ ^[0-9]+$ ]] || continue
    total=$((total + 10#$value))
    found=1
  done
  if ((found)); then
    printf '%s\n' "$total"
  else
    printf '%s\n' "N/A"
  fi
}

#######################################
# Succeed only when $1 is a non-negative integer greater than zero.
# "N/A" (counter unavailable) is therefore not treated as an error count.
#######################################
has_errors() {
  [[ "$1" =~ ^[0-9]+$ ]] && ((10#$1 > 0))
}

#######################################
# Take one sample: append the detailed record to LOG_FILE, print a compact
# line on stdout and raise alarms (stdout + desktop notification) on
# overheating or on non-zero EDAC error counters.
# Globals: LOG_FILE, TEMP_LIMIT_C (read)
#######################################
log_status() {
  local timestamp temps mem load ce ue top_cpu top_mem max_temp temp_display

  timestamp=$(date '+%Y-%m-%d %H:%M:%S')

  # Temperatures; empty when `sensors` is not installed (error output hidden).
  temps=$(sensors 2>/dev/null | grep -E "Core|Package" | head -5)

  # Memory
  mem=$(free -h | grep "Mem:")

  # Load average
  load=$(uptime | awk -F'load average:' '{print $2}')

  # EDAC errors (correctable / uncorrectable)
  ce=$(edac_total ce_count)
  ue=$(edac_total ue_count)

  # Top 5 processes by CPU and by memory (input line 1 is the ps header).
  top_cpu=$(ps aux --sort=-%cpu \
    | awk 'NR > 1 && NR <= 6 {printf "%s(%s%%) ", $11, $3}')
  top_mem=$(ps aux --sort=-%mem \
    | awk 'NR > 1 && NR <= 6 {printf "%s(%s%%) ", $11, $4}')

  {
    echo "[$timestamp]"
    echo " Temperatures:"
    printf '%s\n' "$temps" | sed 's/^/ /'
    echo " Memory: $mem"
    echo " Load Average:$load"
    echo " EDAC Errors: CE=$ce UE=$ue"
    echo " Top CPU: $top_cpu"
    echo " Top MEM: $top_mem"
    echo ""
  } >> "$LOG_FILE"

  # On-screen output (more compact)
  max_temp=$(printf '%s\n' "$temps" | max_temperature)
  if [[ -n "$max_temp" ]]; then
    temp_display="${max_temp}°C"
  else
    temp_display="N/A"
  fi
  echo "[$timestamp] Temp: $temp_display | Load:$load | Err: CE=$ce UE=$ue"

  # Alarm if the temperature is too high (skipped when no reading exists).
  if [[ -n "$max_temp" ]] && ((max_temp > TEMP_LIMIT_C)); then
    echo "⚠️ WARNING: Temperature over ${TEMP_LIMIT_C}°C!"
    # Best effort: notify-send may be missing or have no session to talk to.
    notify-send -u critical "System Overheat" \
      "CPU Temperature: ${max_temp}°C" 2>/dev/null
  fi

  # Alarm on memory errors (only for real, non-zero counters).
  if has_errors "$ce" || has_errors "$ue"; then
    # Two-line message: headline, then the counters.
    printf '%s\n' "⚠️ WARNING: Memory errors detected! " "CE=$ce UE=$ue"
    notify-send -u critical "Memory Errors" \
      "Correctable: $ce, Uncorrectable: $ue" 2>/dev/null
  fi
}

# Main loop - one sample every SAMPLE_INTERVAL_S seconds until interrupted.
while true; do
  log_status
  sleep "$SAMPLE_INTERVAL_S"
done