Refactor health check functionality in the POE project. Remove the readiness endpoint from health_check.py and improve error handling for sensor status retrieval. Update logging to reduce noise and adjust the health check server startup process in main.py. Modify the Nomad job configuration (network mode and resource allocation) to improve overall system performance and stability.

This commit is contained in:
Naab2k3
2025-06-23 13:46:59 +07:00
parent 8991a02bf5
commit 9b0f4f6236
4 changed files with 148 additions and 339 deletions

View File

@ -10,29 +10,28 @@ job "poe-sensor" {
group "sensor-bridge" {
count = 1
# Network configuration - using bridge mode with port mapping for better isolation
# Network configuration - using host mode for better compatibility
network {
mode = "bridge"
mode = "host"
port "health" {
static = 8080
to = 8080
}
}
# Restart policy - more lenient for startup issues
# Restart policy
restart {
attempts = 5
attempts = 3
interval = "30m"
delay = "30s"
delay = "15s"
mode = "fail"
}
# Update strategy
update {
max_parallel = 1
min_healthy_time = "120s" # Increased from 60s
healthy_deadline = "10m" # Increased from 5m
progress_deadline = "15m" # Increased from 10m
min_healthy_time = "90s"
healthy_deadline = "8m"
progress_deadline = "12m"
auto_revert = true
canary = 0
}
@ -51,12 +50,12 @@ job "poe-sensor" {
check {
type = "http"
path = "/health"
interval = "30s" # Reduced frequency
timeout = "30s" # Increased timeout
initial_status = "critical" # Start as critical until proven healthy
interval = "30s"
timeout = "20s"
initial_status = "critical"
check_restart {
limit = 3
grace = "30s" # More time for graceful shutdown
limit = 2
grace = "20s"
}
}
}
@ -69,25 +68,11 @@ job "poe-sensor" {
command = "/bin/bash"
args = [
"-c",
<<EOF
cd local/poe-sensor &&
echo "Starting POE Sensor installation..." &&
apt-get update -qq &&
apt-get install -y procps curl &&
python -m pip install --upgrade pip &&
echo "Installing Python dependencies..." &&
python -m pip install -r requirements.txt &&
echo "Testing dependencies..." &&
python -c 'import pymodbus, paho.mqtt.client; print("Dependencies installed successfully")' &&
echo "Starting health check server..." &&
python -c 'from health_check import HealthCheckServer; import time; server = HealthCheckServer(); server.start(); time.sleep(2); print("Health check server started")' &
echo "Starting main application..." &&
python main.py
EOF
"cd local/poe-sensor && echo 'Starting POE Sensor...' && apt-get update -qq && apt-get install -y procps curl && python -m pip install --upgrade pip && python -m pip install -r requirements.txt && echo 'Dependencies installed' && python -c 'import pymodbus, paho.mqtt.client; print(\"Dependencies OK\")' && echo 'Starting application...' && python main.py"
]
}
# Git artifact - using SSH similar to qc-scanner
# Git artifact
artifact {
source = "git::ssh://git@gitea.service.mesh:2222/Mei_Sheng_Textiles/POE-sensor.git"
destination = "local/poe-sensor"
@ -101,23 +86,21 @@ EOF
env {
LOG_LEVEL = "INFO"
PYTHONUNBUFFERED = "1"
PYTHONDONTWRITEBYTECODE = "1" # Prevent .pyc files to save memory
PYTHONMALLOC = "malloc" # Use system malloc for better memory management
PYTHONDONTWRITEBYTECODE = "1"
PYTHONMALLOC = "malloc"
TZ = "Asia/Ho_Chi_Minh"
# MQTT configuration (can be overridden by config.py)
MQTT_BROKER = "mqtt.service.mesh"
MQTT_PORT = "1883"
MQTT_USERNAME = "relay"
MQTT_PASSWORD = "Sey@K9c&Q4^"
# Health check configuration
HEALTH_CHECK_ENABLED = "true"
HEALTH_CHECK_PORT = "8080"
}
# Resource allocation - increased for stability
# Resource allocation
resources {
cpu = 512 # Increased from 256
memory = 1024 # Increased from 512
cpu = 256
memory = 512
}
# Logs configuration