# Nomad job specification (HCL) for the "poe-sensor" service.
# Credentials are supplied at submit time instead of being committed to this
# file, for example:
#   nomad job run -var-file=<secrets-file> <this-job-file>
# or through the NOMAD_VAR_git_ssh_key / NOMAD_VAR_mqtt_password environment
# variables.
#
# SECURITY: earlier revisions of this file embedded the SSH private key and
# the MQTT password in plain text. Treat both as compromised and rotate them.
# NOTE(review): values passed as job variables are still stored in the
# submitted job; consider a `template` block backed by Vault or Nomad
# Variables if the cluster supports it.
variable "git_ssh_key" {
  type        = string
  description = "Base64-encoded SSH private key used by the artifact block to clone the POE-sensor repository."
}

variable "mqtt_password" {
  type        = string
  description = "Password for the MQTT account named in MQTT_USERNAME."
}

# Single-instance service job that clones the POE-sensor repository, installs
# its Python dependencies inside the container and runs main.py.
job "poe-sensor" {
  region      = "global"
  datacenters = ["hs"]
  type        = "service"
  namespace   = "production"

  meta {
    version = "20250617"
  }

  group "sensor-bridge" {
    count = 1

    # Bridge networking: host port 8080 is forwarded to port 8080 inside the
    # allocation's network namespace and exposed under the label "health".
    network {
      mode = "bridge"

      port "health" {
        static = 8080
        to     = 8080
      }
    }

    # Restart policy: allow up to 5 restarts within 30 minutes, waiting 30s
    # between attempts; once exhausted the allocation is marked failed.
    restart {
      attempts = 5
      interval = "30m"
      delay    = "30s"
      mode     = "fail"
    }

    # Update strategy: one allocation at a time, no canaries, and automatic
    # rollback to the last stable version if the deployment fails.
    update {
      max_parallel      = 1
      min_healthy_time  = "120s" # Increased from 60s
      healthy_deadline  = "10m"  # Increased from 5m
      progress_deadline = "15m"  # Increased from 10m
      auto_revert       = true
      canary            = 0
    }

    # Service registration, named after the job and bound to the "health" port.
    service {
      name = "${NOMAD_JOB_NAME}"
      port = "health"

      tags = [
        "sensor",
        "modbus",
        "mqtt",
        "iot",
        "health-check",
      ]

      # HTTP health check against /health every 30s.
      check {
        type           = "http"
        path           = "/health"
        interval       = "30s"
        timeout        = "30s"
        initial_status = "critical" # Start as critical until proven healthy

        # Restart the task after 3 consecutive failed checks.
        check_restart {
          limit = 3
          # `grace` is the wait after the task (re)starts before failed checks
          # are counted; it is not a shutdown grace period.
          # NOTE(review): the task installs packages on every start, which may
          # take longer than 30s -- confirm this value is sufficient.
          grace = "30s"
        }
      }
    }

    task "poe-sensor" {
      driver = "docker"

      config {
        image   = "registry.dev.meisheng.group/ms_qc_db:20250409"
        command = "/bin/bash"

        # Startup script. `set -e` aborts on the first failing step. Commands
        # are deliberately NOT chained with `&&` up to the backgrounded
        # helper: in bash a trailing `&` applies to the whole preceding
        # `&&` list, which previously pushed the `cd` and every install step
        # into the background and let `python main.py` start immediately,
        # in the wrong directory and before dependencies were installed.
        #
        # NOTE(review): the backgrounded helper process exits about 2 seconds
        # after server.start() returns; whether the health server outlives it
        # depends on HealthCheckServer, which is not visible here. It may also
        # overlap with main.py if that honours HEALTH_CHECK_ENABLED -- verify.
        args = [
          "-c",
          <<-EOF
          set -e
          cd local/poe-sensor
          echo "Starting POE Sensor installation..."
          apt-get update -qq
          apt-get install -y procps curl
          python -m pip install --upgrade pip
          echo "Installing Python dependencies..."
          python -m pip install -r requirements.txt
          echo "Testing dependencies..."
          python -c 'import pymodbus, paho.mqtt.client; print("Dependencies installed successfully")'
          echo "Starting health check server..."
          python -c 'from health_check import HealthCheckServer; import time; server = HealthCheckServer(); server.start(); time.sleep(2); print("Health check server started")' &
          echo "Starting main application..."
          python main.py
          EOF
        ]
      }

      # Git artifact fetched over SSH into the task's local directory.
      artifact {
        source      = "git::ssh://git@gitea.service.mesh:2222/Mei_Sheng_Textiles/POE-sensor.git"
        destination = "local/poe-sensor"

        options {
          ref = "main"
          # Base64-encoded private key, injected from the job variable so it
          # is never stored in this file.
          sshkey = var.git_ssh_key
        }
      }

      # Environment variables
      env {
        LOG_LEVEL               = "INFO"
        PYTHONUNBUFFERED        = "1"
        PYTHONDONTWRITEBYTECODE = "1"      # Do not write .pyc files
        PYTHONMALLOC            = "malloc" # Use the C library allocator instead of pymalloc
        TZ                      = "Asia/Ho_Chi_Minh"

        # MQTT configuration (can be overridden by config.py)
        MQTT_BROKER   = "mqtt.service.mesh"
        MQTT_PORT     = "1883"
        MQTT_USERNAME = "relay"
        MQTT_PASSWORD = var.mqtt_password

        # Health check configuration
        HEALTH_CHECK_ENABLED = "true"
        HEALTH_CHECK_PORT    = "8080"
      }

      # Resource allocation - increased for stability
      resources {
        cpu    = 512  # MHz, increased from 256
        memory = 1024 # MB, increased from 512
      }

      # Log rotation: keep up to 10 files of 20 MB each per stream.
      logs {
        max_files     = 10
        max_file_size = 20
      }
    }
  }
}
|