138 lines
5.5 KiB
Python
138 lines
5.5 KiB
Python
import json
|
|
import threading
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
from datetime import datetime, timezone
|
|
import logging
|
|
import time
|
|
|
|
from config import HEALTH_CHECK_PORT, HEALTH_CHECK_ENABLED
|
|
|
|
class HealthCheckHandler(BaseHTTPRequestHandler):
    """HTTP handler that serves the bridge's health-check endpoints.

    Routes (GET only):
        /health   -- summary health document, always answered with 200.
        /sensors  -- detailed sensor status from ``sensor_tracker``; 500 with
                     a JSON error document if that status cannot be produced.
        other     -- 404 with an empty body.
    """

    def do_GET(self):
        """Dispatch a GET request to the endpoint matching the request path."""
        # self.path is the raw request target, so drop any query string or
        # fragment before matching; a probe such as "/health?ts=1" must not
        # fall through to the 404 branch.
        route = self.path.split('?', 1)[0].split('#', 1)[0]

        if route == '/health':
            self.send_health_response()
        elif route == '/sensors':
            self.send_sensors_status()
        else:
            self.send_response(404)
            self.send_header('Content-Length', '0')
            self.end_headers()

    def send_health_response(self):
        """Send the basic health document as JSON with HTTP status 200.

        The document always carries ``status``, ``timestamp`` (UTC, ISO 8601),
        ``service`` and ``version``. When the sensor summary is available a
        ``sensors`` section is added, and ``status`` becomes ``"degraded"`` if
        sensors are configured but none is online. When the summary cannot be
        obtained the service is still reported as healthy, with a ``message``
        explaining that sensor status is unavailable.
        """
        health_data = {
            "status": "healthy",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "service": "modbus-mqtt-bridge",
            "version": "1.0.0",
        }

        try:
            # Imported lazily, inside the guard, so a missing or broken
            # tracker module degrades the response instead of failing it.
            from sensor_tracker import get_sensor_tracker

            summary = get_sensor_tracker().get_summary()
            total = summary.get('total_sensors', 0)
            online = summary.get('online_sensors', 0)
            sensors = {
                "total": total,
                "online": online,
                "health_percentage": summary.get('health_percentage', 0.0),
            }
            all_offline = total > 0 and online == 0
        except Exception as e:
            # If we can't get sensor status, still report as healthy since the
            # service itself is running and answering requests.
            logging.warning(f"Could not get sensor status for health check: {e}")
            health_data["message"] = "Service running, sensor status unavailable"
        else:
            health_data["sensors"] = sensors
            if all_offline:
                # Sensors are configured but none is online: report degraded,
                # yet keep HTTP 200 because the health server itself works.
                health_data["status"] = "degraded"
                health_data["message"] = "All sensors offline"

        self._write_json(200, json.dumps(health_data, indent=2).encode())

    def send_sensors_status(self):
        """Send the detailed sensor status as JSON.

        Responds with 200 and the value returned by
        ``sensor_tracker.get_all_sensor_status()``. If that call, its import
        or the JSON serialization fails, responds with 500 and a small JSON
        error document instead of dropping the connection without a reply.
        """
        try:
            from sensor_tracker import get_all_sensor_status

            # Serialize before any header is written so that a failure here
            # can still be reported with a proper error status.
            body = json.dumps(get_all_sensor_status(), indent=2).encode()
        except Exception as e:
            logging.error(f"Could not get sensor status: {e}")
            error_doc = {"status": "error", "message": "Sensor status unavailable"}
            self._write_json(500, json.dumps(error_doc, indent=2).encode())
            return

        self._write_json(200, body)

    def _write_json(self, status_code, body):
        """Write a complete JSON response: status line, headers, then *body*.

        Args:
            status_code: HTTP status code to send.
            body: Already-encoded JSON payload (bytes).
        """
        self.send_response(status_code)
        self.send_header('Content-type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Override to use our logging system"""
        logging.info(f"Health Check - {format % args}")
|
|
|
|
class HealthCheckServer:
    """Run the health-check HTTP endpoints on a background daemon thread.

    Attributes:
        port: TCP port the server binds to.
        server: The ``HTTPServer`` instance while started, otherwise ``None``.
        thread: The serving thread while started, otherwise ``None``.
    """

    def __init__(self, port=HEALTH_CHECK_PORT):
        """Remember the port; nothing is bound until ``start()`` is called."""
        self.port = port
        self.server = None
        self.thread = None

    def start(self):
        """Start the health check server in a separate thread.

        Does nothing (beyond logging) when ``HEALTH_CHECK_ENABLED`` is false.

        Raises:
            OSError: The socket could not be bound, e.g. the port is in use.
            Exception: Any other start-up failure is logged and re-raised.
        """
        # Local import: only needed here, to classify bind failures portably.
        import errno

        if not HEALTH_CHECK_ENABLED:
            logging.info("Health check server is disabled")
            return

        server = None
        try:
            logging.info(f"Attempting to start health check server on port {self.port}")
            # Bind to all interfaces to make it accessible from outside container
            server = HTTPServer(('0.0.0.0', self.port), HealthCheckHandler)
            self.server = server

            # Constructing HTTPServer binds and listens, so reaching this line
            # means the port was actually acquired.
            logging.info(f"Successfully bound to 0.0.0.0:{self.port}")

            self.thread = threading.Thread(target=self._serve_with_error_handling, daemon=True)
            self.thread.start()

            # No start-up sleep is needed: the socket is already listening, so
            # connections arriving before serve_forever() spins up are queued.
            logging.info(f"Health check server started on 0.0.0.0:{self.port}")
            logging.info("Health check endpoints:")
            logging.info(f" - http://0.0.0.0:{self.port}/health")
            logging.info(f" - http://0.0.0.0:{self.port}/sensors")
            logging.info("Health check server is ready for external health checks")

        except OSError as e:
            # errno.EADDRINUSE rather than a literal 98: the numeric value
            # differs between platforms.
            if e.errno == errno.EADDRINUSE:
                logging.error(f"Port {self.port} is already in use. Cannot start health check server.")
            else:
                logging.error(f"OS error starting health check server: {e}")
            self._abort_start(server)
            raise
        except Exception as e:
            logging.error(f"Failed to start health check server: {e}")
            self._abort_start(server)
            raise  # Re-raise to make the issue visible

    def _abort_start(self, server):
        """Release a server created by a failed ``start()`` call.

        Only the server created by that call is touched, so a server that was
        already running before the failed attempt is left alone.
        """
        if server is None:
            return
        server.server_close()
        if self.server is server:
            self.server = None
            self.thread = None

    def _serve_with_error_handling(self):
        """Serve forever with error handling"""
        try:
            logging.info("Health check server thread started, beginning to serve requests")
            # Snapshot the attribute so a concurrent stop() resetting it to
            # None cannot slip in between the check and the call.
            server = self.server
            if server:
                server.serve_forever()
            else:
                logging.error("Health check server is None, cannot serve requests")
        except Exception as e:
            logging.error(f"Health check server error: {e}", exc_info=True)

    def stop(self):
        """Stop the health check server and release its socket.

        Safe to call when the server was never started or was already stopped.
        """
        server, thread = self.server, self.thread
        if not server:
            return

        # shutdown() waits for serve_forever() to acknowledge; calling it when
        # the serving thread is not running would block forever.
        if thread is not None and thread.is_alive():
            server.shutdown()
            thread.join(timeout=5)

        server.server_close()
        self.server = None
        self.thread = None
        logging.info("Health check server stopped")
|