import errno
import json
import logging
import threading
import time
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer

from config import HEALTH_CHECK_ENABLED, HEALTH_CHECK_PORT


class HealthCheckHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``/health`` and ``/sensors`` as JSON endpoints.

    Any other path is answered with an empty 404 response.
    """

    def do_GET(self):
        """Dispatch GET requests to the matching endpoint handler."""
        if self.path == '/health':
            self.send_health_response()
        elif self.path == '/sensors':
            self.send_sensors_status()
        else:
            self.send_response(404)
            self.end_headers()

    def _send_json_bytes(self, status_code, body):
        """Write an already-serialized JSON ``body`` (bytes) with ``status_code``."""
        self.send_response(status_code)
        self.send_header('Content-type', 'application/json')
        # Let clients know the exact payload size instead of relying on
        # the connection being closed to delimit the body.
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def send_health_response(self):
        """Send basic health check response.

        Always answers 200: the fact that this handler runs means the
        service process is up. The ``status`` field is ``"degraded"`` when
        sensors are configured but none is online, ``"healthy"`` otherwise
        (including when the sensor summary cannot be obtained).
        """
        try:
            # Imported lazily so the health endpoint keeps working even if
            # the sensor tracker cannot be imported.
            from sensor_tracker import get_sensor_tracker

            summary = get_sensor_tracker().get_summary()
            total_sensors = summary.get('total_sensors', 0)
            online_sensors = summary.get('online_sensors', 0)

            health_data = {
                "status": "healthy",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "service": "modbus-mqtt-bridge",
                "version": "1.0.0",
                "sensors": {
                    "total": total_sensors,
                    "online": online_sensors,
                    "health_percentage": summary.get('health_percentage', 0.0),
                },
            }

            # Sensors are configured but none is online: report "degraded"
            # rather than failing the check, since the service itself runs.
            if total_sensors > 0 and online_sensors == 0:
                health_data["status"] = "degraded"
                health_data["message"] = "All sensors offline"
        except Exception as e:
            # If we can't get sensor status, still report as healthy since
            # the service is running.
            logging.warning(f"Could not get sensor status for health check: {e}")
            health_data = {
                "status": "healthy",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "service": "modbus-mqtt-bridge",
                "version": "1.0.0",
                "message": "Service running, sensor status unavailable",
            }

        self._send_json_bytes(200, json.dumps(health_data, indent=2).encode())

    def send_sensors_status(self):
        """Send detailed sensor status.

        Answers 200 with the value returned by
        ``sensor_tracker.get_all_sensor_status()``. If that value cannot be
        obtained or serialized, answers 500 with a JSON error body instead
        of dropping the connection without a response.
        """
        status_code = 200
        try:
            from sensor_tracker import get_all_sensor_status

            # Serialize before any header is written so a failure here can
            # still be turned into a well-formed error response.
            body = json.dumps(get_all_sensor_status(), indent=2).encode()
        except Exception as e:
            logging.error(f"Could not get sensor status: {e}", exc_info=True)
            status_code = 500
            body = json.dumps(
                {
                    "status": "error",
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                    "message": "Sensor status unavailable",
                },
                indent=2,
            ).encode()

        self._send_json_bytes(status_code, body)

    def log_message(self, format, *args):
        """Override to use our logging system"""
        logging.info(f"Health Check - {format % args}")


class HealthCheckServer:
    """Run ``HealthCheckHandler`` on a background daemon thread."""

    def __init__(self, port=HEALTH_CHECK_PORT):
        self.port = port
        self.server = None  # HTTPServer instance while running, else None
        self.thread = None  # thread running serve_forever(), else None

    def start(self):
        """Start the health check server in a separate thread.

        Does nothing (besides logging) when ``HEALTH_CHECK_ENABLED`` is
        falsy.

        Raises:
            OSError: the listening socket could not be created or bound,
                e.g. because the port is already in use.
            Exception: any other start-up failure is logged and re-raised.
        """
        if not HEALTH_CHECK_ENABLED:
            logging.info("Health check server is disabled")
            return

        try:
            logging.info(f"Attempting to start health check server on port {self.port}")

            # Bind to all interfaces to make it accessible from outside container
            self.server = HTTPServer(('0.0.0.0', self.port), HealthCheckHandler)
            logging.info(f"Successfully bound to 0.0.0.0:{self.port}")

            self.thread = threading.Thread(
                target=self._serve_with_error_handling, daemon=True
            )
            self.thread.start()

            # Give the server a moment to start
            time.sleep(0.5)

            logging.info(f"Health check server started on 0.0.0.0:{self.port}")
            logging.info(f"Health check endpoints:")
            logging.info(f" - http://0.0.0.0:{self.port}/health")
            logging.info(f" - http://0.0.0.0:{self.port}/sensors")
            logging.info("Health check server is ready for external health checks")
        except OSError as e:
            # errno.EADDRINUSE is the portable spelling; the numeric value
            # differs between platforms.
            if e.errno == errno.EADDRINUSE:
                logging.error(f"Port {self.port} is already in use. Cannot start health check server.")
            else:
                logging.error(f"OS error starting health check server: {e}")
            self._discard_unserved_server()
            raise
        except Exception as e:
            logging.error(f"Failed to start health check server: {e}")
            self._discard_unserved_server()
            raise  # Re-raise to make the issue visible

    def _discard_unserved_server(self):
        """Close and forget a server that was bound but is not being served.

        Without this, a later ``stop()`` would call ``shutdown()`` on a
        server whose ``serve_forever()`` loop never ran and block forever.
        """
        serving = self.thread is not None and self.thread.is_alive()
        if self.server is not None and not serving:
            self.server.server_close()
            self.server = None
            self.thread = None

    def _serve_with_error_handling(self):
        """Serve forever with error handling"""
        try:
            logging.info("Health check server thread started, beginning to serve requests")
            if self.server:
                self.server.serve_forever()
            else:
                logging.error("Health check server is None, cannot serve requests")
        except Exception as e:
            logging.error(f"Health check server error: {e}", exc_info=True)

    def stop(self):
        """Stop the health check server.

        Safe to call when the server was never started or was already
        stopped; in that case nothing happens.
        """
        if self.server:
            self.server.shutdown()
            self.server.server_close()
            if self.thread is not None:
                # shutdown() has already ended the serve loop, so this
                # returns promptly; the timeout is only a safety net.
                self.thread.join(timeout=5)
            self.server = None
            self.thread = None
            logging.info("Health check server stopped")