"""Health-check endpoints for the poe-sensor service.

Provides an HTTP server exposing ``/health`` and ``/sensors`` as JSON, a
minimal TCP server that only proves the port is open, and a factory that
tries the HTTP server first and falls back to the TCP one.
"""

import errno
import json
import logging
import os
import socket
import threading
import time
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer

from config import HEALTH_CHECK_ENABLED, HEALTH_CHECK_PORT


class SimpleHealthHandler(BaseHTTPRequestHandler):
    """Request handler serving ``/health`` (also ``/``) and ``/sensors``."""

    def do_GET(self):
        """Route a GET request to the matching endpoint; 404 otherwise."""
        try:
            if self.path in ('/health', '/'):
                # '/' defaults to the health response.
                self.send_health_response()
            elif self.path == '/sensors':
                self.send_sensors_response()
            else:
                self._send_body(404, 'text/plain', b'Not Found')
        except OSError as e:
            # Socket-level failure while writing: the client is gone or the
            # response is half-sent, so a second status line must not follow.
            logging.error(f"Error handling request: {e}")
        except Exception as e:
            logging.error(f"Error handling request: {e}")
            self._send_status_only(500)

    def _send_body(self, status, content_type, body):
        """Send a complete response whose payload is the bytes *body*."""
        self.send_response(status)
        self.send_header('Content-type', content_type)
        # Length is taken from the encoded bytes, i.e. exactly what is written.
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_status_only(self, status):
        """Send a body-less response, ignoring a broken connection."""
        try:
            self.send_response(status)
            self.end_headers()
        except OSError as e:
            logging.error(f"Error handling request: {e}")

    def send_health_response(self):
        """Send the JSON health document with status 200.

        Sensor totals are merged in when ``sensor_tracker`` can be imported
        and queried; otherwise ``sensors_status`` is reported as
        ``"initializing"``. A failure while building the document yields a
        body-less 500.
        """
        try:
            health_data = {
                "status": "healthy",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "service": "poe-sensor",
                "version": "1.0.0"
            }

            # Best-effort enrichment: the tracker is imported lazily so the
            # endpoint still answers when it is not available.
            try:
                from sensor_tracker import get_sensor_tracker
                summary = get_sensor_tracker().get_summary()
                health_data.update({
                    "sensors_total": summary.get('total_sensors', 0),
                    "sensors_online": summary.get('online_sensors', 0),
                    "sensors_health": summary.get('health_percentage', 0.0)
                })
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must pass.
                health_data["sensors_status"] = "initializing"

            body = json.dumps(health_data, indent=2).encode()
        except Exception as e:
            logging.error(f"Error in health response: {e}")
            self._send_status_only(500)
            return

        # Sent outside the try so a write failure cannot trigger a second
        # status line on an already-started response.
        self._send_body(200, 'application/json', body)

    def send_sensors_response(self):
        """Send the per-sensor status as JSON.

        Responds 200 with the result of ``get_all_sensor_status()``; if that
        cannot be obtained or serialized, responds 500 with a JSON error
        document containing the exception text and a UTC timestamp.
        """
        try:
            from sensor_tracker import get_all_sensor_status
            body = json.dumps(get_all_sensor_status(), indent=2).encode()
            status = 200
        except Exception as e:
            logging.error(f"Error getting sensor status: {e}")
            error_data = {
                "error": "Sensor data unavailable",
                "message": str(e),
                "timestamp": datetime.now(timezone.utc).isoformat()
            }
            body = json.dumps(error_data, indent=2).encode()
            status = 500

        self._send_body(status, 'application/json', body)

    def log_message(self, format, *args):
        """Suppress default logging"""
        pass


class HealthCheckServer:
    """HTTP health-check server run on a daemon thread."""

    def __init__(self, port=None):
        """Store the port: *port* if truthy, else the ``HEALTH_CHECK_PORT``
        environment variable, else the ``config`` default."""
        self.port = port or int(os.getenv('HEALTH_CHECK_PORT', HEALTH_CHECK_PORT))
        self.server = None
        self.thread = None
        self.running = False

    def start(self):
        """Start the server and verify that it answers ``/health``.

        Returns:
            True when the server is up, or when health checks are disabled
            via ``HEALTH_CHECK_ENABLED`` (so the application continues).
            False on any failure; in that case the listening socket is
            released so another server can bind the same port.
        """
        if not HEALTH_CHECK_ENABLED:
            logging.info("Health check server disabled")
            return True  # Return True so app continues

        try:
            logging.info(f"Starting health check server on port {self.port}")

            # Retry binding a few times in case the port is briefly in use.
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    self.server = HTTPServer(('0.0.0.0', self.port), SimpleHealthHandler)
                    break
                except OSError as e:
                    # errno.EADDRINUSE instead of a literal 98: the numeric
                    # value differs between platforms.
                    if e.errno == errno.EADDRINUSE and attempt < max_retries - 1:
                        logging.warning(f"Port {self.port} in use, retrying in 2 seconds...")
                        time.sleep(2)
                        continue
                    raise

            if not self.server:
                logging.error("Failed to create health check server")
                return False

            self.thread = threading.Thread(target=self._run_server, daemon=True)
            self.thread.start()

            # Give the thread a moment to enter serve_forever, then probe it.
            time.sleep(1)
            if self._test_server():
                self.running = True
                logging.info(f"Health check server running at http://0.0.0.0:{self.port}/health")
                return True

            logging.error("Health check server failed to respond")
            self._release_server()
            return False

        except Exception as e:
            logging.error(f"Failed to start health check server: {e}")
            self._release_server()
            return False

    def _release_server(self):
        """Stop the serve loop if it is running and close the socket.

        Returns True when the server was released without error.
        """
        if not self.server:
            return True
        try:
            # shutdown() waits for serve_forever() to exit and would block
            # forever if the serving thread never ran.
            if self.thread is not None and self.thread.is_alive():
                self.server.shutdown()
            self.server.server_close()
        except Exception as e:
            logging.error(f"Error stopping health check server: {e}")
            return False
        self.server = None
        return True

    def _run_server(self):
        """Thread target: serve requests until shut down."""
        try:
            logging.info("Health check server thread started")
            if self.server:
                self.server.serve_forever()
        except Exception as e:
            logging.error(f"Health check server error: {e}")
        finally:
            self.running = False

    def _test_server(self):
        """Return True if a local GET of ``/health`` answers with 200."""
        try:
            import urllib.request
            with urllib.request.urlopen(f'http://localhost:{self.port}/health', timeout=5) as response:
                return response.status == 200
        except Exception as e:
            logging.warning(f"Health check test failed: {e}")
            return False

    def stop(self):
        """Stop the server and release its socket; safe to call repeatedly."""
        self.running = False
        if self.server and self._release_server():
            logging.info("Health check server stopped")

    def is_running(self):
        """Return True while the server is marked running and its thread is alive."""
        return bool(self.running and self.thread and self.thread.is_alive())


# Simple TCP server for basic health check
class SimpleTCPHealthServer:
    """TCP listener that accepts and immediately closes connections."""

    def __init__(self, port=None):
        """Store the port: *port* if truthy, else the ``HEALTH_CHECK_PORT``
        environment variable, else the ``config`` default."""
        self.port = port or int(os.getenv('HEALTH_CHECK_PORT', HEALTH_CHECK_PORT))
        self.server_socket = None
        self.thread = None
        self.running = False

    def start(self):
        """Bind, listen and start the accept loop on a daemon thread.

        Returns:
            True on success, False if the socket or thread could not be set
            up (the socket is closed again in that case).
        """
        try:
            logging.info(f"Starting TCP health server on port {self.port}")

            self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.server_socket.bind(('0.0.0.0', self.port))
            self.server_socket.listen(5)

            # Must be set before the thread starts: the accept loop checks
            # this flag first and would exit immediately if it were False.
            self.running = True
            self.thread = threading.Thread(target=self._run_tcp_server, daemon=True)
            self.thread.start()

            logging.info(f"TCP health server running on port {self.port}")
            return True

        except Exception as e:
            logging.error(f"Failed to start TCP health server: {e}")
            self.running = False
            # Do not leak a half-initialised socket.
            if self.server_socket:
                try:
                    self.server_socket.close()
                except OSError:
                    pass
                self.server_socket = None
            return False

    def _run_tcp_server(self):
        """Thread target: accept connections until stopped or an error occurs."""
        while self.running and self.server_socket:
            try:
                client_socket, addr = self.server_socket.accept()
                # Just accept and close connection - proves port is open
                client_socket.close()
            except Exception as e:
                # Errors after stop() are expected (socket closed); stay quiet.
                if self.running:
                    logging.error(f"TCP server error: {e}")
                break

    def stop(self):
        """Mark the server stopped and close the listening socket."""
        self.running = False
        if self.server_socket:
            try:
                self.server_socket.close()
                logging.info("TCP health server stopped")
            except Exception as e:
                logging.error(f"Error stopping TCP server: {e}")

    def is_running(self):
        """Return the running flag."""
        return self.running


# Factory function to create appropriate server
def create_health_server():
    """Start and return a health server, preferring HTTP over TCP.

    Returns:
        A started ``HealthCheckServer`` if its ``start()`` succeeds,
        otherwise a started ``SimpleTCPHealthServer``, otherwise ``None``.
    """
    http_server = HealthCheckServer()
    if http_server.start():
        return http_server

    logging.warning("HTTP health server failed, trying TCP fallback")
    tcp_server = SimpleTCPHealthServer()
    if tcp_server.start():
        return tcp_server

    logging.error("All health servers failed")
    return None