Files
POE-sensor/health_check.py

256 lines
9.1 KiB
Python

import json
import threading
from http.server import HTTPServer, BaseHTTPRequestHandler
from datetime import datetime, timezone
import logging
import time
import socket
import os
from config import HEALTH_CHECK_PORT, HEALTH_CHECK_ENABLED, LOCAL_TIMEZONE
class SimpleHealthHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler that reports service health and sensor status as JSON.

    Routes (a query string, if present, is ignored when routing):
        ``/`` and ``/health`` -- service health summary.
        ``/sensors``          -- whatever ``sensor_tracker.get_all_sensor_status()`` returns.
        anything else         -- plain-text 404.
    """

    def do_GET(self):
        """Handle GET requests by dispatching on the request path."""
        try:
            # Probes frequently append a query string; route on the path part only.
            route = self.path.partition('?')[0]
            if route in ('/', '/health'):
                self.send_health_response()
            elif route == '/sensors':
                self.send_sensors_response()
            else:
                self._send_payload(404, 'text/plain', b'Not Found')
        except Exception as e:
            logging.error(f"Error handling request: {e}")
            self._send_payload(500)

    def _send_payload(self, status, content_type=None, payload=b''):
        """Write a complete response whose Content-Length matches the bytes sent.

        Args:
            status: HTTP status code.
            content_type: Value for the Content-type header; omitted when None.
            payload: Already-encoded response body (may be empty).
        """
        self.send_response(status)
        if content_type is not None:
            self.send_header('Content-type', content_type)
        # Always derived from the encoded bytes, never from the str length.
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def send_health_response(self):
        """Send the health summary, enriched with sensor counts when available."""
        try:
            health_data = {
                "status": "healthy",
                "timestamp": datetime.now(LOCAL_TIMEZONE).isoformat(),
                "service": "poe-sensor",
                "version": "1.0.0"
            }
            # Sensor info is best-effort: the tracker may be missing or not ready.
            try:
                from sensor_tracker import get_sensor_tracker
                summary = get_sensor_tracker().get_summary()
                health_data.update({
                    "sensors_total": summary.get('total_sensors', 0),
                    "sensors_online": summary.get('online_sensors', 0),
                    "sensors_health": summary.get('health_percentage', 0.0)
                })
            except Exception:
                # `except Exception` (not a bare except) so KeyboardInterrupt and
                # SystemExit are not swallowed here.
                health_data["sensors_status"] = "initializing"
            payload = json.dumps(health_data, indent=2).encode()
        except Exception as e:
            logging.error(f"Error in health response: {e}")
            self._send_payload(500)
            return
        # Sent only after the body was built successfully, so a failure above
        # can never follow an already-written 200 status line.
        self._send_payload(200, 'application/json', payload)

    def send_sensors_response(self):
        """Send the sensor status as JSON, or a JSON error document with status 500."""
        try:
            from sensor_tracker import get_all_sensor_status
            payload = json.dumps(get_all_sensor_status(), indent=2).encode()
            status = 200
        except Exception as e:
            logging.error(f"Error getting sensor status: {e}")
            error_data = {
                "error": "Sensor data unavailable",
                "message": str(e),
                "timestamp": datetime.now(LOCAL_TIMEZONE).isoformat()
            }
            payload = json.dumps(error_data, indent=2).encode()
            status = 500
        self._send_payload(status, 'application/json', payload)

    def log_message(self, format, *args):
        """Suppress the base class's per-request logging."""
        pass
class HealthCheckServer:
    """Serve SimpleHealthHandler over HTTP from a background daemon thread."""

    def __init__(self, port=None):
        """Store the port to use; nothing is bound until start() is called.

        Args:
            port: Port to listen on. When falsy, the HEALTH_CHECK_PORT
                environment variable is used, falling back to the configured
                HEALTH_CHECK_PORT value.
        """
        self.port = port or int(os.getenv('HEALTH_CHECK_PORT', HEALTH_CHECK_PORT))
        self.server = None    # HTTPServer instance once created
        self.thread = None    # thread running serve_forever()
        self.running = False  # True only after the self-test succeeded

    def start(self):
        """Start the health check server.

        Returns:
            True when the server answered its own /health probe, or when the
            server is disabled by configuration (so the application continues);
            False otherwise. On failure the listening socket is released so the
            port can be bound again by a fallback.
        """
        if not HEALTH_CHECK_ENABLED:
            logging.info("Health check server disabled")
            return True  # Return True so app continues
        try:
            logging.info(f"Starting health check server on port {self.port}")
            self.server = self._create_server()
            if not self.server:
                logging.error("Failed to create health check server")
                return False
            self.thread = threading.Thread(target=self._run_server, daemon=True)
            self.thread.start()
            # Give the worker a moment, then verify the server really answers.
            time.sleep(1)
            if self._test_server():
                self.running = True
                logging.info(f"Health check server running at http://0.0.0.0:{self.port}/health")
                return True
            logging.error("Health check server failed to respond")
        except Exception as e:
            logging.error(f"Failed to start health check server: {e}")
        # Do not leave a half-started server holding the port.
        self._release_server()
        return False

    def _create_server(self, max_retries=3, retry_delay=2):
        """Create the HTTPServer, retrying while the port is still in use.

        Raises:
            OSError: for any bind error other than "address in use", or when
                the port is still in use on the last attempt.
        """
        import errno
        for attempt in range(max_retries):
            try:
                return HTTPServer(('0.0.0.0', self.port), SimpleHealthHandler)
            except OSError as e:
                # errno.EADDRINUSE instead of a literal 98, which is Linux-specific.
                if e.errno != errno.EADDRINUSE or attempt >= max_retries - 1:
                    raise
                logging.warning(f"Port {self.port} in use, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
        return None

    def _release_server(self):
        """Stop the serve loop (if running) and close the listening socket.

        Returns:
            True when the server was released without error, False otherwise.
        """
        if not self.server:
            return False
        try:
            # shutdown() blocks until serve_forever() exits and deadlocks when
            # that loop was never started, so only call it for a live worker.
            if self.thread is not None and self.thread.is_alive():
                self.server.shutdown()
            self.server.server_close()
            return True
        except Exception as e:
            logging.error(f"Error stopping health check server: {e}")
            return False

    def _run_server(self):
        """Thread target: serve requests until shutdown() is requested."""
        try:
            logging.info("Health check server thread started")
            if self.server:
                self.server.serve_forever()
        except Exception as e:
            logging.error(f"Health check server error: {e}")
            self.running = False

    def _test_server(self):
        """Return True when a local GET /health answers with status 200."""
        try:
            import urllib.request
            # An empty ProxyHandler disables proxy auto-detection, so an
            # http_proxy environment variable cannot divert this local probe.
            opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
            with opener.open(f'http://localhost:{self.port}/health', timeout=5) as response:
                return response.status == 200
        except Exception as e:
            logging.warning(f"Health check test failed: {e}")
            return False

    def stop(self):
        """Stop the server and release its socket; safe if it never started."""
        self.running = False
        if self.server and self._release_server():
            logging.info("Health check server stopped")

    def is_running(self):
        """Return True only when started successfully and the worker is alive."""
        return bool(self.running and self.thread and self.thread.is_alive())
# Simple TCP server for basic health check
class SimpleTCPHealthServer:
    """Bare TCP listener: accepts each connection and closes it immediately.

    An accepted connection only proves that the port is open; no data is
    exchanged.
    """

    def __init__(self, port=None):
        """Store the port to use; nothing is bound until start() is called.

        Args:
            port: Port to listen on. When falsy, the HEALTH_CHECK_PORT
                environment variable is used, falling back to the configured
                HEALTH_CHECK_PORT value.
        """
        self.port = port or int(os.getenv('HEALTH_CHECK_PORT', HEALTH_CHECK_PORT))
        self.server_socket = None  # listening socket once bound
        self.thread = None         # thread running the accept loop
        self.running = False

    def start(self):
        """Bind the listening socket and start the accept loop.

        Returns:
            True when the socket is listening and the worker thread was
            started, False otherwise (the socket is closed again on failure).
        """
        listener = None
        try:
            logging.info(f"Starting TCP health server on port {self.port}")
            listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            listener.bind(('0.0.0.0', self.port))
            listener.listen(5)
            # Wake up from accept() periodically so the loop can notice stop().
            listener.settimeout(1.0)
            self.server_socket = listener
            # Must be set before the thread starts; otherwise the loop can read
            # False on its first check and exit immediately.
            self.running = True
            self.thread = threading.Thread(target=self._run_tcp_server, daemon=True)
            self.thread.start()
            logging.info(f"TCP health server running on port {self.port}")
            return True
        except Exception as e:
            logging.error(f"Failed to start TCP health server: {e}")
            self.running = False
            # Do not leak the socket when bind/listen/thread start failed.
            if listener is not None:
                listener.close()
            self.server_socket = None
            return False

    def _run_tcp_server(self):
        """Thread target: accept and immediately close connections until stopped."""
        while self.running and self.server_socket:
            try:
                client_socket, _addr = self.server_socket.accept()
            except socket.timeout:
                # No client within the timeout; re-check the running flag.
                continue
            except Exception as e:
                # After stop() the closed socket makes accept() fail; that is
                # expected and not worth logging.
                if self.running:
                    logging.error(f"TCP server error: {e}")
                break
            # Just accept and close connection - proves port is open
            client_socket.close()

    def stop(self):
        """Signal the accept loop to end and close the listening socket."""
        self.running = False
        if self.server_socket:
            try:
                self.server_socket.close()
                logging.info("TCP health server stopped")
            except Exception as e:
                logging.error(f"Error stopping TCP server: {e}")

    def is_running(self):
        """Return True only when started and the accept-loop thread is alive."""
        return bool(self.running and self.thread and self.thread.is_alive())
# Factory function to create appropriate server
def create_health_server():
    """Start a health server and return it, preferring HTTP over plain TCP.

    Returns:
        The started HealthCheckServer when its start() succeeds; otherwise the
        started SimpleTCPHealthServer; None when both fail to start.
    """
    primary = HealthCheckServer()
    if not primary.start():
        # HTTP endpoint unavailable: fall back to a bare TCP listener.
        logging.warning("HTTP health server failed, trying TCP fallback")
        fallback = SimpleTCPHealthServer()
        if not fallback.start():
            logging.error("All health servers failed")
            return None
        return fallback
    return primary