256 lines
9.1 KiB
Python
256 lines
9.1 KiB
Python
import json
|
|
import threading
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
from datetime import datetime, timezone
|
|
import logging
|
|
import time
|
|
import socket
|
|
import os
|
|
|
|
from config import HEALTH_CHECK_PORT, HEALTH_CHECK_ENABLED, LOCAL_TIMEZONE
|
|
|
|
class SimpleHealthHandler(BaseHTTPRequestHandler):
    """HTTP request handler serving ``/health`` (also ``/``) and ``/sensors``.

    Successful responses are JSON documents. Unknown paths get a plain-text
    404. The default per-request access logging of the base class is
    suppressed (see ``log_message``).
    """

    def do_GET(self):
        """Handle GET requests by dispatching on the request path.

        The query string and fragment are ignored when routing, so a probe
        such as ``/health?ts=123`` is served like ``/health``.
        """
        try:
            route = self._route_path(self.path)
            if route in ('/health', '/'):
                # '/' defaults to the health document.
                self.send_health_response()
            elif route == '/sensors':
                self.send_sensors_response()
            else:
                body = b'Not Found'
                self.send_response(404)
                self.send_header('Content-type', 'text/plain')
                self.send_header('Content-Length', str(len(body)))
                self.end_headers()
                self.wfile.write(body)
        except Exception as e:
            logging.error(f"Error handling request: {e}")
            self.send_response(500)
            self.end_headers()

    @staticmethod
    def _route_path(raw_path):
        """Return *raw_path* with any ``?query`` and ``#fragment`` removed."""
        return raw_path.split('?', 1)[0].split('#', 1)[0]

    def _send_json(self, status, payload):
        """Serialize *payload* as indented JSON and send it with *status*.

        The payload is serialized and encoded before any header is written,
        so a serialization failure leaves the response untouched, and
        ``Content-Length`` is the size of the bytes actually sent.
        """
        body = json.dumps(payload, indent=2).encode('utf-8')
        self.send_response(status)
        self.send_header('Content-type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def send_health_response(self):
        """Send the health document as JSON with status 200.

        The document always carries ``status``, ``timestamp``, ``service``
        and ``version``. When the sensor tracker can be queried, the sensor
        totals from its summary are added; otherwise ``sensors_status`` is
        reported as ``"initializing"``. Any other failure yields an empty
        500 response.
        """
        try:
            health_data = {
                "status": "healthy",
                "timestamp": datetime.now(LOCAL_TIMEZONE).isoformat(),
                "service": "poe-sensor",
                "version": "1.0.0",
            }

            # Sensor details are best effort: the tracker module is imported
            # lazily and any failure just marks the sensors as initializing.
            try:
                from sensor_tracker import get_sensor_tracker
                summary = get_sensor_tracker().get_summary()
                health_data.update({
                    "sensors_total": summary.get('total_sensors', 0),
                    "sensors_online": summary.get('online_sensors', 0),
                    "sensors_health": summary.get('health_percentage', 0.0),
                })
            except Exception as tracker_error:
                # Exception (not a bare except) so SystemExit and
                # KeyboardInterrupt are not swallowed here.
                logging.debug("Sensor tracker unavailable: %s", tracker_error)
                health_data["sensors_status"] = "initializing"

            self._send_json(200, health_data)

        except Exception as e:
            logging.error(f"Error in health response: {e}")
            self.send_response(500)
            self.end_headers()

    def send_sensors_response(self):
        """Send the status of all sensors as JSON with status 200.

        If the sensor data cannot be obtained or serialized, a JSON error
        document (``error``, ``message``, ``timestamp``) is sent with
        status 500 instead.
        """
        try:
            from sensor_tracker import get_all_sensor_status
            self._send_json(200, get_all_sensor_status())

        except Exception as e:
            logging.error(f"Error getting sensor status: {e}")
            error_data = {
                "error": "Sensor data unavailable",
                "message": str(e),
                "timestamp": datetime.now(LOCAL_TIMEZONE).isoformat(),
            }
            self._send_json(500, error_data)

    def log_message(self, format, *args):
        """Suppress default logging"""
        pass
|
|
|
|
class HealthCheckServer:
    """Runs ``SimpleHealthHandler`` in an ``HTTPServer`` on a daemon thread."""

    # Attempts to bind the port, and the pause between attempts in seconds.
    _BIND_ATTEMPTS = 3
    _BIND_RETRY_DELAY = 2

    def __init__(self, port=None):
        """Remember the port to serve on.

        Args:
            port: Port to listen on. When falsy, the ``HEALTH_CHECK_PORT``
                environment variable is used, falling back to the
                ``HEALTH_CHECK_PORT`` value imported from ``config``.
        """
        self.port = port or int(os.getenv('HEALTH_CHECK_PORT', HEALTH_CHECK_PORT))
        self.server = None
        self.thread = None
        self.running = False

    @staticmethod
    def _is_address_in_use(error):
        """Return True when *error* is the platform's "address in use" OSError."""
        import errno
        # errno.EADDRINUSE is 98 on Linux but differs on other platforms,
        # so compare against the symbolic constant instead of a literal.
        return error.errno == errno.EADDRINUSE

    def _create_server(self):
        """Bind the HTTP server, retrying while the port is still in use.

        Raises:
            OSError: if binding fails for another reason, or the port is
                still in use on the last attempt.
        """
        for attempt in range(self._BIND_ATTEMPTS):
            try:
                return HTTPServer(('0.0.0.0', self.port), SimpleHealthHandler)
            except OSError as e:
                last_attempt = attempt >= self._BIND_ATTEMPTS - 1
                if last_attempt or not self._is_address_in_use(e):
                    raise
                logging.warning(
                    f"Port {self.port} in use, retrying in "
                    f"{self._BIND_RETRY_DELAY} seconds..."
                )
                time.sleep(self._BIND_RETRY_DELAY)

    def start(self):
        """Start the health check server.

        Returns:
            True when the server is up and answered its own ``/health``
            probe, or when health checks are disabled via
            ``HEALTH_CHECK_ENABLED`` (so the application continues).
            False when the server could not be started; in that case any
            listener that was created is shut down again so the port is
            released.
        """
        if not HEALTH_CHECK_ENABLED:
            logging.info("Health check server disabled")
            return True  # Return True so app continues

        try:
            logging.info(f"Starting health check server on port {self.port}")
            self.server = self._create_server()

            # Serve from a daemon thread so it never blocks interpreter exit.
            self.thread = threading.Thread(target=self._run_server, daemon=True)
            self.thread.start()

            # Give the thread a moment to enter serve_forever(), then probe.
            time.sleep(1)

            if self._test_server():
                self.running = True
                logging.info(f"Health check server running at http://0.0.0.0:{self.port}/health")
                return True

            logging.error("Health check server failed to respond")

        except Exception as e:
            logging.error(f"Failed to start health check server: {e}")

        # Startup failed: do not leave a half-started listener on the port.
        self.stop()
        return False

    def _run_server(self):
        """Thread target: serve requests until the server is shut down."""
        try:
            logging.info("Health check server thread started")
            if self.server:
                self.server.serve_forever()
        except Exception as e:
            logging.error(f"Health check server error: {e}")
            self.running = False

    def _test_server(self):
        """Return True when a local GET of ``/health`` answers with 200."""
        try:
            import urllib.request
            with urllib.request.urlopen(f'http://localhost:{self.port}/health', timeout=5) as response:
                return response.status == 200
        except Exception as e:
            logging.warning(f"Health check test failed: {e}")
            return False

    def stop(self):
        """Stop the server and close its listening socket.

        Safe to call when the server was never started.
        """
        self.running = False
        if self.server:
            try:
                # shutdown() waits for serve_forever() to finish, so calling
                # it without a live serving thread would block forever.
                if self.thread is not None and self.thread.is_alive():
                    self.server.shutdown()
                self.server.server_close()
                logging.info("Health check server stopped")
            except Exception as e:
                logging.error(f"Error stopping health check server: {e}")

    def is_running(self):
        """Return True when the server was started and its thread is alive."""
        return bool(self.running and self.thread and self.thread.is_alive())
|
|
|
|
# Simple TCP server for basic health check
|
|
class SimpleTCPHealthServer:
    """Minimal TCP listener that accepts and immediately closes connections.

    A successful connect only proves that the port is open; no data is
    exchanged.
    """

    # Seconds accept() may block before the loop re-checks the running flag.
    _ACCEPT_TIMEOUT = 1.0

    def __init__(self, port=None):
        """Remember the port to listen on.

        Args:
            port: Port to listen on. When falsy, the ``HEALTH_CHECK_PORT``
                environment variable is used, falling back to the
                ``HEALTH_CHECK_PORT`` value imported from ``config``.
        """
        self.port = port or int(os.getenv('HEALTH_CHECK_PORT', HEALTH_CHECK_PORT))
        self.server_socket = None
        self.thread = None
        self.running = False

    def start(self):
        """Bind the listening socket and start the accept thread.

        Returns:
            True when the listener is bound and the thread was started,
            False otherwise (the socket is closed again on failure).
        """
        try:
            logging.info(f"Starting TCP health server on port {self.port}")

            self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.server_socket.bind(('0.0.0.0', self.port))
            self.server_socket.listen(5)
            # A timeout lets the accept loop notice stop() promptly instead
            # of blocking in accept() indefinitely.
            self.server_socket.settimeout(self._ACCEPT_TIMEOUT)

            # The flag must be set BEFORE the thread starts: the accept loop
            # exits as soon as it sees running == False.
            self.running = True
            self.thread = threading.Thread(target=self._run_tcp_server, daemon=True)
            self.thread.start()

            logging.info(f"TCP health server running on port {self.port}")
            return True

        except Exception as e:
            logging.error(f"Failed to start TCP health server: {e}")
            self.running = False
            self._close_socket()
            return False

    def _close_socket(self):
        """Close and forget the listening socket, ignoring close errors."""
        listener, self.server_socket = self.server_socket, None
        if listener is not None:
            try:
                listener.close()
            except OSError:
                pass

    def _run_tcp_server(self):
        """Thread target: accept and close connections until stopped."""
        # Work on a local reference so stop() clearing the attribute cannot
        # turn into an AttributeError in the middle of the loop.
        listener = self.server_socket
        if not listener:
            return
        while self.running:
            try:
                client_socket, _addr = listener.accept()
                # Just accept and close connection - proves port is open
                client_socket.close()
            except socket.timeout:
                # No client within the timeout; re-check the running flag.
                continue
            except Exception as e:
                # Errors after stop() are expected (the socket was closed).
                if self.running:
                    logging.error(f"TCP server error: {e}")
                break

    def stop(self):
        """Stop the accept loop and close the listening socket."""
        self.running = False
        if self.server_socket:
            try:
                self.server_socket.close()
                logging.info("TCP health server stopped")
            except Exception as e:
                logging.error(f"Error stopping TCP server: {e}")
            finally:
                self.server_socket = None

    def is_running(self):
        """Return True when the server was started and its thread is alive."""
        return bool(self.running and self.thread is not None and self.thread.is_alive())
|
|
|
|
# Factory function to create appropriate server
|
|
def create_health_server():
    """Return a started health server, or None when none could be started.

    A ``HealthCheckServer`` (HTTP) is tried first. If its ``start()``
    reports failure, a ``SimpleTCPHealthServer`` is tried as a fallback.
    """
    primary = HealthCheckServer()
    if primary.start():
        return primary

    logging.warning("HTTP health server failed, trying TCP fallback")
    fallback = SimpleTCPHealthServer()
    if not fallback.start():
        logging.error("All health servers failed")
        return None

    return fallback
|