#!/usr/bin/env python3 """ Nomad MCP Server for Claude Desktop App Provides MCP tools for managing HashiCorp Nomad jobs """ import asyncio import json import logging import os import sys from typing import Any, Dict, List, Optional from mcp.server import NotificationOptions, Server from mcp.server.models import InitializationOptions import mcp.server.stdio import mcp.types as types from app.services.nomad_client import NomadService from app.schemas.claude_api import ClaudeJobSpecification # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger("nomad-mcp") # Create the server instance server = Server("nomad-mcp") @server.list_tools() async def handle_list_tools() -> List[types.Tool]: """List available tools for Nomad management.""" return [ types.Tool( name="list_nomad_jobs", description="List all Nomad jobs in a namespace", inputSchema={ "type": "object", "properties": { "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } } } ), types.Tool( name="get_job_status", description="Get the status of a specific Nomad job", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to check" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } }, "required": ["job_id"] } ), types.Tool( name="stop_job", description="Stop a running Nomad job", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to stop" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" }, "purge": { "type": "boolean", "description": "Whether to purge the job", "default": False } }, "required": ["job_id"] } ), types.Tool( name="restart_job", description="Restart a Nomad job", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to restart" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } }, "required": ["job_id"] } ), types.Tool( name="create_job", description="Create a new Nomad job", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "Unique ID for the job" }, "name": { "type": "string", "description": "Display name for the job" }, "type": { "type": "string", "description": "Job type (service, batch, etc.)", "default": "service" }, "datacenters": { "type": "array", "description": "List of datacenters to run the job in", "items": {"type": "string"}, "default": ["jm"] }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" }, "docker_image": { "type": "string", "description": "Docker image to run" }, "count": { "type": "integer", "description": "Number of instances to run", "default": 1 }, "cpu": { "type": "integer", "description": "CPU allocation in MHz", "default": 100 }, "memory": { "type": "integer", "description": "Memory allocation in MB", "default": 128 }, "ports": { "type": "array", "description": "Port mappings", "items": {"type": "object"}, "default": [] }, "env_vars": { "type": "object", "description": "Environment variables for the container", "default": {} } }, "required": ["job_id", "name", "docker_image"] } ), types.Tool( name="get_job_logs", description="Get logs for a Nomad job", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to get logs for" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } }, "required": ["job_id"] } ), types.Tool( name="submit_job_file", description="Submit a Nomad job from HCL or JSON file content", inputSchema={ "type": "object", "properties": { "file_content": { "type": "string", "description": "Content of the Nomad job file (HCL or JSON format)" }, "file_type": { "type": "string", "description": "Type of file content: 'hcl' or 'json'", "enum": ["hcl", "json"], "default": "json" }, "namespace": { "type": "string", "description": "Nomad namespace to submit the job to", "default": "development" } }, "required": ["file_content"] } ), types.Tool( name="get_allocation_status", description="Get detailed status of job allocations", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to check allocations for" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } }, "required": ["job_id"] } ), types.Tool( name="get_job_evaluations", description="Get evaluations for a job to understand placement and failures", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to get evaluations for" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } }, "required": ["job_id"] } ), types.Tool( name="force_evaluate_job", description="Force a new evaluation for a job (retry failed placements)", inputSchema={ "type": "object", "properties": { "job_id": { "type": "string", "description": "ID of the job to force evaluate" }, "namespace": { "type": "string", "description": "Nomad namespace", "default": "development" } }, "required": ["job_id"] } ) ] @server.call_tool() async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]: """Handle tool calls from Claude.""" try: # Create Nomad service instance nomad_service = NomadService() namespace = arguments.get("namespace", "development") nomad_service.namespace = namespace if name == "list_nomad_jobs": jobs = nomad_service.list_jobs() simplified_jobs = [] for job in jobs: simplified_jobs.append({ "id": job.get("ID"), "name": job.get("Name"), "status": job.get("Status"), "type": job.get("Type"), "namespace": namespace }) return [types.TextContent( type="text", text=json.dumps(simplified_jobs, indent=2) )] elif name == "get_job_status": job_id = arguments.get("job_id") if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] job = nomad_service.get_job(job_id) allocations = nomad_service.get_allocations(job_id) latest_alloc = None if allocations: sorted_allocations = sorted( allocations, key=lambda a: a.get("CreateTime", 0), reverse=True ) latest_alloc = sorted_allocations[0] result = { "job_id": job_id, "status": job.get("Status", "unknown"), "message": f"Job {job_id} is {job.get('Status', 'unknown')}", "details": { "job": job, "latest_allocation": latest_alloc } } return [types.TextContent( type="text", text=json.dumps(result, indent=2) )] elif name == "stop_job": job_id = arguments.get("job_id") purge = arguments.get("purge", False) if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] result = nomad_service.stop_job(job_id, purge=purge) response = { "success": True, "job_id": job_id, "status": "stopped", "message": f"Job {job_id} has been stopped" + (" and purged" if purge else ""), "details": result } return [types.TextContent( type="text", text=json.dumps(response, indent=2) )] elif name == "restart_job": job_id = arguments.get("job_id") if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] # Get current job spec job_spec = nomad_service.get_job(job_id) # Stop and restart nomad_service.stop_job(job_id) result = nomad_service.start_job(job_spec) response = { "success": True, "job_id": job_id, "status": "restarted", "message": f"Job {job_id} has been restarted", "details": result } return [types.TextContent( type="text", text=json.dumps(response, indent=2) )] elif name == "create_job": # Validate required arguments required_args = ["job_id", "name", "docker_image"] for arg in required_args: if not arguments.get(arg): return [types.TextContent( type="text", text=f"Error: {arg} is required" )] # Create job specification job_spec = ClaudeJobSpecification(**arguments) # Set namespace if job_spec.namespace: nomad_service.namespace = job_spec.namespace # Convert to Nomad format and start nomad_job_spec = job_spec.to_nomad_job_spec() result = nomad_service.start_job(nomad_job_spec) response = { "success": True, "job_id": job_spec.job_id, "status": "started", "message": f"Job {job_spec.job_id} has been created and started", "details": result } return [types.TextContent( type="text", text=json.dumps(response, indent=2) )] elif name == "get_job_logs": job_id = arguments.get("job_id") if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] # Get allocations allocations = nomad_service.get_allocations(job_id) if not allocations: result = { "success": False, "job_id": job_id, "message": f"No allocations found for job {job_id}", "logs": None } else: # Get latest allocation sorted_allocations = sorted( allocations, key=lambda a: a.get("CreateTime", 0), reverse=True ) latest_alloc = sorted_allocations[0] alloc_id = latest_alloc.get("ID") # Get task name task_name = None if "TaskStates" in latest_alloc: task_states = latest_alloc["TaskStates"] if task_states: task_name = next(iter(task_states.keys())) if not task_name: task_name = "app" # Default task name # Get logs stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout") stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr") result = { "success": True, "job_id": job_id, "allocation_id": alloc_id, "task_name": task_name, "message": f"Retrieved logs for job {job_id}", "logs": { "stdout": stdout_logs, "stderr": stderr_logs } } return [types.TextContent( type="text", text=json.dumps(result, indent=2) )] elif name == "submit_job_file": file_content = arguments.get("file_content") file_type = arguments.get("file_type", "json") if not file_content: return [types.TextContent( type="text", text="Error: file_content is required" )] try: # Parse the job specification based on file type if file_type.lower() == "json": import json as json_parser job_spec = json_parser.loads(file_content) elif file_type.lower() == "hcl": return [types.TextContent( type="text", text="Error: HCL parsing not yet implemented. Please provide JSON format." )] else: return [types.TextContent( type="text", text=f"Error: Unsupported file type '{file_type}'. Use 'json' or 'hcl'." )] # Submit the job result = nomad_service.start_job(job_spec) response = { "success": True, "job_id": result.get("job_id"), "status": "submitted", "message": f"Job {result.get('job_id')} has been submitted from {file_type} file", "details": result } return [types.TextContent( type="text", text=json.dumps(response, indent=2) )] except json.JSONDecodeError as e: return [types.TextContent( type="text", text=f"Error: Invalid JSON format - {str(e)}" )] except Exception as e: return [types.TextContent( type="text", text=f"Error submitting job: {str(e)}" )] elif name == "get_allocation_status": job_id = arguments.get("job_id") if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] # Get allocations for the job allocations = nomad_service.get_allocations(job_id) # Get detailed status for each allocation detailed_allocations = [] for alloc in allocations: alloc_id = alloc.get("ID") detailed_allocations.append({ "allocation_id": alloc_id, "name": alloc.get("Name"), "client_status": alloc.get("ClientStatus"), "desired_status": alloc.get("DesiredStatus"), "job_id": alloc.get("JobID"), "task_group": alloc.get("TaskGroup"), "node_id": alloc.get("NodeID"), "create_time": alloc.get("CreateTime"), "modify_time": alloc.get("ModifyTime"), "task_states": alloc.get("TaskStates", {}), "failed": alloc.get("Failed", False), "deployment_status": alloc.get("DeploymentStatus", {}) }) result = { "job_id": job_id, "total_allocations": len(allocations), "allocations": detailed_allocations, "message": f"Found {len(allocations)} allocations for job {job_id}" } return [types.TextContent( type="text", text=json.dumps(result, indent=2) )] elif name == "get_job_evaluations": job_id = arguments.get("job_id") if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] try: evaluations = nomad_service.get_job_evaluations(job_id) simplified_evals = [] for eval_item in evaluations: simplified_evals.append({ "eval_id": eval_item.get("ID"), "status": eval_item.get("Status"), "type": eval_item.get("Type"), "triggered_by": eval_item.get("TriggeredBy"), "job_id": eval_item.get("JobID"), "create_time": eval_item.get("CreateTime"), "modify_time": eval_item.get("ModifyTime"), "wait_until": eval_item.get("WaitUntil"), "blocked_eval": eval_item.get("BlockedEval"), "failed_tg_allocs": eval_item.get("FailedTGAllocs", {}), "class_eligibility": eval_item.get("ClassEligibility", {}), "quota_limit_reached": eval_item.get("QuotaLimitReached") }) result = { "job_id": job_id, "total_evaluations": len(evaluations), "evaluations": simplified_evals, "message": f"Found {len(evaluations)} evaluations for job {job_id}" } return [types.TextContent( type="text", text=json.dumps(result, indent=2) )] except Exception as e: return [types.TextContent( type="text", text=f"Error getting evaluations: {str(e)}" )] elif name == "force_evaluate_job": job_id = arguments.get("job_id") if not job_id: return [types.TextContent( type="text", text="Error: job_id is required" )] try: # Force evaluation by making a direct API call import requests nomad_addr = f"http://{nomad_service.client.host}:{nomad_service.client.port}" url = f"{nomad_addr}/v1/job/{job_id}/evaluate" headers = {} if hasattr(nomad_service.client, 'token') and nomad_service.client.token: headers["X-Nomad-Token"] = nomad_service.client.token params = {"namespace": nomad_service.namespace} response = requests.post( url=url, headers=headers, params=params, verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True ) if response.status_code == 200: response_data = response.json() result = { "success": True, "job_id": job_id, "eval_id": response_data.get("EvalID"), "status": "evaluation_forced", "message": f"Forced evaluation for job {job_id}", "details": response_data } return [types.TextContent( type="text", text=json.dumps(result, indent=2) )] else: return [types.TextContent( type="text", text=f"Error: Failed to force evaluation - {response.text}" )] except Exception as e: return [types.TextContent( type="text", text=f"Error forcing evaluation: {str(e)}" )] else: return [types.TextContent( type="text", text=f"Error: Unknown tool '{name}'" )] except Exception as e: logger.error(f"Error in tool '{name}': {str(e)}") return [types.TextContent( type="text", text=f"Error: {str(e)}" )] async def main(): """Main entry point for the MCP server.""" # Run the server using stdio transport async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): await server.run( read_stream, write_stream, InitializationOptions( server_name="nomad-mcp", server_version="1.0.0", capabilities=server.get_capabilities( notification_options=NotificationOptions(), experimental_capabilities={}, ), ), ) if __name__ == "__main__": asyncio.run(main())