Compare commits

2 Commits

Author SHA1 Message Date
5d79edff49 Enhance MCP server with complete job workflow capabilities
- Add submit_job_file tool for HCL/JSON job file submission
- Add get_allocation_status tool for detailed allocation monitoring
- Add get_job_evaluations tool for placement failure analysis
- Add force_evaluate_job tool for retrying failed job placements
- Comprehensive testing confirms all capabilities work end-to-end
- Support complete workflow: submit → monitor → debug → retry

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-05-31 10:54:03 +07:00
4ed9da5b72 Add standalone MCP server for Claude Desktop integration
- Create dedicated MCP server with Nomad job management tools
- Add Claude Desktop configuration for MCP server connection
- Update requirements with mcp dependency for standalone server

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-05-31 10:42:38 +07:00
3 changed files with 728 additions and 1 deletion

Claude Desktop configuration (new file, +12 lines)

@@ -0,0 +1,12 @@
{
"mcpServers": {
"nomad-mcp": {
"command": "python",
"args": ["/Users/nkohl/Documents/Code/nomad_mcp/mcp_server.py"],
"env": {
"NOMAD_ADDR": "http://localhost:4646",
"PYTHONPATH": "/Users/nkohl/Documents/Code/nomad_mcp"
}
}
}
}

mcp_server.py (new file, +714 lines)

@@ -0,0 +1,714 @@
#!/usr/bin/env python3
"""
Nomad MCP Server for Claude Desktop App
Provides MCP tools for managing HashiCorp Nomad jobs
"""
import asyncio
import json
import logging
import os
import sys
from typing import Any, Dict, List, Optional
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
import mcp.server.stdio
import mcp.types as types
from app.services.nomad_client import NomadService
from app.schemas.claude_api import ClaudeJobSpecification
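# NomadService and ClaudeJobSpecification are reused from the project's
# existing "app" package, which is why the Claude Desktop config adds the
# repository root to PYTHONPATH.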
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nomad-mcp")
# Create the server instance
server = Server("nomad-mcp")
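# Two handlers are registered below: list_tools answers MCP tool discovery
# (tools/list) and call_tool executes a selected tool (tools/call).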
@server.list_tools()
async def handle_list_tools() -> List[types.Tool]:
"""List available tools for Nomad management."""
return [
types.Tool(
name="list_nomad_jobs",
description="List all Nomad jobs in a namespace",
inputSchema={
"type": "object",
"properties": {
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
}
}
),
types.Tool(
name="get_job_status",
description="Get the status of a specific Nomad job",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to check"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
},
"required": ["job_id"]
}
),
types.Tool(
name="stop_job",
description="Stop a running Nomad job",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to stop"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
},
"purge": {
"type": "boolean",
"description": "Whether to purge the job",
"default": False
}
},
"required": ["job_id"]
}
),
types.Tool(
name="restart_job",
description="Restart a Nomad job",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to restart"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
},
"required": ["job_id"]
}
),
types.Tool(
name="create_job",
description="Create a new Nomad job",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "Unique ID for the job"
},
"name": {
"type": "string",
"description": "Display name for the job"
},
"type": {
"type": "string",
"description": "Job type (service, batch, etc.)",
"default": "service"
},
"datacenters": {
"type": "array",
"description": "List of datacenters to run the job in",
"items": {"type": "string"},
"default": ["jm"]
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
},
"docker_image": {
"type": "string",
"description": "Docker image to run"
},
"count": {
"type": "integer",
"description": "Number of instances to run",
"default": 1
},
"cpu": {
"type": "integer",
"description": "CPU allocation in MHz",
"default": 100
},
"memory": {
"type": "integer",
"description": "Memory allocation in MB",
"default": 128
},
"ports": {
"type": "array",
"description": "Port mappings",
"items": {"type": "object"},
"default": []
},
"env_vars": {
"type": "object",
"description": "Environment variables for the container",
"default": {}
}
},
"required": ["job_id", "name", "docker_image"]
}
),
types.Tool(
name="get_job_logs",
description="Get logs for a Nomad job",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to get logs for"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
},
"required": ["job_id"]
}
),
types.Tool(
name="submit_job_file",
description="Submit a Nomad job from HCL or JSON file content",
inputSchema={
"type": "object",
"properties": {
"file_content": {
"type": "string",
"description": "Content of the Nomad job file (HCL or JSON format)"
},
"file_type": {
"type": "string",
"description": "Type of file content: 'hcl' or 'json'",
"enum": ["hcl", "json"],
"default": "json"
},
"namespace": {
"type": "string",
"description": "Nomad namespace to submit the job to",
"default": "development"
}
},
"required": ["file_content"]
}
),
types.Tool(
name="get_allocation_status",
description="Get detailed status of job allocations",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to check allocations for"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
},
"required": ["job_id"]
}
),
types.Tool(
name="get_job_evaluations",
description="Get evaluations for a job to understand placement and failures",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to get evaluations for"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
},
"required": ["job_id"]
}
),
types.Tool(
name="force_evaluate_job",
description="Force a new evaluation for a job (retry failed placements)",
inputSchema={
"type": "object",
"properties": {
"job_id": {
"type": "string",
"description": "ID of the job to force evaluate"
},
"namespace": {
"type": "string",
"description": "Nomad namespace",
"default": "development"
}
},
"required": ["job_id"]
}
)
]
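# Single dispatcher for every tool invocation: each branch below maps a tool
# name to the corresponding NomadService operation and returns the result as
# pretty-printed JSON wrapped in a TextContent block.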
@server.call_tool()
async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
"""Handle tool calls from Claude."""
try:
# Create Nomad service instance
nomad_service = NomadService()
namespace = arguments.get("namespace", "development")
nomad_service.namespace = namespace
if name == "list_nomad_jobs":
jobs = nomad_service.list_jobs()
simplified_jobs = []
for job in jobs:
simplified_jobs.append({
"id": job.get("ID"),
"name": job.get("Name"),
"status": job.get("Status"),
"type": job.get("Type"),
"namespace": namespace
})
return [types.TextContent(
type="text",
text=json.dumps(simplified_jobs, indent=2)
)]
elif name == "get_job_status":
job_id = arguments.get("job_id")
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
job = nomad_service.get_job(job_id)
allocations = nomad_service.get_allocations(job_id)
latest_alloc = None
if allocations:
sorted_allocations = sorted(
allocations,
key=lambda a: a.get("CreateTime", 0),
reverse=True
)
latest_alloc = sorted_allocations[0]
result = {
"job_id": job_id,
"status": job.get("Status", "unknown"),
"message": f"Job {job_id} is {job.get('Status', 'unknown')}",
"details": {
"job": job,
"latest_allocation": latest_alloc
}
}
return [types.TextContent(
type="text",
text=json.dumps(result, indent=2)
)]
elif name == "stop_job":
job_id = arguments.get("job_id")
purge = arguments.get("purge", False)
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
result = nomad_service.stop_job(job_id, purge=purge)
response = {
"success": True,
"job_id": job_id,
"status": "stopped",
"message": f"Job {job_id} has been stopped" + (" and purged" if purge else ""),
"details": result
}
return [types.TextContent(
type="text",
text=json.dumps(response, indent=2)
)]
elif name == "restart_job":
job_id = arguments.get("job_id")
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
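# There is no dedicated restart call here: restart is emulated by fetching
# the current job spec, stopping the job, then re-submitting the same spec.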
# Get current job spec
job_spec = nomad_service.get_job(job_id)
# Stop and restart
nomad_service.stop_job(job_id)
result = nomad_service.start_job(job_spec)
response = {
"success": True,
"job_id": job_id,
"status": "restarted",
"message": f"Job {job_id} has been restarted",
"details": result
}
return [types.TextContent(
type="text",
text=json.dumps(response, indent=2)
)]
elif name == "create_job":
# Validate required arguments
required_args = ["job_id", "name", "docker_image"]
for arg in required_args:
if not arguments.get(arg):
return [types.TextContent(
type="text",
text=f"Error: {arg} is required"
)]
# Create job specification
job_spec = ClaudeJobSpecification(**arguments)
# Set namespace
if job_spec.namespace:
nomad_service.namespace = job_spec.namespace
# Convert to Nomad format and start
nomad_job_spec = job_spec.to_nomad_job_spec()
result = nomad_service.start_job(nomad_job_spec)
response = {
"success": True,
"job_id": job_spec.job_id,
"status": "started",
"message": f"Job {job_spec.job_id} has been created and started",
"details": result
}
return [types.TextContent(
type="text",
text=json.dumps(response, indent=2)
)]
elif name == "get_job_logs":
job_id = arguments.get("job_id")
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
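# Logs are read from the most recent allocation only; stdout and stderr are
# fetched for the first task found in that allocation's TaskStates (falling
# back to a task named "app").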
# Get allocations
allocations = nomad_service.get_allocations(job_id)
if not allocations:
result = {
"success": False,
"job_id": job_id,
"message": f"No allocations found for job {job_id}",
"logs": None
}
else:
# Get latest allocation
sorted_allocations = sorted(
allocations,
key=lambda a: a.get("CreateTime", 0),
reverse=True
)
latest_alloc = sorted_allocations[0]
alloc_id = latest_alloc.get("ID")
# Get task name
task_name = None
if "TaskStates" in latest_alloc:
task_states = latest_alloc["TaskStates"]
if task_states:
task_name = next(iter(task_states.keys()))
if not task_name:
task_name = "app" # Default task name
# Get logs
stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout")
stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr")
result = {
"success": True,
"job_id": job_id,
"allocation_id": alloc_id,
"task_name": task_name,
"message": f"Retrieved logs for job {job_id}",
"logs": {
"stdout": stdout_logs,
"stderr": stderr_logs
}
}
return [types.TextContent(
type="text",
text=json.dumps(result, indent=2)
)]
elif name == "submit_job_file":
file_content = arguments.get("file_content")
file_type = arguments.get("file_type", "json")
if not file_content:
return [types.TextContent(
type="text",
text="Error: file_content is required"
)]
try:
# Parse the job specification based on file type
if file_type.lower() == "json":
job_spec = json.loads(file_content)  # json is already imported at module level
elif file_type.lower() == "hcl":
return [types.TextContent(
type="text",
text="Error: HCL parsing not yet implemented. Please provide JSON format."
)]
else:
return [types.TextContent(
type="text",
text=f"Error: Unsupported file type '{file_type}'. Use 'json' or 'hcl'."
)]
# Submit the job
result = nomad_service.start_job(job_spec)
response = {
"success": True,
"job_id": result.get("job_id"),
"status": "submitted",
"message": f"Job {result.get('job_id')} has been submitted from {file_type} file",
"details": result
}
return [types.TextContent(
type="text",
text=json.dumps(response, indent=2)
)]
except json.JSONDecodeError as e:
return [types.TextContent(
type="text",
text=f"Error: Invalid JSON format - {str(e)}"
)]
except Exception as e:
return [types.TextContent(
type="text",
text=f"Error submitting job: {str(e)}"
)]
elif name == "get_allocation_status":
job_id = arguments.get("job_id")
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
# Get allocations for the job
allocations = nomad_service.get_allocations(job_id)
# Get detailed status for each allocation
detailed_allocations = []
for alloc in allocations:
alloc_id = alloc.get("ID")
detailed_allocations.append({
"allocation_id": alloc_id,
"name": alloc.get("Name"),
"client_status": alloc.get("ClientStatus"),
"desired_status": alloc.get("DesiredStatus"),
"job_id": alloc.get("JobID"),
"task_group": alloc.get("TaskGroup"),
"node_id": alloc.get("NodeID"),
"create_time": alloc.get("CreateTime"),
"modify_time": alloc.get("ModifyTime"),
"task_states": alloc.get("TaskStates", {}),
"failed": alloc.get("Failed", False),
"deployment_status": alloc.get("DeploymentStatus", {})
})
result = {
"job_id": job_id,
"total_allocations": len(allocations),
"allocations": detailed_allocations,
"message": f"Found {len(allocations)} allocations for job {job_id}"
}
return [types.TextContent(
type="text",
text=json.dumps(result, indent=2)
)]
elif name == "get_job_evaluations":
job_id = arguments.get("job_id")
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
try:
evaluations = nomad_service.get_job_evaluations(job_id)
simplified_evals = []
for eval_item in evaluations:
simplified_evals.append({
"eval_id": eval_item.get("ID"),
"status": eval_item.get("Status"),
"type": eval_item.get("Type"),
"triggered_by": eval_item.get("TriggeredBy"),
"job_id": eval_item.get("JobID"),
"create_time": eval_item.get("CreateTime"),
"modify_time": eval_item.get("ModifyTime"),
"wait_until": eval_item.get("WaitUntil"),
"blocked_eval": eval_item.get("BlockedEval"),
"failed_tg_allocs": eval_item.get("FailedTGAllocs", {}),
"class_eligibility": eval_item.get("ClassEligibility", {}),
"quota_limit_reached": eval_item.get("QuotaLimitReached")
})
result = {
"job_id": job_id,
"total_evaluations": len(evaluations),
"evaluations": simplified_evals,
"message": f"Found {len(evaluations)} evaluations for job {job_id}"
}
return [types.TextContent(
type="text",
text=json.dumps(result, indent=2)
)]
except Exception as e:
return [types.TextContent(
type="text",
text=f"Error getting evaluations: {str(e)}"
)]
elif name == "force_evaluate_job":
job_id = arguments.get("job_id")
if not job_id:
return [types.TextContent(
type="text",
text="Error: job_id is required"
)]
try:
# Force evaluation by making a direct API call
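# Rather than going through NomadService, this POSTs to the Nomad
# /v1/job/<job_id>/evaluate endpoint directly, reusing the underlying
# client's address, ACL token, and namespace, and honoring NOMAD_SKIP_VERIFY
# for TLS certificate verification.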
import requests
nomad_addr = f"http://{nomad_service.client.host}:{nomad_service.client.port}"
url = f"{nomad_addr}/v1/job/{job_id}/evaluate"
headers = {}
if hasattr(nomad_service.client, 'token') and nomad_service.client.token:
headers["X-Nomad-Token"] = nomad_service.client.token
params = {"namespace": nomad_service.namespace}
response = requests.post(
url=url,
headers=headers,
params=params,
verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
)
if response.status_code == 200:
response_data = response.json()
result = {
"success": True,
"job_id": job_id,
"eval_id": response_data.get("EvalID"),
"status": "evaluation_forced",
"message": f"Forced evaluation for job {job_id}",
"details": response_data
}
return [types.TextContent(
type="text",
text=json.dumps(result, indent=2)
)]
else:
return [types.TextContent(
type="text",
text=f"Error: Failed to force evaluation - {response.text}"
)]
except Exception as e:
return [types.TextContent(
type="text",
text=f"Error forcing evaluation: {str(e)}"
)]
else:
return [types.TextContent(
type="text",
text=f"Error: Unknown tool '{name}'"
)]
except Exception as e:
logger.error(f"Error in tool '{name}': {str(e)}")
return [types.TextContent(
type="text",
text=f"Error: {str(e)}"
)]
async def main():
"""Main entry point for the MCP server."""
# Run the server using stdio transport
async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
await server.run(
read_stream,
write_stream,
InitializationOptions(
server_name="nomad-mcp",
server_version="1.0.0",
capabilities=server.get_capabilities(
notification_options=NotificationOptions(),
experimental_capabilities={},
),
),
)
if __name__ == "__main__":
asyncio.run(main())

requirements file (updated)

@@ -7,4 +7,5 @@ httpx
python-multipart
pyyaml
requests
fastapi_mcp
mcp
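As a quick manual check (assuming the paths from the Claude Desktop config above and a Nomad agent reachable at http://localhost:4646), the server can be started on its own; it then waits on stdin/stdout for an MCP client such as Claude Desktop, so it will appear to hang when run interactively:

NOMAD_ADDR=http://localhost:4646 PYTHONPATH=/Users/nkohl/Documents/Code/nomad_mcp python /Users/nkohl/Documents/Code/nomad_mcp/mcp_server.py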