Add comprehensive log analysis features to improve troubleshooting workflows:
- Time-based filtering (8pm-6am shifts) with HH:MM format support
- Multi-level log filtering (ERROR, WARNING, EMERGENCY, etc.)
- Full-text search across log content with case-insensitive matching
- Proper line break formatting for readable output
- Line count limiting for large log files

New REST API endpoints (see the usage sketch below):
- /api/logs/errors/{job_id} - Get only error/warning logs
- /api/logs/search/{job_id} - Search logs for specific terms
- Enhanced /api/logs/job/{job_id} with filtering parameters

New MCP tools:
- get_error_logs - Streamlined error analysis
- search_job_logs - Pattern-based log searching
- Enhanced get_job_logs with all filtering options

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
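A rough usage sketch of the new log endpoints (not part of the change itself): it assumes the API is reachable at the server's default BASE_URL, http://localhost:8000, and reuses the query parameter names the MCP server below sends; the job ID and filter values are placeholders.

    import requests

    base_url = "http://localhost:8000"  # default BASE_URL used by the MCP server below
    job_id = "example-job"              # placeholder job ID

    # Errors/warnings from the overnight shift (8 PM to 6 AM)
    errors = requests.get(
        f"{base_url}/api/logs/errors/{job_id}",
        params={"start_time": "20:00", "end_time": "06:00", "plain_text": True},
        timeout=30,
    )

    # Full-text search, limited to the last 50 matching lines
    matches = requests.get(
        f"{base_url}/api/logs/search/{job_id}",
        params={"q": "timeout", "lines": 50, "plain_text": True},
        timeout=30,
    )

    # Enhanced job logs endpoint with level filtering and line limiting
    logs = requests.get(
        f"{base_url}/api/logs/job/{job_id}",
        params={"log_type": "stderr", "log_level": "ERROR", "lines": 100, "plain_text": True},
        timeout=30,
    )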
925 lines
34 KiB
Python
#!/usr/bin/env python3
"""
Nomad MCP Server for Claude Desktop App
Provides MCP tools for managing HashiCorp Nomad jobs
"""

import asyncio
import json
import logging
import os
import sys
import requests
from typing import Any, Dict, List, Optional

from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
import mcp.server.stdio
import mcp.types as types

from app.services.nomad_client import NomadService
from app.schemas.claude_api import ClaudeJobSpecification

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nomad-mcp")

# Create the server instance
server = Server("nomad-mcp")

@server.list_tools()
async def handle_list_tools() -> List[types.Tool]:
    """List available tools for Nomad management."""
    return [
        types.Tool(
            name="list_nomad_jobs",
            description="List all Nomad jobs in a namespace",
            inputSchema={
                "type": "object",
                "properties": {
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    }
                }
            }
        ),
        types.Tool(
            name="get_job_status",
            description="Get the status of a specific Nomad job",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to check"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="stop_job",
            description="Stop a running Nomad job",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to stop"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    },
                    "purge": {
                        "type": "boolean",
                        "description": "Whether to purge the job",
                        "default": False
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="restart_job",
            description="Restart a Nomad job",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to restart"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="create_job",
            description="Create a new Nomad job",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "Unique ID for the job"
                    },
                    "name": {
                        "type": "string",
                        "description": "Display name for the job"
                    },
                    "type": {
                        "type": "string",
                        "description": "Job type (service, batch, etc.)",
                        "default": "service"
                    },
                    "datacenters": {
                        "type": "array",
                        "description": "List of datacenters to run the job in",
                        "items": {"type": "string"},
                        "default": ["jm"]
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    },
                    "docker_image": {
                        "type": "string",
                        "description": "Docker image to run"
                    },
                    "count": {
                        "type": "integer",
                        "description": "Number of instances to run",
                        "default": 1
                    },
                    "cpu": {
                        "type": "integer",
                        "description": "CPU allocation in MHz",
                        "default": 100
                    },
                    "memory": {
                        "type": "integer",
                        "description": "Memory allocation in MB",
                        "default": 128
                    },
                    "ports": {
                        "type": "array",
                        "description": "Port mappings",
                        "items": {"type": "object"},
                        "default": []
                    },
                    "env_vars": {
                        "type": "object",
                        "description": "Environment variables for the container",
                        "default": {}
                    }
                },
                "required": ["job_id", "name", "docker_image"]
            }
        ),
        types.Tool(
            name="get_job_logs",
            description="Get logs for a Nomad job with advanced filtering options",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to get logs for"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    },
                    "log_type": {
                        "type": "string",
                        "description": "Type of logs: stdout or stderr",
                        "enum": ["stdout", "stderr"],
                        "default": "stderr"
                    },
                    "start_time": {
                        "type": "string",
                        "description": "Start time filter (HH:MM format, e.g., '20:00' for 8 PM)"
                    },
                    "end_time": {
                        "type": "string",
                        "description": "End time filter (HH:MM format, e.g., '06:00' for 6 AM)"
                    },
                    "log_level": {
                        "type": "string",
                        "description": "Filter by log level: ERROR, WARNING, INFO, DEBUG, EMERGENCY, CRITICAL"
                    },
                    "search": {
                        "type": "string",
                        "description": "Search term to filter logs"
                    },
                    "lines": {
                        "type": "integer",
                        "description": "Number of recent log lines to return"
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Number of allocations to check",
                        "default": 1
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="get_error_logs",
            description="Get only error and warning logs for a Nomad job, useful for troubleshooting",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to get error logs for"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    },
                    "start_time": {
                        "type": "string",
                        "description": "Start time filter (HH:MM format, e.g., '20:00' for 8 PM)"
                    },
                    "end_time": {
                        "type": "string",
                        "description": "End time filter (HH:MM format, e.g., '06:00' for 6 AM)"
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="search_job_logs",
            description="Search Nomad job logs for specific terms or patterns",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to search logs for"
                    },
                    "search_term": {
                        "type": "string",
                        "description": "Term or pattern to search for in logs"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    },
                    "log_type": {
                        "type": "string",
                        "description": "Type of logs: stdout or stderr",
                        "enum": ["stdout", "stderr"],
                        "default": "stderr"
                    },
                    "lines": {
                        "type": "integer",
                        "description": "Number of matching lines to return",
                        "default": 50
                    }
                },
                "required": ["job_id", "search_term"]
            }
        ),
        types.Tool(
            name="submit_job_file",
            description="Submit a Nomad job from HCL or JSON file content",
            inputSchema={
                "type": "object",
                "properties": {
                    "file_content": {
                        "type": "string",
                        "description": "Content of the Nomad job file (HCL or JSON format)"
                    },
                    "file_type": {
                        "type": "string",
                        "description": "Type of file content: 'hcl' or 'json'",
                        "enum": ["hcl", "json"],
                        "default": "json"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace to submit the job to",
                        "default": "development"
                    }
                },
                "required": ["file_content"]
            }
        ),
        types.Tool(
            name="get_allocation_status",
            description="Get detailed status of job allocations",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to check allocations for"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="get_job_evaluations",
            description="Get evaluations for a job to understand placement and failures",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to get evaluations for"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    }
                },
                "required": ["job_id"]
            }
        ),
        types.Tool(
            name="force_evaluate_job",
            description="Force a new evaluation for a job (retry failed placements)",
            inputSchema={
                "type": "object",
                "properties": {
                    "job_id": {
                        "type": "string",
                        "description": "ID of the job to force evaluate"
                    },
                    "namespace": {
                        "type": "string",
                        "description": "Nomad namespace",
                        "default": "development"
                    }
                },
                "required": ["job_id"]
            }
        )
    ]
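
# Example (illustrative): with the schema above, an MCP client pulling overnight
# errors might call the `get_job_logs` tool with arguments like the following.
# The job ID and filter values are placeholders, not part of the server itself.
#
#   {
#       "job_id": "example-job",
#       "log_type": "stderr",
#       "log_level": "ERROR",
#       "start_time": "20:00",
#       "end_time": "06:00",
#       "lines": 100
#   }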

@server.call_tool()
async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
    """Handle tool calls from Claude."""
    try:
        # Create Nomad service instance
        nomad_service = NomadService()
        namespace = arguments.get("namespace", "development")
        nomad_service.namespace = namespace

        if name == "list_nomad_jobs":
            jobs = nomad_service.list_jobs()
            simplified_jobs = []
            for job in jobs:
                simplified_jobs.append({
                    "id": job.get("ID"),
                    "name": job.get("Name"),
                    "status": job.get("Status"),
                    "type": job.get("Type"),
                    "namespace": namespace
                })

            return [types.TextContent(
                type="text",
                text=json.dumps(simplified_jobs, indent=2)
            )]

        elif name == "get_job_status":
            job_id = arguments.get("job_id")
            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            job = nomad_service.get_job(job_id)
            allocations = nomad_service.get_allocations(job_id)

            latest_alloc = None
            if allocations:
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]

            result = {
                "job_id": job_id,
                "status": job.get("Status", "unknown"),
                "message": f"Job {job_id} is {job.get('Status', 'unknown')}",
                "details": {
                    "job": job,
                    "latest_allocation": latest_alloc
                }
            }

            return [types.TextContent(
                type="text",
                text=json.dumps(result, indent=2)
            )]

        elif name == "stop_job":
            job_id = arguments.get("job_id")
            purge = arguments.get("purge", False)

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            result = nomad_service.stop_job(job_id, purge=purge)

            response = {
                "success": True,
                "job_id": job_id,
                "status": "stopped",
                "message": f"Job {job_id} has been stopped" + (" and purged" if purge else ""),
                "details": result
            }

            return [types.TextContent(
                type="text",
                text=json.dumps(response, indent=2)
            )]

        elif name == "restart_job":
            job_id = arguments.get("job_id")

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            # Get current job spec
            job_spec = nomad_service.get_job(job_id)

            # Stop and restart
            nomad_service.stop_job(job_id)
            result = nomad_service.start_job(job_spec)

            response = {
                "success": True,
                "job_id": job_id,
                "status": "restarted",
                "message": f"Job {job_id} has been restarted",
                "details": result
            }

            return [types.TextContent(
                type="text",
                text=json.dumps(response, indent=2)
            )]

        elif name == "create_job":
            # Validate required arguments
            required_args = ["job_id", "name", "docker_image"]
            for arg in required_args:
                if not arguments.get(arg):
                    return [types.TextContent(
                        type="text",
                        text=f"Error: {arg} is required"
                    )]

            # Create job specification
            job_spec = ClaudeJobSpecification(**arguments)

            # Set namespace
            if job_spec.namespace:
                nomad_service.namespace = job_spec.namespace

            # Convert to Nomad format and start
            nomad_job_spec = job_spec.to_nomad_job_spec()
            result = nomad_service.start_job(nomad_job_spec)

            response = {
                "success": True,
                "job_id": job_spec.job_id,
                "status": "started",
                "message": f"Job {job_spec.job_id} has been created and started",
                "details": result
            }

            return [types.TextContent(
                type="text",
                text=json.dumps(response, indent=2)
            )]
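
        # Example (illustrative): the `create_job` branch above only needs the three
        # required fields; everything else falls back to the schema defaults. The
        # values below are placeholders.
        #
        #   {
        #       "job_id": "hello-web",
        #       "name": "hello-web",
        #       "docker_image": "nginx:alpine",
        #       "cpu": 100,
        #       "memory": 128
        #   }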

        elif name == "get_job_logs":
            job_id = arguments.get("job_id")

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            # Use the enhanced REST API endpoint
            base_url = os.getenv("BASE_URL", "http://localhost:8000")

            # Build query parameters
            params = {
                "namespace": arguments.get("namespace", namespace),
                "log_type": arguments.get("log_type", "stderr"),
                "limit": arguments.get("limit", 1),
                "plain_text": True,
                "formatted": True
            }

            # Add optional filters
            if arguments.get("start_time"):
                params["start_time"] = arguments["start_time"]
            if arguments.get("end_time"):
                params["end_time"] = arguments["end_time"]
            if arguments.get("log_level"):
                params["log_level"] = arguments["log_level"]
            if arguments.get("search"):
                params["search"] = arguments["search"]
            if arguments.get("lines"):
                params["lines"] = arguments["lines"]

            try:
                response = requests.get(
                    f"{base_url}/api/logs/job/{job_id}",
                    params=params,
                    timeout=30
                )

                if response.status_code == 200:
                    logs_text = response.text

                    result = {
                        "success": True,
                        "job_id": job_id,
                        "namespace": params["namespace"],
                        "message": f"Retrieved logs for job {job_id}",
                        "logs": logs_text,
                        "filters_applied": {k: v for k, v in params.items() if k not in ["namespace", "plain_text", "formatted"]}
                    }
                else:
                    result = {
                        "success": False,
                        "job_id": job_id,
                        "message": f"Failed to get logs: {response.status_code} - {response.text}",
                        "logs": None
                    }

            except Exception as e:
                result = {
                    "success": False,
                    "job_id": job_id,
                    "message": f"Error getting logs: {str(e)}",
                    "logs": None
                }

            return [types.TextContent(
                type="text",
                text=json.dumps(result, indent=2) if not result.get("success") else result["logs"]
            )]
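
        # Example (illustrative): for the branch above, arguments such as
        # {"job_id": "example-job", "log_level": "ERROR", "start_time": "20:00", "end_time": "06:00"}
        # produce a request roughly like
        #   GET {BASE_URL}/api/logs/job/example-job?namespace=development&log_type=stderr
        #       &limit=1&plain_text=True&formatted=True&log_level=ERROR&start_time=20:00&end_time=06:00
        # The job ID is a placeholder; parameter names mirror the params dict built above.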

        elif name == "get_error_logs":
            job_id = arguments.get("job_id")

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            # Use the error logs endpoint
            base_url = os.getenv("BASE_URL", "http://localhost:8000")

            params = {
                "namespace": arguments.get("namespace", namespace),
                "plain_text": True
            }

            if arguments.get("start_time"):
                params["start_time"] = arguments["start_time"]
            if arguments.get("end_time"):
                params["end_time"] = arguments["end_time"]

            try:
                response = requests.get(
                    f"{base_url}/api/logs/errors/{job_id}",
                    params=params,
                    timeout=30
                )

                if response.status_code == 200:
                    logs_text = response.text

                    result = {
                        "success": True,
                        "job_id": job_id,
                        "message": f"Retrieved error logs for job {job_id}",
                        "logs": logs_text
                    }

                    return [types.TextContent(
                        type="text",
                        text=logs_text
                    )]
                else:
                    return [types.TextContent(
                        type="text",
                        text=f"Error: Failed to get error logs: {response.status_code} - {response.text}"
                    )]

            except Exception as e:
                return [types.TextContent(
                    type="text",
                    text=f"Error getting error logs: {str(e)}"
                )]

        elif name == "search_job_logs":
            job_id = arguments.get("job_id")
            search_term = arguments.get("search_term")

            if not job_id or not search_term:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id and search_term are required"
                )]

            # Use the search logs endpoint
            base_url = os.getenv("BASE_URL", "http://localhost:8000")

            params = {
                "q": search_term,
                "namespace": arguments.get("namespace", namespace),
                "log_type": arguments.get("log_type", "stderr"),
                "lines": arguments.get("lines", 50),
                "plain_text": True
            }

            try:
                response = requests.get(
                    f"{base_url}/api/logs/search/{job_id}",
                    params=params,
                    timeout=30
                )

                if response.status_code == 200:
                    logs_text = response.text

                    return [types.TextContent(
                        type="text",
                        text=logs_text
                    )]
                else:
                    return [types.TextContent(
                        type="text",
                        text=f"Error: Failed to search logs: {response.status_code} - {response.text}"
                    )]

            except Exception as e:
                return [types.TextContent(
                    type="text",
                    text=f"Error searching logs: {str(e)}"
                )]

        elif name == "submit_job_file":
            file_content = arguments.get("file_content")
            file_type = arguments.get("file_type", "json")

            if not file_content:
                return [types.TextContent(
                    type="text",
                    text="Error: file_content is required"
                )]

            try:
                # Parse the job specification based on file type
                if file_type.lower() == "json":
                    job_spec = json.loads(file_content)
                elif file_type.lower() == "hcl":
                    return [types.TextContent(
                        type="text",
                        text="Error: HCL parsing not yet implemented. Please provide JSON format."
                    )]
                else:
                    return [types.TextContent(
                        type="text",
                        text=f"Error: Unsupported file type '{file_type}'. Use 'json' or 'hcl'."
                    )]

                # Submit the job
                result = nomad_service.start_job(job_spec)

                response = {
                    "success": True,
                    "job_id": result.get("job_id"),
                    "status": "submitted",
                    "message": f"Job {result.get('job_id')} has been submitted from {file_type} file",
                    "details": result
                }

                return [types.TextContent(
                    type="text",
                    text=json.dumps(response, indent=2)
                )]

            except json.JSONDecodeError as e:
                return [types.TextContent(
                    type="text",
                    text=f"Error: Invalid JSON format - {str(e)}"
                )]
            except Exception as e:
                return [types.TextContent(
                    type="text",
                    text=f"Error submitting job: {str(e)}"
                )]

        elif name == "get_allocation_status":
            job_id = arguments.get("job_id")

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            # Get allocations for the job
            allocations = nomad_service.get_allocations(job_id)

            # Get detailed status for each allocation
            detailed_allocations = []
            for alloc in allocations:
                alloc_id = alloc.get("ID")
                detailed_allocations.append({
                    "allocation_id": alloc_id,
                    "name": alloc.get("Name"),
                    "client_status": alloc.get("ClientStatus"),
                    "desired_status": alloc.get("DesiredStatus"),
                    "job_id": alloc.get("JobID"),
                    "task_group": alloc.get("TaskGroup"),
                    "node_id": alloc.get("NodeID"),
                    "create_time": alloc.get("CreateTime"),
                    "modify_time": alloc.get("ModifyTime"),
                    "task_states": alloc.get("TaskStates", {}),
                    "failed": alloc.get("Failed", False),
                    "deployment_status": alloc.get("DeploymentStatus", {})
                })

            result = {
                "job_id": job_id,
                "total_allocations": len(allocations),
                "allocations": detailed_allocations,
                "message": f"Found {len(allocations)} allocations for job {job_id}"
            }

            return [types.TextContent(
                type="text",
                text=json.dumps(result, indent=2)
            )]

        elif name == "get_job_evaluations":
            job_id = arguments.get("job_id")

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            try:
                evaluations = nomad_service.get_job_evaluations(job_id)

                simplified_evals = []
                for eval_item in evaluations:
                    simplified_evals.append({
                        "eval_id": eval_item.get("ID"),
                        "status": eval_item.get("Status"),
                        "type": eval_item.get("Type"),
                        "triggered_by": eval_item.get("TriggeredBy"),
                        "job_id": eval_item.get("JobID"),
                        "create_time": eval_item.get("CreateTime"),
                        "modify_time": eval_item.get("ModifyTime"),
                        "wait_until": eval_item.get("WaitUntil"),
                        "blocked_eval": eval_item.get("BlockedEval"),
                        "failed_tg_allocs": eval_item.get("FailedTGAllocs", {}),
                        "class_eligibility": eval_item.get("ClassEligibility", {}),
                        "quota_limit_reached": eval_item.get("QuotaLimitReached")
                    })

                result = {
                    "job_id": job_id,
                    "total_evaluations": len(evaluations),
                    "evaluations": simplified_evals,
                    "message": f"Found {len(evaluations)} evaluations for job {job_id}"
                }

                return [types.TextContent(
                    type="text",
                    text=json.dumps(result, indent=2)
                )]

            except Exception as e:
                return [types.TextContent(
                    type="text",
                    text=f"Error getting evaluations: {str(e)}"
                )]

        elif name == "force_evaluate_job":
            job_id = arguments.get("job_id")

            if not job_id:
                return [types.TextContent(
                    type="text",
                    text="Error: job_id is required"
                )]

            try:
                # Force evaluation by making a direct API call
                nomad_addr = f"http://{nomad_service.client.host}:{nomad_service.client.port}"
                url = f"{nomad_addr}/v1/job/{job_id}/evaluate"

                headers = {}
                if hasattr(nomad_service.client, 'token') and nomad_service.client.token:
                    headers["X-Nomad-Token"] = nomad_service.client.token

                params = {"namespace": nomad_service.namespace}

                response = requests.post(
                    url=url,
                    headers=headers,
                    params=params,
                    verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
                )

                if response.status_code == 200:
                    response_data = response.json()

                    result = {
                        "success": True,
                        "job_id": job_id,
                        "eval_id": response_data.get("EvalID"),
                        "status": "evaluation_forced",
                        "message": f"Forced evaluation for job {job_id}",
                        "details": response_data
                    }

                    return [types.TextContent(
                        type="text",
                        text=json.dumps(result, indent=2)
                    )]
                else:
                    return [types.TextContent(
                        type="text",
                        text=f"Error: Failed to force evaluation - {response.text}"
                    )]

            except Exception as e:
                return [types.TextContent(
                    type="text",
                    text=f"Error forcing evaluation: {str(e)}"
                )]

        else:
            return [types.TextContent(
                type="text",
                text=f"Error: Unknown tool '{name}'"
            )]

    except Exception as e:
        logger.error(f"Error in tool '{name}': {str(e)}")
        return [types.TextContent(
            type="text",
            text=f"Error: {str(e)}"
        )]

async def main():
    """Main entry point for the MCP server."""
    # Run the server using stdio transport
    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="nomad-mcp",
                server_version="1.0.0",
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )

if __name__ == "__main__":
    asyncio.run(main())
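
# Example (illustrative): registering this server with the Claude Desktop app is
# typically done through an "mcpServers" entry in claude_desktop_config.json. The
# script path below is a placeholder, and any Nomad connection settings depend on
# how NomadService reads its configuration; BASE_URL is the only variable the
# server itself reads above.
#
#   {
#       "mcpServers": {
#           "nomad-mcp": {
#               "command": "python",
#               "args": ["/path/to/nomad_mcp_server.py"],
#               "env": {"BASE_URL": "http://localhost:8000"}
#           }
#       }
#   }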