Files
nomad_mcp/app/routers/claude.py
2025-02-26 15:25:39 +07:00

230 lines
7.9 KiB
Python

from fastapi import APIRouter, HTTPException, Body, Query, Depends
from typing import Dict, Any, List, Optional
import logging
import json
from app.services.nomad_client import NomadService
from app.schemas.claude_api import ClaudeJobRequest, ClaudeJobSpecification, ClaudeJobResponse
# Router that the endpoint decorators in this module register their routes on.
router = APIRouter()
# Module-level logger, named after this module, used by the error handlers below.
logger = logging.getLogger(__name__)
@router.post("/jobs", response_model=ClaudeJobResponse)
async def manage_job(request: ClaudeJobRequest):
    """
    Endpoint for Claude to manage Nomad jobs with a simplified interface.

    Supported actions (matched case-insensitively against ``request.action``):

    - ``status``: fetch the job plus its most recently created allocation.
    - ``stop``: stop the job, forwarding ``request.purge``.
    - ``restart``: read the current job specification, stop the job, then
      resubmit that same specification.

    Args:
        request: Carries ``job_id`` and ``action``, plus ``namespace`` (applied
            to the service only when truthy) and ``purge``.

    Returns:
        A ClaudeJobResponse. Any non-HTTP exception raised while handling the
        request is logged and reported as ``success=False`` with
        ``status="error"`` instead of being propagated.

    Raises:
        HTTPException: 400 when ``request.action`` is not a supported action.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        if request.namespace:
            nomad_service.namespace = request.namespace

        # Normalise once so every branch compares against the same value.
        action = request.action.lower()

        if action == "status":
            job = nomad_service.get_job(request.job_id)
            job_status = job.get("Status", "unknown")

            # Pick the most recently created allocation for extra detail;
            # allocations without a CreateTime are treated as oldest (0).
            allocations = nomad_service.get_allocations(request.job_id)
            latest_alloc = None
            if allocations:
                latest_alloc = max(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                )

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status=job_status,
                message=f"Job {request.job_id} is {job_status}",
                details={
                    "job": job,
                    "latest_allocation": latest_alloc,
                },
            )

        if action == "stop":
            result = nomad_service.stop_job(request.job_id, purge=request.purge)
            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="stopped",
                message=f"Job {request.job_id} has been stopped"
                + (" and purged" if request.purge else ""),
                details=result,
            )

        if action == "restart":
            # The specification is captured before stopping so the same spec
            # can be resubmitted afterwards.
            job_spec = nomad_service.get_job(request.job_id)
            nomad_service.stop_job(request.job_id)
            result = nomad_service.start_job(job_spec)
            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="restarted",
                message=f"Job {request.job_id} has been restarted",
                details=result,
            )

        # Unknown action
        raise HTTPException(status_code=400, detail=f"Unknown action: {request.action}")
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) reach the client; the
        # generic handler below would otherwise turn them into a 200 payload.
        raise
    except Exception as e:
        logger.error(f"Error managing job {request.job_id}: {str(e)}")
        return ClaudeJobResponse(
            success=False,
            job_id=request.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None,
        )
@router.post("/create-job", response_model=ClaudeJobResponse)
async def create_job(job_spec: ClaudeJobSpecification):
    """
    Create and start a new Nomad job from Claude's simplified specification.

    The simplified spec is converted with ``job_spec.to_nomad_job_spec()`` and
    submitted through ``NomadService.start_job``. When ``job_spec.namespace``
    is truthy it replaces the service's namespace first.

    Args:
        job_spec: The simplified job description; ``job_id`` and ``namespace``
            are read from it here.

    Returns:
        A ClaudeJobResponse with ``status="started"`` on success. Any exception
        is logged and reported as ``success=False`` with ``status="error"``.
    """
    try:
        service = NomadService()
        if job_spec.namespace:
            service.namespace = job_spec.namespace

        # Translate the simplified spec to Nomad's format, then submit it.
        submission = service.start_job(job_spec.to_nomad_job_spec())

        return ClaudeJobResponse(
            success=True,
            job_id=job_spec.job_id,
            status="started",
            message=f"Job {job_spec.job_id} has been created and started",
            details=submission,
        )
    except Exception as e:
        reason = str(e)
        logger.error(f"Error creating job {job_spec.job_id}: {reason}")
        return ClaudeJobResponse(
            success=False,
            job_id=job_spec.job_id,
            status="error",
            message=f"Error: {reason}",
            details=None,
        )
@router.get("/list-jobs", response_model=List[Dict[str, Any]])
async def list_jobs(namespace: str = Query("development")):
    """
    List all jobs in the specified namespace.

    Args:
        namespace: Namespace assigned to the Nomad service before listing;
            the query parameter defaults to ``"development"``.

    Returns:
        One dict per job with the keys ``id``, ``name``, ``status``, ``type``
        and ``namespace`` (the latter echoing the requested namespace).

    Raises:
        HTTPException: 500 when listing or summarising the jobs fails.
    """
    try:
        service = NomadService()
        service.namespace = namespace

        # Reduce each job record to the handful of fields callers need.
        return [
            {
                "id": entry.get("ID"),
                "name": entry.get("Name"),
                "status": entry.get("Status"),
                "type": entry.get("Type"),
                "namespace": namespace,
            }
            for entry in service.list_jobs()
        ]
    except Exception as e:
        reason = str(e)
        logger.error(f"Error listing jobs: {reason}")
        raise HTTPException(status_code=500, detail=f"Error listing jobs: {reason}")
@router.get("/job-logs/{job_id}", response_model=Dict[str, Any])
async def get_job_logs(job_id: str, namespace: str = Query("development")):
    """
    Get stdout and stderr logs from the latest allocation of a job.

    The latest allocation is the one with the greatest ``CreateTime``
    (missing values count as 0). The task queried is the first key of the
    allocation's ``TaskStates``; when none is available it falls back to
    ``"app"``.

    Args:
        job_id: Identifier of the job whose logs are requested.
        namespace: Namespace assigned to the Nomad service; the query
            parameter defaults to ``"development"``.

    Returns:
        A dict with ``success``, ``job_id``, ``message`` and ``logs``. On
        success it also carries ``allocation_id`` and ``task_name``, and
        ``logs`` holds ``stdout`` and ``stderr``. When there are no
        allocations, or any exception occurs, ``success`` is False and
        ``logs`` is None.
    """
    try:
        service = NomadService()
        service.namespace = namespace

        allocations = service.get_allocations(job_id)
        if not allocations:
            return {
                "success": False,
                "job_id": job_id,
                "message": f"No allocations found for job {job_id}",
                "logs": None,
            }

        # Newest allocation wins; on ties the earliest listed one is kept,
        # matching a stable descending sort followed by taking index 0.
        newest = max(allocations, key=lambda alloc: alloc.get("CreateTime", 0))
        alloc_id = newest.get("ID")

        # Use the first task recorded on the allocation, else the default name.
        task_states = newest.get("TaskStates")
        first_task = next(iter(task_states.keys())) if task_states else None
        task_name = first_task or "app"  # Default task name

        # Fetch stdout first, then stderr.
        logs = {
            stream: service.get_allocation_logs(alloc_id, task_name, stream)
            for stream in ("stdout", "stderr")
        }

        return {
            "success": True,
            "job_id": job_id,
            "allocation_id": alloc_id,
            "task_name": task_name,
            "message": f"Retrieved logs for job {job_id}",
            "logs": logs,
        }
    except Exception as e:
        reason = str(e)
        logger.error(f"Error getting logs for job {job_id}: {reason}")
        return {
            "success": False,
            "job_id": job_id,
            "message": f"Error getting logs: {reason}",
            "logs": None,
        }