"""FastAPI routes giving Claude a simplified interface for managing Nomad jobs."""

import json
import logging
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, Body, Depends, HTTPException, Query

from app.schemas.claude_api import (
    ClaudeJobRequest,
    ClaudeJobResponse,
    ClaudeJobSpecification,
)
from app.services.nomad_client import NomadService

router = APIRouter()
logger = logging.getLogger(__name__)

# NOTE(review): the NomadService methods below are called without ``await``
# from ``async def`` handlers. If they perform blocking network I/O they will
# stall the event loop while running -- confirm against NomadService.


def _latest_allocation(
    allocations: Optional[List[Dict[str, Any]]],
) -> Optional[Dict[str, Any]]:
    """Return the allocation with the greatest ``CreateTime``.

    Allocations without a ``CreateTime`` key are treated as created at 0.
    Returns ``None`` when *allocations* is empty or ``None``.
    """
    if not allocations:
        return None
    # max() yields the first maximal element, which is the same element a
    # stable descending sort would place at index 0.
    return max(allocations, key=lambda alloc: alloc.get("CreateTime", 0))


@router.post("/jobs", response_model=ClaudeJobResponse)
async def manage_job(request: ClaudeJobRequest) -> ClaudeJobResponse:
    """
    Endpoint for Claude to manage Nomad jobs with a simplified interface.

    Dispatches on ``request.action`` (case-insensitive):

    * ``status``  -- returns the job's ``Status`` together with the job data
      and its most recently created allocation (if any).
    * ``stop``    -- stops the job, passing ``request.purge`` through.
    * ``restart`` -- fetches the current job data, stops the job, then starts
      it again from the data fetched before the stop.

    Returns:
        A ``ClaudeJobResponse``. Any failure raised while handling a known
        action is logged and reported as ``success=False`` / ``status="error"``
        rather than as an HTTP error.

    Raises:
        HTTPException: 400 when ``request.action`` is not a known action.
    """
    try:
        # Create a Nomad service instance, overriding the namespace only
        # when the request supplies one.
        nomad_service = NomadService()
        if request.namespace:
            nomad_service.namespace = request.namespace

        action = request.action.lower()

        if action == "status":
            job = nomad_service.get_job(request.job_id)
            # Allocations give more detailed status than the job alone.
            allocations = nomad_service.get_allocations(request.job_id)
            job_status = job.get("Status", "unknown")

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status=job_status,
                message=f"Job {request.job_id} is {job_status}",
                details={
                    "job": job,
                    "latest_allocation": _latest_allocation(allocations),
                },
            )

        if action == "stop":
            result = nomad_service.stop_job(request.job_id, purge=request.purge)

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="stopped",
                message=f"Job {request.job_id} has been stopped"
                + (" and purged" if request.purge else ""),
                details=result,
            )

        if action == "restart":
            # Capture the specification before stopping so it can be
            # resubmitted unchanged.
            job_spec = nomad_service.get_job(request.job_id)
            nomad_service.stop_job(request.job_id)
            result = nomad_service.start_job(job_spec)

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="restarted",
                message=f"Job {request.job_id} has been restarted",
                details=result,
            )

        # Unknown action
        raise HTTPException(
            status_code=400, detail=f"Unknown action: {request.action}"
        )

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) reach FastAPI instead of
        # being swallowed by the catch-all below and returned as a 200 body.
        raise
    except Exception as e:
        logger.error("Error managing job %s: %s", request.job_id, e)
        return ClaudeJobResponse(
            success=False,
            job_id=request.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None,
        )


@router.post("/create-job", response_model=ClaudeJobResponse)
async def create_job(job_spec: ClaudeJobSpecification) -> ClaudeJobResponse:
    """
    Endpoint for Claude to create a new Nomad job with a simplified interface.

    Converts the simplified specification with ``to_nomad_job_spec()`` and
    starts the resulting job.

    Returns:
        A ``ClaudeJobResponse`` with ``status="started"`` on success. Any
        exception is logged and reported as ``success=False`` /
        ``status="error"`` instead of being raised.
    """
    try:
        # Create a Nomad service instance, overriding the namespace only
        # when the specification supplies one.
        nomad_service = NomadService()
        if job_spec.namespace:
            nomad_service.namespace = job_spec.namespace

        # Convert the simplified job spec to Nomad format and start it.
        nomad_job_spec = job_spec.to_nomad_job_spec()
        result = nomad_service.start_job(nomad_job_spec)

        return ClaudeJobResponse(
            success=True,
            job_id=job_spec.job_id,
            status="started",
            message=f"Job {job_spec.job_id} has been created and started",
            details=result,
        )

    except Exception as e:
        logger.error("Error creating job %s: %s", job_spec.job_id, e)
        return ClaudeJobResponse(
            success=False,
            job_id=job_spec.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None,
        )


@router.get("/list-jobs", response_model=List[Dict[str, Any]])
async def list_jobs(namespace: str = Query("development")) -> List[Dict[str, Any]]:
    """
    List all jobs in the specified namespace.

    Returns:
        One dict per job with the keys ``id``, ``name``, ``status``, ``type``
        (taken from the job's ``ID``/``Name``/``Status``/``Type`` fields) and
        ``namespace`` (the namespace that was queried).

    Raises:
        HTTPException: 500 when the jobs cannot be listed.
    """
    try:
        nomad_service = NomadService()
        nomad_service.namespace = namespace

        jobs = nomad_service.list_jobs()

        # Return a simplified view of each job.
        return [
            {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "status": job.get("Status"),
                "type": job.get("Type"),
                "namespace": namespace,
            }
            for job in jobs
        ]

    except Exception as e:
        logger.error("Error listing jobs: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Error listing jobs: {str(e)}"
        ) from e


@router.get("/job-logs/{job_id}", response_model=Dict[str, Any])
async def get_job_logs(
    job_id: str, namespace: str = Query("development")
) -> Dict[str, Any]:
    """
    Get logs for a job.

    Reads stdout and stderr from the most recently created allocation of the
    job. The task name is the first key of the allocation's ``TaskStates``;
    when that is missing or empty, ``"app"`` is used.

    Returns:
        A dict with ``success``, ``job_id``, ``message`` and ``logs``; on
        success it also carries ``allocation_id`` and ``task_name``, and
        ``logs`` holds ``stdout`` and ``stderr``. When the job has no
        allocations, or any exception occurs, ``success`` is ``False`` and
        ``logs`` is ``None``.
    """
    try:
        nomad_service = NomadService()
        nomad_service.namespace = namespace

        allocations = nomad_service.get_allocations(job_id)
        if not allocations:
            return {
                "success": False,
                "job_id": job_id,
                "message": f"No allocations found for job {job_id}",
                "logs": None,
            }

        latest_alloc = _latest_allocation(allocations)
        alloc_id = latest_alloc.get("ID")

        # Use the first task recorded on the allocation, falling back to the
        # default task name when none is available.
        task_states = latest_alloc.get("TaskStates")
        task_name = next(iter(task_states), None) if task_states else None
        if not task_name:
            task_name = "app"  # Default task name

        stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout")
        stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr")

        return {
            "success": True,
            "job_id": job_id,
            "allocation_id": alloc_id,
            "task_name": task_name,
            "message": f"Retrieved logs for job {job_id}",
            "logs": {
                "stdout": stdout_logs,
                "stderr": stderr_logs,
            },
        }

    except Exception as e:
        logger.error("Error getting logs for job %s: %s", job_id, e)
        return {
            "success": False,
            "job_id": job_id,
            "message": f"Error getting logs: {str(e)}",
            "logs": None,
        }