import json
import logging
from typing import Dict, Any, List, Optional

from fastapi import APIRouter, Depends, HTTPException, Body, Query

from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService
from app.schemas.job import JobResponse, JobOperation, JobSpecification

router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()

# Configure logging
logger = logging.getLogger(__name__)


def _newest_allocation(allocations):
    """Return the allocation with the greatest ``CreateTime``.

    Allocations without a ``CreateTime`` key are treated as created at 0.
    On ties the earliest entry in ``allocations`` wins, which matches
    sorting in descending order and taking the first element.
    """
    return max(allocations, key=lambda alloc: alloc.get("CreateTime", 0))


@router.get("/", response_model=List[JobResponse])
async def list_jobs():
    """List all jobs.

    Each job that has an ``ID`` and a repository known to the config
    service gets a ``repository`` entry added before being returned.
    """
    jobs = nomad_service.list_jobs()

    # Enhance job responses with repository information if available
    for job in jobs:
        job_id = job.get("ID")
        if not job_id:
            continue
        repository = config_service.get_repository_from_job(job_id)
        if repository:
            job["repository"] = repository

    return jobs


@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """Get a job by ID, adding its ``repository`` when the config service knows one."""
    job = nomad_service.get_job(job_id)

    # Add repository information if available
    repository = config_service.get_repository_from_job(job_id)
    if repository:
        job["repository"] = repository

    return job


@router.post("/", response_model=JobOperation)
async def start_job(job_spec: JobSpecification = Body(...)):
    """Start a Nomad job with the provided specification."""
    return nomad_service.start_job(job_spec.dict())


@router.delete("/{job_id}", response_model=JobOperation)
async def stop_job(job_id: str, purge: bool = Query(False)):
    """Stop a job by ID; ``purge`` is forwarded to the Nomad service unchanged."""
    return nomad_service.stop_job(job_id, purge)


@router.get("/{job_id}/allocations")
async def get_job_allocations(job_id: str):
    """Get all allocations for a job."""
    return nomad_service.get_allocations(job_id)


@router.get("/{job_id}/latest-allocation")
async def get_latest_allocation(job_id: str):
    """Get the latest allocation for a job.

    Raises:
        HTTPException: 404 when the job has no allocations.
    """
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    return _newest_allocation(allocations)


@router.get("/{job_id}/status")
async def get_job_status(
    job_id: str,
    namespace: Optional[str] = Query(None, description="Nomad namespace"),
):
    """Get the current status of a job, including deployment and latest allocation.

    A fresh ``NomadService`` is used so that overriding the namespace does
    not affect the shared module-level client. Deployment and allocation
    details are best-effort: a failure fetching either is logged and the
    corresponding key is simply left out of the response.

    Raises:
        HTTPException: whatever the service layer raised while fetching the
            job (for example a 404), or a 500 for any other failure.
    """
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job status for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)
        status = {
            "job_id": job_id,
            "namespace": namespace or custom_nomad.namespace,
            "status": job.get("Status", "unknown"),
            "stable": job.get("Stable", False),
            "submitted_at": job.get("SubmitTime", 0),
        }

        # Get the latest deployment if any; deployment info is optional
        try:
            deployment = custom_nomad.get_deployment_status(job_id)
            if deployment:
                status["deployment"] = {
                    "id": deployment.get("ID"),
                    "status": deployment.get("Status"),
                    "description": deployment.get("StatusDescription"),
                }
        except Exception as e:
            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")

        # Get the latest allocation if any; allocation info is optional
        try:
            allocations = custom_nomad.get_allocations(job_id)
            if allocations:
                latest_alloc = _newest_allocation(allocations)
                status["latest_allocation"] = {
                    "id": latest_alloc.get("ID"),
                    "status": latest_alloc.get("ClientStatus"),
                    "description": latest_alloc.get("ClientDescription", ""),
                    "created_at": latest_alloc.get("CreateTime", 0),
                }
        except Exception as e:
            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")

        return status
    except HTTPException:
        # Keep the status code chosen by the service layer (e.g. 404 for an
        # unknown job) instead of re-reporting it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}") from e
def _nomad_for_namespace(namespace: Optional[str]) -> NomadService:
    """Return a fresh NomadService, overriding its namespace when one is given."""
    client = NomadService()
    if namespace:
        client.namespace = namespace
    return client


def _default_job_spec(job_id: str, repository: str) -> Dict[str, Any]:
    """Build the fallback docker service job spec used when no spec is configured."""
    return {
        "ID": job_id,
        "Name": job_id,
        "Type": "service",
        "Datacenters": ["jm"],  # Default datacenter
        "TaskGroups": [
            {
                "Name": "app",
                "Count": 1,
                "Tasks": [
                    {
                        "Name": job_id.split('-')[0],  # Use first part of job ID as task name
                        "Driver": "docker",
                        "Config": {
                            "image": f"registry.dev.meisheng.group/{repository}:latest",
                            "force_pull": True,
                            "ports": ["http"],
                        },
                        "Resources": {
                            "CPU": 500,
                            "MemoryMB": 512,
                        },
                    }
                ],
                "Networks": [
                    {
                        "DynamicPorts": [
                            {
                                "Label": "http",
                                "Value": 0,
                                "To": 8000,
                            }
                        ]
                    }
                ],
            }
        ],
        "Meta": {
            "repository": repository,
        },
    }


@router.get("/{job_id}/specification")
async def get_job_specification(
    job_id: str,
    namespace: Optional[str] = Query(None, description="Nomad namespace"),
    raw: bool = Query(False),
):
    """Get the job specification for a job.

    With ``raw`` set, the job is returned exactly as the Nomad service
    provided it. Otherwise a trimmed summary is returned when the job
    contains a ``JobID`` key, and the unmodified job in every other case.

    Raises:
        HTTPException: a non-404 HTTPException from the service layer is
            passed through unchanged; every other failure becomes a 404.
    """
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = _nomad_for_namespace(namespace)
        if namespace:
            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job specification for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)

        if raw:
            return job

        # Extract just the job specification part if present.
        # NOTE(review): the gate checks "JobID" while the summary reads "ID";
        # confirm which key the Nomad service actually returns.
        if "JobID" in job:
            return {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "type": job.get("Type"),
                "status": job.get("Status"),
                "datacenters": job.get("Datacenters", []),
                "namespace": job.get("Namespace"),
                "task_groups": job.get("TaskGroups", []),
                "meta": job.get("Meta", {}),
            }

        return job
    except Exception as e:
        # Do not disguise upstream errors such as a 5xx as "not found".
        if isinstance(e, HTTPException) and e.status_code != 404:
            raise
        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}") from e


@router.post("/{job_id}/restart")
async def restart_job(job_id: str):
    """Restart a job by stopping it and starting it again.

    The current specification is fetched first and re-submitted after the
    stop. If the start fails after the stop succeeded, the job is left
    stopped.

    Raises:
        HTTPException: whatever the service layer raised (for example a 404
            for an unknown job), or a 500 for any other failure.
    """
    try:
        # Get the current job specification
        job_spec = nomad_service.get_job(job_id)

        # Stop the job
        nomad_service.stop_job(job_id)

        # Start the job with the original specification
        result = nomad_service.start_job(job_spec)

        return {
            "job_id": job_id,
            "status": "restarted",
            "eval_id": result.get("eval_id"),
        }
    except HTTPException:
        # Keep the upstream status code instead of re-reporting it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}") from e


@router.get("/by-repository/{repository}")
async def get_job_by_repository(repository: str):
    """Get job information by repository URL or name.

    The returned job has a ``repository`` entry set to the requested value.

    Raises:
        HTTPException: 404 when the repository has no configured job or the
            job lookup fails; a non-404 HTTPException from the service
            layer is passed through unchanged.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Get the job using the specific namespace if provided
    try:
        if namespace:
            # Override the default namespace with the specific one
            job = _nomad_for_namespace(namespace).get_job(job_id)
        else:
            # Use the default namespace settings
            job = nomad_service.get_job(job_id)

        # Add repository information
        job["repository"] = repository
        return job
    except Exception as e:
        # Do not disguise upstream errors such as a 5xx as "not found".
        if isinstance(e, HTTPException) and e.status_code != 404:
            raise
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}") from e


@router.post("/by-repository/{repository}/start")
async def start_job_by_repository(repository: str):
    """Start a job by its associated repository.

    The specification of the existing job is re-submitted. When that
    attempt fails with a 404, the specification comes from the repository
    configuration instead, or from a built-in default docker service spec
    if none is configured.

    Raises:
        HTTPException: 404 when the repository has no configured job; any
            non-404 HTTPException from the service layer is re-raised
            rather than silently dropped.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")

    # Create a custom service with the specific namespace if provided
    if namespace:
        logger.info(f"Setting custom_nomad.namespace to {namespace}")
    custom_nomad = _nomad_for_namespace(namespace)

    # Log the current namespace being used
    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")

    try:
        # Get the job specification from an existing job
        job_spec = custom_nomad.get_job(job_id)
        logger.info(f"Retrieved job specification for {job_id} from existing job")

        if isinstance(job_spec, dict):
            # Ensure namespace is explicitly set
            if namespace:
                logger.info(f"Setting namespace in job spec to {namespace}")
                job_spec["Namespace"] = namespace

            # Log the keys in the job specification
            logger.info(f"Job spec keys: {job_spec.keys()}")

        # Start the job with the retrieved specification
        result = custom_nomad.start_job(job_spec)
    except HTTPException as e:
        # Only "not found" triggers the fallback; other errors must surface.
        if e.status_code != 404:
            raise

        logger.info(f"Job {job_id} not found, attempting to get specification from config")

        # Try to get job spec from repository config
        job_spec = config_service.get_job_spec_from_repository(repository)
        if not job_spec:
            logger.warning(f"No job specification found for repository {repository}, creating a default one")
            job_spec = _default_job_spec(job_id, repository)

        # Set the namespace explicitly in the job spec
        if namespace:
            logger.info(f"Setting namespace in default job spec to {namespace}")
            job_spec["Namespace"] = namespace

        logger.info(f"Starting job {job_id} with specification")

        # Log the job specification structure
        if isinstance(job_spec, dict):
            logger.info(f"Job spec keys: {job_spec.keys()}")
            if "Namespace" in job_spec:
                logger.info(f"Job spec namespace: {job_spec['Namespace']}")

        # Start the job with the specification
        result = custom_nomad.start_job(job_spec)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "started",
        "eval_id": result.get("eval_id"),
        "namespace": namespace,
    }


@router.post("/by-repository/{repository}/stop")
async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
    """Stop a job by its associated repository.

    Raises:
        HTTPException: 404 when the repository has no configured job.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Create a custom service with the specific namespace if provided
    custom_nomad = _nomad_for_namespace(namespace)

    # Stop the job
    result = custom_nomad.stop_job(job_id, purge)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "stopped",
        "eval_id": result.get("eval_id"),
        "namespace": namespace,
    }


@router.post("/by-repository/{repository}/restart")
async def restart_job_by_repository(repository: str):
    """Restart a job by its associated repository.

    The current specification is fetched, the job is stopped, and the same
    specification is submitted again. Errors from the service layer are
    not intercepted here.

    Raises:
        HTTPException: 404 when the repository has no configured job.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Create a custom service with the specific namespace if provided
    custom_nomad = _nomad_for_namespace(namespace)

    # Get the job specification
    job_spec = custom_nomad.get_job(job_id)

    # Stop the job first
    custom_nomad.stop_job(job_id)

    # Start the job with the original specification
    result = custom_nomad.start_job(job_spec)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "restarted",
        "eval_id": result.get("eval_id"),
        "namespace": namespace,
    }