396 lines
15 KiB
Python
396 lines
15 KiB
Python
from fastapi import APIRouter, Depends, HTTPException, Body, Query
|
|
from typing import Dict, Any, List, Optional
|
|
import json
|
|
import logging
|
|
|
|
from app.services.nomad_client import NomadService
|
|
from app.services.config_service import ConfigService
|
|
from app.schemas.job import JobResponse, JobOperation, JobSpecification
|
|
|
|
# Router on which every job endpoint in this module is registered.
router = APIRouter()

# Module-wide service instances shared by the handlers below.
nomad_service = NomadService()
config_service = ConfigService()

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
@router.get("/", response_model=List[JobResponse])
async def list_jobs():
    """Return every job, annotated with its repository where one is known.

    Each job dict that carries a truthy ``"ID"`` is looked up through
    ``config_service.get_repository_from_job``; when that lookup yields a
    truthy value it is stored on the job under the ``"repository"`` key.
    """
    all_jobs = nomad_service.list_jobs()

    for entry in all_jobs:
        identifier = entry.get("ID")
        if not identifier:
            # Nothing to look the repository up by.
            continue

        linked_repo = config_service.get_repository_from_job(identifier)
        if linked_repo:
            entry["repository"] = linked_repo

    return all_jobs
|
|
|
|
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """Fetch one job by ID and attach its repository when one is configured.

    The ``"repository"`` key is only added when
    ``config_service.get_repository_from_job`` returns a truthy value.
    """
    job_data = nomad_service.get_job(job_id)

    linked_repo = config_service.get_repository_from_job(job_id)
    if linked_repo:
        job_data["repository"] = linked_repo

    return job_data
|
|
|
|
@router.post("/", response_model=JobOperation)
async def start_job(job_spec: JobSpecification = Body(...)):
    """Submit the posted job specification to Nomad and return the outcome.

    The request model is converted to a plain dict before being handed to
    ``nomad_service.start_job``.
    """
    spec_payload = job_spec.dict()
    return nomad_service.start_job(spec_payload)
|
|
|
|
@router.delete("/{job_id}", response_model=JobOperation)
async def stop_job(job_id: str, purge: bool = Query(False)):
    """Stop the job with the given ID.

    The ``purge`` query flag (default ``False``) is forwarded unchanged to
    ``nomad_service.stop_job``.
    """
    outcome = nomad_service.stop_job(job_id, purge)
    return outcome
|
|
|
|
@router.get("/{job_id}/allocations")
async def get_job_allocations(job_id: str):
    """Return whatever ``nomad_service.get_allocations`` reports for the job."""
    job_allocations = nomad_service.get_allocations(job_id)
    return job_allocations
|
|
|
|
@router.get("/{job_id}/latest-allocation")
async def get_latest_allocation(job_id: str):
    """Return the most recently created allocation of a job.

    Raises:
        HTTPException: 404 when the job has no allocations.
    """
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    # Only the newest entry is needed, so pick it in one pass instead of
    # sorting the whole list. max() returns the first maximal element, which
    # is the same item a stable descending sort would place at index 0.
    # Allocations without a "CreateTime" are treated as created at time 0.
    return max(allocations, key=lambda a: a.get("CreateTime", 0))
|
|
|
|
@router.get("/{job_id}/status")
async def get_job_status(job_id: str, namespace: str = Query(None, description="Nomad namespace")):
    """Summarise a job's status, latest deployment and latest allocation.

    Args:
        job_id: ID of the job to inspect.
        namespace: Optional namespace; when given it overrides the namespace
            of the freshly created ``NomadService`` used for the lookups.

    Returns:
        A dict with ``job_id``, ``namespace``, ``status``, ``stable`` and
        ``submitted_at``, plus ``deployment`` and ``latest_allocation``
        sub-dicts when those could be retrieved.

    Raises:
        HTTPException: whatever the Nomad service raised (its status code is
            preserved), or 500 for any other failure while reading the job.
    """
    try:
        # Use a dedicated service instance so a namespace override does not
        # leak into the shared module-level one.
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job status for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)
        status = {
            "job_id": job_id,
            "namespace": namespace or custom_nomad.namespace,
            "status": job.get("Status", "unknown"),
            "stable": job.get("Stable", False),
            "submitted_at": job.get("SubmitTime", 0),
        }

        # Deployment info is optional: a failure here is logged, not raised.
        try:
            deployment = custom_nomad.get_deployment_status(job_id)
            if deployment:
                status["deployment"] = {
                    "id": deployment.get("ID"),
                    "status": deployment.get("Status"),
                    "description": deployment.get("StatusDescription"),
                }
        except Exception as e:
            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")

        # Allocation info is optional as well.
        try:
            allocations = custom_nomad.get_allocations(job_id)
            if allocations:
                # Newest allocation by creation time; entries without a
                # "CreateTime" count as 0.
                latest_alloc = max(allocations, key=lambda a: a.get("CreateTime", 0))
                status["latest_allocation"] = {
                    "id": latest_alloc.get("ID"),
                    "status": latest_alloc.get("ClientStatus"),
                    "description": latest_alloc.get("ClientDescription", ""),
                    "created_at": latest_alloc.get("CreateTime", 0),
                }
        except Exception as e:
            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")

        return status
    except HTTPException:
        # Keep the original status code (e.g. 404 for an unknown job) instead
        # of rewrapping it as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}") from e
|
|
|
|
@router.get("/{job_id}/specification")
async def get_job_specification(job_id: str, namespace: str = Query(None, description="Nomad namespace"), raw: bool = Query(False)):
    """Return the specification of a job.

    Args:
        job_id: ID of the job to read.
        namespace: Optional namespace; when given it overrides the namespace
            of the freshly created ``NomadService`` used for the lookup.
        raw: When true, return the job exactly as the service delivered it.

    Returns:
        The job dict, or a condensed dict (``id``, ``name``, ``type``,
        ``status``, ``datacenters``, ``namespace``, ``task_groups``,
        ``meta``) when the job dict contains a ``"JobID"`` key.

    Raises:
        HTTPException: whatever the Nomad service raised (its status code is
            preserved), or 404 for any other failure while reading the job.
    """
    try:
        # Use a dedicated service instance so a namespace override does not
        # leak into the shared module-level one.
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job specification for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)

        if raw:
            return job

        # NOTE(review): the gate checks for "JobID" while the fields below are
        # read from "ID"/"Name"/... - confirm NomadService.get_job really
        # returns a "JobID" key, otherwise this condensed view is never built.
        if "JobID" in job:
            return {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "type": job.get("Type"),
                "status": job.get("Status"),
                "datacenters": job.get("Datacenters", []),
                "namespace": job.get("Namespace"),
                "task_groups": job.get("TaskGroups", []),
                "meta": job.get("Meta", {}),
            }

        return job
    except HTTPException:
        # Do not relabel upstream errors (e.g. a 5xx from Nomad) as 404.
        raise
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}") from e
|
|
|
|
@router.post("/{job_id}/restart")
async def restart_job(job_id: str):
    """Restart a job by stopping it and resubmitting its current specification.

    Returns:
        A dict with ``job_id``, ``status`` (``"restarted"``) and ``eval_id``
        taken from the start result.

    Raises:
        HTTPException: whatever the Nomad service raised (its status code is
            preserved), or 500 for any other failure.
    """
    try:
        # Capture the specification before stopping so it can be resubmitted.
        job_spec = nomad_service.get_job(job_id)

        nomad_service.stop_job(job_id)

        # NOTE(review): if this start fails the job stays stopped; the two
        # steps are not rolled back.
        result = nomad_service.start_job(job_spec)

        return {
            "job_id": job_id,
            "status": "restarted",
            "eval_id": result.get("eval_id"),
        }
    except HTTPException:
        # Keep the original status code (e.g. 404 for an unknown job) instead
        # of rewrapping it as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}") from e
|
|
|
|
@router.get("/by-repository/{repository}")
async def get_job_by_repository(repository: str):
    """Return the job associated with a repository URL or name.

    The repository is resolved to a job ID (and optional namespace) through
    ``config_service.get_job_from_repository``; the job is then fetched and
    returned with the requested ``repository`` stored on it.

    Raises:
        HTTPException: 404 when no job is mapped to the repository; whatever
            the Nomad service raised (its status code is preserved); or 404
            for any other failure while fetching the job.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    try:
        if namespace:
            # Use a dedicated service instance so the namespace override does
            # not leak into the shared module-level one.
            custom_nomad = NomadService()
            custom_nomad.namespace = namespace
            job = custom_nomad.get_job(job_id)
        else:
            # No namespace configured: the shared service's default applies.
            job = nomad_service.get_job(job_id)

        job["repository"] = repository
        return job
    except HTTPException:
        # Do not relabel upstream errors (e.g. a 5xx from Nomad) as 404.
        raise
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}") from e
|
|
|
|
def _default_job_spec(job_id: str, repository: str) -> Dict[str, Any]:
    """Build the fallback single-task Docker job spec for a repository."""
    return {
        "ID": job_id,
        "Name": job_id,
        "Type": "service",
        "Datacenters": ["jm"],  # Default datacenter
        "TaskGroups": [
            {
                "Name": "app",
                "Count": 1,
                "Tasks": [
                    {
                        # First dash-separated part of the job ID names the task.
                        "Name": job_id.split("-")[0],
                        "Driver": "docker",
                        "Config": {
                            "image": f"registry.dev.meisheng.group/{repository}:latest",
                            "force_pull": True,
                            "ports": ["http"],
                        },
                        "Resources": {
                            "CPU": 500,
                            "MemoryMB": 512,
                        },
                    },
                ],
                "Networks": [
                    {
                        "DynamicPorts": [
                            {
                                "Label": "http",
                                "Value": 0,
                                "To": 8000,
                            },
                        ],
                    },
                ],
            },
        ],
        "Meta": {
            "repository": repository,
        },
    }


@router.post("/by-repository/{repository}/start")
async def start_job_by_repository(repository: str):
    """Start the job associated with a repository.

    The specification of the existing job is resubmitted when the job can be
    fetched. When that attempt fails with a 404, the specification comes from
    ``config_service.get_job_spec_from_repository`` instead, or from a
    built-in default spec when the config has none.

    Returns:
        A dict with ``job_id``, ``repository``, ``status`` (``"started"``),
        ``eval_id`` from the start result, and ``namespace``.

    Raises:
        HTTPException: 404 when no job is mapped to the repository; any
            non-404 HTTPException raised while fetching or starting the
            existing job; anything raised by the fallback start.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")

    # Use a dedicated service instance so the namespace override does not
    # leak into the shared module-level one.
    custom_nomad = NomadService()
    if namespace:
        logger.info(f"Setting custom_nomad.namespace to {namespace}")
        custom_nomad.namespace = namespace

    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")

    try:
        # Preferred path: resubmit the specification of the existing job.
        job_spec = custom_nomad.get_job(job_id)
        logger.info(f"Retrieved job specification for {job_id} from existing job")

        if isinstance(job_spec, dict):
            if namespace:
                logger.info(f"Setting namespace in job spec to {namespace}")
                job_spec["Namespace"] = namespace
            logger.info(f"Job spec keys: {job_spec.keys()}")

        result = custom_nomad.start_job(job_spec)

        return {
            "job_id": job_id,
            "repository": repository,
            "status": "started",
            "eval_id": result.get("eval_id"),
            "namespace": namespace,
        }
    except HTTPException as e:
        if e.status_code != 404:
            # Only "not found" has a fallback; any other error must reach the
            # client instead of being swallowed into an empty response.
            raise

        logger.info(f"Job {job_id} not found, attempting to get specification from config")

        job_spec = config_service.get_job_spec_from_repository(repository)

        if not job_spec:
            logger.warning(f"No job specification found for repository {repository}, creating a default one")
            job_spec = _default_job_spec(job_id, repository)

            if namespace:
                logger.info(f"Setting namespace in default job spec to {namespace}")
                job_spec["Namespace"] = namespace

        logger.info(f"Starting job {job_id} with specification")

        if isinstance(job_spec, dict):
            logger.info(f"Job spec keys: {job_spec.keys()}")
            if "Namespace" in job_spec:
                logger.info(f"Job spec namespace: {job_spec['Namespace']}")

        result = custom_nomad.start_job(job_spec)

        return {
            "job_id": job_id,
            "repository": repository,
            "status": "started",
            "eval_id": result.get("eval_id"),
            "namespace": namespace,
        }
|
|
|
|
@router.post("/by-repository/{repository}/stop")
async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
    """Stop the job that is mapped to the given repository.

    The repository is resolved through
    ``config_service.get_job_from_repository``; the stop request goes through
    a fresh ``NomadService`` whose namespace is overridden when the mapping
    names one. ``purge`` is forwarded unchanged.

    Raises:
        HTTPException: 404 when no job is mapped to the repository.
    """
    mapping = config_service.get_job_from_repository(repository)
    if not mapping:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    target_job = mapping.get("job_id")
    target_namespace = mapping.get("namespace")

    # Dedicated instance: the shared service keeps its own namespace.
    scoped_nomad = NomadService()
    if target_namespace:
        scoped_nomad.namespace = target_namespace

    stop_result = scoped_nomad.stop_job(target_job, purge)

    response = {
        "job_id": target_job,
        "repository": repository,
        "status": "stopped",
        "eval_id": stop_result.get("eval_id"),
        "namespace": target_namespace,
    }
    return response
|
|
|
|
@router.post("/by-repository/{repository}/restart")
async def restart_job_by_repository(repository: str):
    """Restart the job that is mapped to the given repository.

    The job's current specification is fetched, the job is stopped, and the
    fetched specification is submitted again. All three calls go through a
    fresh ``NomadService`` whose namespace is overridden when the mapping
    names one.

    Raises:
        HTTPException: 404 when no job is mapped to the repository.
    """
    mapping = config_service.get_job_from_repository(repository)
    if not mapping:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    target_job = mapping.get("job_id")
    target_namespace = mapping.get("namespace")

    # Dedicated instance: the shared service keeps its own namespace.
    scoped_nomad = NomadService()
    if target_namespace:
        scoped_nomad.namespace = target_namespace

    # Capture the specification before stopping so it can be resubmitted.
    saved_spec = scoped_nomad.get_job(target_job)
    scoped_nomad.stop_job(target_job)
    start_result = scoped_nomad.start_job(saved_spec)

    response = {
        "job_id": target_job,
        "repository": repository,
        "status": "restarted",
        "eval_id": start_result.get("eval_id"),
        "namespace": target_namespace,
    }
    return response