Update README.md
This commit is contained in:
396
app/routers/jobs.py
Normal file
396
app/routers/jobs.py
Normal file
@ -0,0 +1,396 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Body, Query
|
||||
from typing import Dict, Any, List, Optional
|
||||
import json
|
||||
import logging
|
||||
|
||||
from app.services.nomad_client import NomadService
|
||||
from app.services.config_service import ConfigService
|
||||
from app.schemas.job import JobResponse, JobOperation, JobSpecification
|
||||
|
||||
# Module-level logger shared by the endpoints in this router.
logger = logging.getLogger(__name__)

# Router object that the endpoint decorators below register on.
router = APIRouter()

# Shared service instances; endpoints that need a specific namespace
# create their own NomadService instead of mutating this one.
nomad_service = NomadService()
config_service = ConfigService()
|
||||
|
||||
@router.get("/", response_model=List[JobResponse])
async def list_jobs():
    """Return the jobs from ``nomad_service.list_jobs()`` with repository info.

    For every job dict that carries a truthy ``"ID"``, the config service is
    asked for the matching repository; when one is found it is stored on the
    job dict under the ``"repository"`` key.
    """
    all_jobs = nomad_service.list_jobs()
    for entry in all_jobs:
        identifier = entry.get("ID")
        if not identifier:
            # Nothing to look up without an ID.
            continue
        repo = config_service.get_repository_from_job(identifier)
        if repo:
            entry["repository"] = repo
    return all_jobs
|
||||
|
||||
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """Fetch one job by ID and attach its repository when one is configured."""
    job_data = nomad_service.get_job(job_id)
    repo = config_service.get_repository_from_job(job_id)
    if not repo:
        return job_data
    # Only annotate the job when the config service knows a repository for it.
    job_data["repository"] = repo
    return job_data
|
||||
|
||||
@router.post("/", response_model=JobOperation)
async def start_job(job_spec: JobSpecification = Body(...)):
    """Pass the posted job specification, as a dict, to ``nomad_service.start_job``."""
    spec_payload = job_spec.dict()
    return nomad_service.start_job(spec_payload)
|
||||
|
||||
@router.delete("/{job_id}", response_model=JobOperation)
async def stop_job(job_id: str, purge: bool = Query(False)):
    """Stop the job with the given ID; ``purge`` is forwarded to the Nomad service."""
    outcome = nomad_service.stop_job(job_id, purge)
    return outcome
|
||||
|
||||
@router.get("/{job_id}/allocations")
async def get_job_allocations(job_id: str):
    """Return whatever ``nomad_service.get_allocations`` yields for the job."""
    allocation_list = nomad_service.get_allocations(job_id)
    return allocation_list
|
||||
|
||||
@router.get("/{job_id}/latest-allocation")
async def get_latest_allocation(job_id: str):
    """Return the allocation with the highest ``CreateTime`` for a job.

    Raises:
        HTTPException: 404 when the job has no allocations.
    """
    allocs = nomad_service.get_allocations(job_id)
    if allocs:
        # max() returns the first element with the largest key, which is the
        # same element a stable descending sort would place at index 0.
        # Allocations without "CreateTime" are treated as created at 0.
        return max(allocs, key=lambda alloc: alloc.get("CreateTime", 0))

    raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
|
||||
|
||||
@router.get("/{job_id}/status")
async def get_job_status(job_id: str, namespace: Optional[str] = Query(None, description="Nomad namespace")):
    """Summarise a job's status, latest deployment and latest allocation.

    Args:
        job_id: ID passed to the Nomad service lookups.
        namespace: Optional namespace; when given it replaces the
            ``namespace`` attribute of the per-request ``NomadService``.

    Returns:
        A dict with ``job_id``, ``namespace``, ``status``, ``stable`` and
        ``submitted_at``. ``deployment`` and ``latest_allocation`` are added
        only when the corresponding lookup succeeds and returns data.

    Raises:
        HTTPException: re-raised unchanged when the job lookup raises one
            (so a service-side 404 is not reported as a 500); any other
            failure is reported as a 500.
    """
    try:
        # A fresh service per request so the namespace override does not leak
        # into the shared module-level instance.
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job status for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)
        status = {
            "job_id": job_id,
            "namespace": namespace or custom_nomad.namespace,
            "status": job.get("Status", "unknown"),
            "stable": job.get("Stable", False),
            "submitted_at": job.get("SubmitTime", 0),
        }

        # Deployment info is optional: log and carry on if it cannot be fetched.
        try:
            deployment = custom_nomad.get_deployment_status(job_id)
            if deployment:
                status["deployment"] = {
                    "id": deployment.get("ID"),
                    "status": deployment.get("Status"),
                    "description": deployment.get("StatusDescription"),
                }
        except Exception as e:
            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")

        # Allocation info is optional as well.
        try:
            allocations = custom_nomad.get_allocations(job_id)
            if allocations:
                # Newest allocation = largest CreateTime (missing counts as 0).
                latest_alloc = max(allocations, key=lambda a: a.get("CreateTime", 0))
                status["latest_allocation"] = {
                    "id": latest_alloc.get("ID"),
                    "status": latest_alloc.get("ClientStatus"),
                    "description": latest_alloc.get("ClientDescription", ""),
                    "created_at": latest_alloc.get("CreateTime", 0),
                }
        except Exception as e:
            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")

        return status
    except HTTPException:
        # Preserve the status code chosen by the service instead of
        # flattening every error into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}") from e
|
||||
|
||||
@router.get("/{job_id}/specification")
async def get_job_specification(job_id: str, namespace: Optional[str] = Query(None, description="Nomad namespace"), raw: bool = Query(False)):
    """Return the specification of a job.

    Args:
        job_id: ID passed to ``NomadService.get_job``.
        namespace: Optional namespace; when given it replaces the
            ``namespace`` attribute of the per-request ``NomadService``.
        raw: When true, the job dict from the service is returned untouched.

    Returns:
        The job dict from the service, or a reduced dict (``id``, ``name``,
        ``type``, ``status``, ``datacenters``, ``namespace``, ``task_groups``,
        ``meta``) when the job dict contains a ``"JobID"`` key.

    Raises:
        HTTPException: re-raised unchanged when the service raises one;
            any other failure is reported as a 404.
    """
    try:
        # A fresh service per request so the namespace override does not leak
        # into the shared module-level instance.
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job specification for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)

        if raw:
            return job

        # NOTE(review): the guard tests for "JobID" while every field below is
        # read from "ID"/"Name"/... — confirm which key the service's job dict
        # actually carries; if it never has "JobID" this branch never runs.
        if "JobID" in job:
            return {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "type": job.get("Type"),
                "status": job.get("Status"),
                "datacenters": job.get("Datacenters", []),
                "namespace": job.get("Namespace"),
                "task_groups": job.get("TaskGroups", []),
                "meta": job.get("Meta", {}),
            }

        return job
    except HTTPException:
        # Keep the service's own status code rather than forcing a 404.
        raise
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}") from e
|
||||
|
||||
@router.post("/{job_id}/restart")
async def restart_job(job_id: str):
    """Restart a job by stopping it and re-submitting its current specification.

    Returns:
        A dict with ``job_id``, ``status`` (``"restarted"``) and the
        ``eval_id`` taken from the start result.

    Raises:
        HTTPException: re-raised unchanged when the Nomad service raises one
            (so a service-side 404 is not reported as a 500); any other
            failure is reported as a 500.
    """
    try:
        # Capture the specification before stopping so it can be re-submitted.
        job_spec = nomad_service.get_job(job_id)

        nomad_service.stop_job(job_id)

        result = nomad_service.start_job(job_spec)

        return {
            "job_id": job_id,
            "status": "restarted",
            "eval_id": result.get("eval_id"),
        }
    except HTTPException:
        # Preserve the status code chosen by the service.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}") from e
|
||||
|
||||
@router.get("/by-repository/{repository}")
async def get_job_by_repository(repository: str):
    """Look up the job configured for a repository and return it.

    The returned job dict gets the requested ``repository`` stored under the
    ``"repository"`` key.

    Raises:
        HTTPException: 404 when the config service has no entry for the
            repository, or when fetching the job fails for any reason.
    """
    mapping = config_service.get_job_from_repository(repository)
    if not mapping:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = mapping.get("job_id")
    ns = mapping.get("namespace")

    try:
        if ns:
            # Dedicated client so the namespace override stays local to this request.
            client = NomadService()
            client.namespace = ns
        else:
            # No namespace configured: use the shared client with its defaults.
            client = nomad_service

        found = client.get_job(job_id)
        found["repository"] = repository
        return found
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}")
|
||||
|
||||
def _build_default_job_spec(job_id: str, repository: str) -> Dict[str, Any]:
    """Build the fallback Docker service spec used when no spec is configured."""
    return {
        "ID": job_id,
        "Name": job_id,
        "Type": "service",
        "Datacenters": ["jm"],  # Default datacenter
        "TaskGroups": [
            {
                "Name": "app",
                "Count": 1,
                "Tasks": [
                    {
                        # Use first part of job ID as task name
                        "Name": job_id.split('-')[0],
                        "Driver": "docker",
                        "Config": {
                            "image": f"registry.dev.meisheng.group/{repository}:latest",
                            "force_pull": True,
                            "ports": ["http"],
                        },
                        "Resources": {
                            "CPU": 500,
                            "MemoryMB": 512,
                        },
                    }
                ],
                "Networks": [
                    {
                        "DynamicPorts": [
                            {
                                "Label": "http",
                                "Value": 0,
                                "To": 8000,
                            }
                        ]
                    }
                ],
            }
        ],
        "Meta": {
            "repository": repository,
        },
    }


@router.post("/by-repository/{repository}/start")
async def start_job_by_repository(repository: str):
    """Start the job that the config service associates with a repository.

    The specification of the existing job is re-submitted. When that attempt
    raises a 404 ``HTTPException``, the specification is taken from the
    repository config instead, falling back to a generated default spec.

    Returns:
        A dict with ``job_id``, ``repository``, ``status`` (``"started"``),
        ``eval_id`` from the start result, and ``namespace``.

    Raises:
        HTTPException: 404 when no job is configured for the repository.
            Any non-404 ``HTTPException`` from the Nomad service is re-raised
            instead of being swallowed.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")

    # A fresh service per request so the namespace override does not leak
    # into the shared module-level instance.
    custom_nomad = NomadService()
    if namespace:
        logger.info(f"Setting custom_nomad.namespace to {namespace}")
        custom_nomad.namespace = namespace

    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")

    try:
        # First choice: reuse the specification of the existing job.
        job_spec = custom_nomad.get_job(job_id)
        logger.info(f"Retrieved job specification for {job_id} from existing job")

        if isinstance(job_spec, dict):
            if namespace:
                logger.info(f"Setting namespace in job spec to {namespace}")
                job_spec["Namespace"] = namespace
            logger.info(f"Job spec keys: {job_spec.keys()}")

        result = custom_nomad.start_job(job_spec)
    except HTTPException as e:
        if e.status_code != 404:
            # Previously any non-404 error fell through and the endpoint
            # returned None with a success status; surface it instead.
            raise

        logger.info(f"Job {job_id} not found, attempting to get specification from config")

        job_spec = config_service.get_job_spec_from_repository(repository)
        if not job_spec:
            logger.warning(f"No job specification found for repository {repository}, creating a default one")
            job_spec = _build_default_job_spec(job_id, repository)

        if namespace:
            logger.info(f"Setting namespace in default job spec to {namespace}")
            job_spec["Namespace"] = namespace

        logger.info(f"Starting job {job_id} with specification")

        if isinstance(job_spec, dict):
            logger.info(f"Job spec keys: {job_spec.keys()}")
            if "Namespace" in job_spec:
                logger.info(f"Job spec namespace: {job_spec['Namespace']}")

        result = custom_nomad.start_job(job_spec)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "started",
        "eval_id": result.get("eval_id"),
        "namespace": namespace,
    }
|
||||
|
||||
@router.post("/by-repository/{repository}/stop")
async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
    """Stop the job that the config service associates with a repository.

    Returns:
        A dict with ``job_id``, ``repository``, ``status`` (``"stopped"``),
        ``eval_id`` from the stop result, and ``namespace``.

    Raises:
        HTTPException: 404 when no job is configured for the repository.
    """
    mapping = config_service.get_job_from_repository(repository)
    if not mapping:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    target_id = mapping.get("job_id")
    ns = mapping.get("namespace")

    # Dedicated client; its namespace is overridden only when one is configured.
    client = NomadService()
    if ns:
        client.namespace = ns

    stop_result = client.stop_job(target_id, purge)

    response = {
        "job_id": target_id,
        "repository": repository,
        "status": "stopped",
        "eval_id": stop_result.get("eval_id"),
        "namespace": ns,
    }
    return response
|
||||
|
||||
@router.post("/by-repository/{repository}/restart")
async def restart_job_by_repository(repository: str):
    """Restart the job that the config service associates with a repository.

    The current specification is fetched, the job is stopped, and the same
    specification is submitted again.

    Returns:
        A dict with ``job_id``, ``repository``, ``status`` (``"restarted"``),
        ``eval_id`` from the start result, and ``namespace``.

    Raises:
        HTTPException: 404 when no job is configured for the repository.
    """
    mapping = config_service.get_job_from_repository(repository)
    if not mapping:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    target_id = mapping.get("job_id")
    ns = mapping.get("namespace")

    # Dedicated client; its namespace is overridden only when one is configured.
    client = NomadService()
    if ns:
        client.namespace = ns

    # Fetch the spec before stopping so the same spec can be re-submitted.
    current_spec = client.get_job(target_id)
    client.stop_job(target_id)
    start_result = client.start_job(current_spec)

    response = {
        "job_id": target_id,
        "repository": repository,
        "status": "restarted",
        "eval_id": start_result.get("eval_id"),
        "namespace": ns,
    }
    return response
|
Reference in New Issue
Block a user