Update README.md

2025-02-26 15:25:39 +07:00
parent d6acf632e3
commit baf1723a50
69 changed files with 5525 additions and 0 deletions
--- a/app/routers/jobs.py
+++ b/app/routers/jobs.py
@ -0,0 +1,396 @@
+from fastapi import APIRouter, Depends, HTTPException, Body, Query
+from typing import Dict, Any, List, Optional
+import json
+import logging
+
+from app.services.nomad_client import NomadService
+from app.services.config_service import ConfigService
+from app.schemas.job import JobResponse, JobOperation, JobSpecification
+
+# Router that collects every job endpoint defined in this module.
+router = APIRouter()
+# Module-wide service instances used by the handlers below; some handlers
+# build their own NomadService so they can override its namespace.
+nomad_service = NomadService()
+config_service = ConfigService()
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+@router.get("/", response_model=List[JobResponse])
+async def list_jobs():
+    """Return every job reported by Nomad, tagged with its repository when one is configured."""
+    all_jobs = nomad_service.list_jobs()
+    for entry in all_jobs:
+        entry_id = entry.get("ID")
+        if not entry_id:
+            # Without an ID there is nothing to look up in the config.
+            continue
+        repo = config_service.get_repository_from_job(entry_id)
+        if repo:
+            entry["repository"] = repo
+    return all_jobs
+
+@router.get("/{job_id}", response_model=JobResponse)
+async def get_job(job_id: str):
+    """Fetch a single job by ID and attach its repository when one is configured."""
+    job_data = nomad_service.get_job(job_id)
+    repo = config_service.get_repository_from_job(job_id)
+    if not repo:
+        return job_data
+    # Only annotate the job when the config maps it to a repository.
+    job_data["repository"] = repo
+    return job_data
+
+@router.post("/", response_model=JobOperation)
+async def start_job(job_spec: JobSpecification = Body(...)):
+    """Submit the provided job specification to Nomad and return the operation result."""
+    # The service layer receives the specification as a plain dict.
+    spec_payload = job_spec.dict()
+    return nomad_service.start_job(spec_payload)
+
+@router.delete("/{job_id}", response_model=JobOperation)
+async def stop_job(job_id: str, purge: bool = Query(False)):
+    """Stop the job with the given ID, forwarding the ``purge`` flag to the Nomad service."""
+    outcome = nomad_service.stop_job(job_id, purge)
+    return outcome
+
+@router.get("/{job_id}/allocations")
+async def get_job_allocations(job_id: str):
+    """Return the allocations the Nomad service reports for the given job."""
+    job_allocations = nomad_service.get_allocations(job_id)
+    return job_allocations
+
+@router.get("/{job_id}/latest-allocation")
+async def get_latest_allocation(job_id: str):
+    """Get the most recently created allocation for a job.
+
+    Args:
+        job_id: ID of the job whose allocations are inspected.
+
+    Returns:
+        The allocation with the highest ``CreateTime``; allocations without
+        that key are treated as created at time 0.
+
+    Raises:
+        HTTPException: 404 when the job has no allocations.
+    """
+    allocations = nomad_service.get_allocations(job_id)
+    if not allocations:
+        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
+
+    # A single pass is enough to find the newest allocation; on CreateTime
+    # ties the earliest-listed allocation wins.
+    return max(allocations, key=lambda a: a.get("CreateTime", 0))
+
+@router.get("/{job_id}/status")
+async def get_job_status(job_id: str, namespace: Optional[str] = Query(None, description="Nomad namespace")):
+    """Get the current status of a job, including deployment and latest allocation.
+
+    Args:
+        job_id: ID of the job to inspect.
+        namespace: Optional Nomad namespace; when omitted, the namespace the
+            NomadService instance already carries is used.
+
+    Returns:
+        A dict with ``job_id``, ``namespace``, ``status``, ``stable`` and
+        ``submitted_at``, plus ``deployment`` and ``latest_allocation`` when
+        that information could be retrieved.
+
+    Raises:
+        HTTPException: with the upstream status code when the Nomad service
+            raised an HTTPException (e.g. 404 for an unknown job), otherwise
+            500.
+    """
+    try:
+        # Create a custom service with the specific namespace if provided
+        custom_nomad = NomadService()
+        if namespace:
+            custom_nomad.namespace = namespace
+            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
+        else:
+            logger.info(f"Getting job status for {job_id} in default namespace (development)")
+
+        job = custom_nomad.get_job(job_id)
+        status = {
+            "job_id": job_id,
+            "namespace": namespace or custom_nomad.namespace,
+            "status": job.get("Status", "unknown"),
+            "stable": job.get("Stable", False),
+            "submitted_at": job.get("SubmitTime", 0),
+        }
+
+        # Deployment info is optional: a failure here is logged, not raised.
+        try:
+            deployment = custom_nomad.get_deployment_status(job_id)
+            if deployment:
+                status["deployment"] = {
+                    "id": deployment.get("ID"),
+                    "status": deployment.get("Status"),
+                    "description": deployment.get("StatusDescription"),
+                }
+        except Exception as e:
+            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")
+
+        # Allocation info is optional as well.
+        try:
+            allocations = custom_nomad.get_allocations(job_id)
+            if allocations:
+                # Newest allocation by CreateTime; missing values count as 0.
+                latest_alloc = max(allocations, key=lambda a: a.get("CreateTime", 0))
+                status["latest_allocation"] = {
+                    "id": latest_alloc.get("ID"),
+                    "status": latest_alloc.get("ClientStatus"),
+                    "description": latest_alloc.get("ClientDescription", ""),
+                    "created_at": latest_alloc.get("CreateTime", 0),
+                }
+        except Exception as e:
+            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")
+
+        return status
+    except HTTPException as e:
+        # Keep the upstream status (e.g. 404 for a missing job) instead of
+        # reporting every failure as a 500.
+        raise HTTPException(status_code=e.status_code, detail=f"Failed to get job status: {str(e)}") from e
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}") from e
+
+@router.get("/{job_id}/specification")
+async def get_job_specification(job_id: str, namespace: Optional[str] = Query(None, description="Nomad namespace"), raw: bool = Query(False)):
+    """Get the job specification for a job.
+
+    Args:
+        job_id: ID of the job to read.
+        namespace: Optional Nomad namespace; when omitted, the namespace the
+            NomadService instance already carries is used.
+        raw: When true, return the job exactly as the Nomad service returned it.
+
+    Returns:
+        The job as returned by the Nomad service, or a reduced dict
+        (``id``, ``name``, ``type``, ``status``, ``datacenters``,
+        ``namespace``, ``task_groups``, ``meta``) when the payload contains a
+        ``"JobID"`` key and ``raw`` is false.
+
+    Raises:
+        HTTPException: with the upstream status code when the Nomad service
+            raised an HTTPException, otherwise 404.
+    """
+    try:
+        # Create a custom service with the specific namespace if provided
+        custom_nomad = NomadService()
+        if namespace:
+            custom_nomad.namespace = namespace
+            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
+        else:
+            logger.info(f"Getting job specification for {job_id} in default namespace (development)")
+
+        job = custom_nomad.get_job(job_id)
+
+        if raw:
+            return job
+
+        # NOTE(review): the guard tests for "JobID" while the fields below are
+        # read from "ID"; confirm which key the job payload actually carries.
+        if "JobID" in job:
+            return {
+                "id": job.get("ID"),
+                "name": job.get("Name"),
+                "type": job.get("Type"),
+                "status": job.get("Status"),
+                "datacenters": job.get("Datacenters", []),
+                "namespace": job.get("Namespace"),
+                "task_groups": job.get("TaskGroups", []),
+                "meta": job.get("Meta", {}),
+            }
+
+        return job
+    except HTTPException as e:
+        # Keep the upstream status instead of reporting every failure as 404.
+        raise HTTPException(status_code=e.status_code, detail=f"Failed to get job specification: {str(e)}") from e
+    except Exception as e:
+        # Unexpected errors keep the 404 this endpoint has always returned.
+        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}") from e
+
+@router.post("/{job_id}/restart")
+async def restart_job(job_id: str):
+    """Restart a job by stopping it and starting it again.
+
+    Args:
+        job_id: ID of the job to restart.
+
+    Returns:
+        A dict with ``job_id``, ``status`` (always ``"restarted"``) and the
+        ``eval_id`` reported by the start operation.
+
+    Raises:
+        HTTPException: with the upstream status code when the Nomad service
+            raised an HTTPException (e.g. 404 for an unknown job), otherwise
+            500.
+    """
+    try:
+        # Capture the specification before stopping so the same spec can be
+        # resubmitted afterwards.
+        job_spec = nomad_service.get_job(job_id)
+
+        nomad_service.stop_job(job_id)
+
+        result = nomad_service.start_job(job_spec)
+
+        return {
+            "job_id": job_id,
+            "status": "restarted",
+            "eval_id": result.get("eval_id"),
+        }
+    except HTTPException as e:
+        # Keep the upstream status instead of reporting every failure as 500.
+        raise HTTPException(status_code=e.status_code, detail=f"Failed to restart job: {str(e)}") from e
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}") from e
+
+@router.get("/by-repository/{repository}")
+async def get_job_by_repository(repository: str):
+    """Get job information by repository URL or name.
+
+    Args:
+        repository: Repository identifier looked up in the config service.
+
+    Returns:
+        The job as returned by the Nomad service, with a ``repository`` key
+        set to the requested repository.
+
+    Raises:
+        HTTPException: 404 when no job is mapped to the repository; the
+            upstream status code when the Nomad service raised an
+            HTTPException; 404 for any other failure while reading the job.
+    """
+    job_info = config_service.get_job_from_repository(repository)
+    if not job_info:
+        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
+
+    job_id = job_info.get("job_id")
+    namespace = job_info.get("namespace")
+
+    try:
+        if namespace:
+            # Use a dedicated client carrying the job's own namespace.
+            custom_nomad = NomadService()
+            custom_nomad.namespace = namespace
+            job = custom_nomad.get_job(job_id)
+        else:
+            # No namespace configured: use the shared client as-is.
+            job = nomad_service.get_job(job_id)
+
+        job["repository"] = repository
+        return job
+    except HTTPException as e:
+        # Keep the upstream status instead of reporting every failure as 404.
+        raise HTTPException(status_code=e.status_code, detail=f"Job not found: {job_id}, Error: {str(e)}") from e
+    except Exception as e:
+        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}") from e
+
+def _default_job_spec(job_id: str, repository: str) -> Dict[str, Any]:
+    """Build the fallback single-task Docker job spec used when none is configured."""
+    return {
+        "ID": job_id,
+        "Name": job_id,
+        "Type": "service",
+        "Datacenters": ["jm"],  # Default datacenter
+        "TaskGroups": [
+            {
+                "Name": "app",
+                "Count": 1,
+                "Tasks": [
+                    {
+                        "Name": job_id.split('-')[0],  # Use first part of job ID as task name
+                        "Driver": "docker",
+                        "Config": {
+                            "image": f"registry.dev.meisheng.group/{repository}:latest",
+                            "force_pull": True,
+                            "ports": ["http"],
+                        },
+                        "Resources": {
+                            "CPU": 500,
+                            "MemoryMB": 512,
+                        },
+                    }
+                ],
+                "Networks": [
+                    {
+                        "DynamicPorts": [
+                            {
+                                "Label": "http",
+                                "Value": 0,
+                                "To": 8000,
+                            }
+                        ]
+                    }
+                ],
+            }
+        ],
+        "Meta": {
+            "repository": repository,
+        },
+    }
+
+
+@router.post("/by-repository/{repository}/start")
+async def start_job_by_repository(repository: str):
+    """Start a job by its associated repository.
+
+    The specification of the existing job is resubmitted when the job can be
+    read. When that attempt fails with a 404, the specification stored in the
+    repository config is used, and failing that a default Docker service spec
+    is generated.
+
+    Args:
+        repository: Repository identifier looked up in the config service.
+
+    Returns:
+        A dict with ``job_id``, ``repository``, ``status`` (always
+        ``"started"``), ``eval_id`` and ``namespace``.
+
+    Raises:
+        HTTPException: 404 when no job is mapped to the repository; any
+            non-404 HTTPException raised while reading or starting the
+            existing job is propagated unchanged.
+    """
+    job_info = config_service.get_job_from_repository(repository)
+    if not job_info:
+        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
+
+    job_id = job_info.get("job_id")
+    namespace = job_info.get("namespace")
+
+    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")
+
+    # Create a custom service with the specific namespace if provided
+    custom_nomad = NomadService()
+    if namespace:
+        logger.info(f"Setting custom_nomad.namespace to {namespace}")
+        custom_nomad.namespace = namespace
+
+    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")
+
+    try:
+        # Preferred path: resubmit the specification of the existing job.
+        job_spec = custom_nomad.get_job(job_id)
+        logger.info(f"Retrieved job specification for {job_id} from existing job")
+
+        if isinstance(job_spec, dict):
+            if namespace:
+                logger.info(f"Setting namespace in job spec to {namespace}")
+                job_spec["Namespace"] = namespace
+            logger.info(f"Job spec keys: {job_spec.keys()}")
+
+        result = custom_nomad.start_job(job_spec)
+    except HTTPException as e:
+        if e.status_code != 404:
+            # Only "not found" has a fallback; anything else must reach the
+            # client instead of being swallowed into an empty 200 response.
+            raise
+
+        logger.info(f"Job {job_id} not found, attempting to get specification from config")
+        job_spec = config_service.get_job_spec_from_repository(repository)
+
+        if not job_spec:
+            logger.warning(f"No job specification found for repository {repository}, creating a default one")
+            job_spec = _default_job_spec(job_id, repository)
+
+            # The namespace is only forced onto the generated default spec.
+            if namespace:
+                logger.info(f"Setting namespace in default job spec to {namespace}")
+                job_spec["Namespace"] = namespace
+
+        logger.info(f"Starting job {job_id} with specification")
+
+        if isinstance(job_spec, dict):
+            logger.info(f"Job spec keys: {job_spec.keys()}")
+            if "Namespace" in job_spec:
+                logger.info(f"Job spec namespace: {job_spec['Namespace']}")
+
+        result = custom_nomad.start_job(job_spec)
+
+    return {
+        "job_id": job_id,
+        "repository": repository,
+        "status": "started",
+        "eval_id": result.get("eval_id"),
+        "namespace": namespace,
+    }
+
+@router.post("/by-repository/{repository}/stop")
+async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
+    """Look up the job mapped to a repository and stop it, forwarding the ``purge`` flag."""
+    mapping = config_service.get_job_from_repository(repository)
+    if not mapping:
+        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
+
+    target_job = mapping.get("job_id")
+    target_ns = mapping.get("namespace")
+
+    # A fresh client carries the job's namespace when one is configured.
+    client = NomadService()
+    if target_ns:
+        client.namespace = target_ns
+
+    stop_result = client.stop_job(target_job, purge)
+
+    response = {"job_id": target_job, "repository": repository, "status": "stopped"}
+    response["eval_id"] = stop_result.get("eval_id")
+    response["namespace"] = target_ns
+    return response
+
+@router.post("/by-repository/{repository}/restart")
+async def restart_job_by_repository(repository: str):
+    """Look up the job mapped to a repository, stop it, and resubmit its specification."""
+    mapping = config_service.get_job_from_repository(repository)
+    if not mapping:
+        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
+
+    target_job = mapping.get("job_id")
+    target_ns = mapping.get("namespace")
+
+    # A fresh client carries the job's namespace when one is configured.
+    client = NomadService()
+    if target_ns:
+        client.namespace = target_ns
+
+    # Read the specification before stopping, then submit that same spec again.
+    current_spec = client.get_job(target_job)
+    client.stop_job(target_job)
+    start_result = client.start_job(current_spec)
+
+    response = {"job_id": target_job, "repository": repository, "status": "restarted"}
+    response["eval_id"] = start_result.get("eval_id")
+    response["namespace"] = target_ns
+    return response