Update README.md
1 app/routers/__init__.py Normal file
@@ -0,0 +1 @@
# Import routers
BIN app/routers/__pycache__/__init__.cpython-313.pyc Normal file (binary file not shown)
BIN app/routers/__pycache__/claude.cpython-313.pyc Normal file (binary file not shown)
BIN app/routers/__pycache__/configs.cpython-313.pyc Normal file (binary file not shown)
BIN app/routers/__pycache__/jobs.cpython-313.pyc Normal file (binary file not shown)
BIN app/routers/__pycache__/logs.cpython-313.pyc Normal file (binary file not shown)
BIN app/routers/__pycache__/repositories.cpython-313.pyc Normal file (binary file not shown)
230 app/routers/claude.py Normal file
@@ -0,0 +1,230 @@
from fastapi import APIRouter, HTTPException, Body, Query, Depends
from typing import Dict, Any, List, Optional
import logging
import json

from app.services.nomad_client import NomadService
from app.schemas.claude_api import ClaudeJobRequest, ClaudeJobSpecification, ClaudeJobResponse

router = APIRouter()
logger = logging.getLogger(__name__)

@router.post("/jobs", response_model=ClaudeJobResponse)
async def manage_job(request: ClaudeJobRequest):
    """
    Endpoint for Claude to manage Nomad jobs with a simplified interface.

    This endpoint handles job operations like start, stop, restart, and status checks.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        if request.namespace:
            nomad_service.namespace = request.namespace

        # Handle different actions
        if request.action.lower() == "status":
            # Get job status
            job = nomad_service.get_job(request.job_id)

            # Get allocations for more detailed status
            allocations = nomad_service.get_allocations(request.job_id)
            latest_alloc = None
            if allocations:
                # Sort allocations by creation time (descending)
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status=job.get("Status", "unknown"),
                message=f"Job {request.job_id} is {job.get('Status', 'unknown')}",
                details={
                    "job": job,
                    "latest_allocation": latest_alloc
                }
            )

        elif request.action.lower() == "stop":
            # Stop the job
            result = nomad_service.stop_job(request.job_id, purge=request.purge)

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="stopped",
                message=f"Job {request.job_id} has been stopped" + (" and purged" if request.purge else ""),
                details=result
            )

        elif request.action.lower() == "restart":
            # Get the current job specification
            job_spec = nomad_service.get_job(request.job_id)

            # Stop the job
            nomad_service.stop_job(request.job_id)

            # Start the job with the original specification
            result = nomad_service.start_job(job_spec)

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="restarted",
                message=f"Job {request.job_id} has been restarted",
                details=result
            )

        else:
            # Unknown action
            raise HTTPException(status_code=400, detail=f"Unknown action: {request.action}")

    except Exception as e:
        logger.error(f"Error managing job {request.job_id}: {str(e)}")
        return ClaudeJobResponse(
            success=False,
            job_id=request.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None
        )

@router.post("/create-job", response_model=ClaudeJobResponse)
async def create_job(job_spec: ClaudeJobSpecification):
    """
    Endpoint for Claude to create a new Nomad job with a simplified interface.

    This endpoint allows creating a job with minimal configuration.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        if job_spec.namespace:
            nomad_service.namespace = job_spec.namespace

        # Convert the simplified job spec to Nomad format
        nomad_job_spec = job_spec.to_nomad_job_spec()

        # Start the job
        result = nomad_service.start_job(nomad_job_spec)

        return ClaudeJobResponse(
            success=True,
            job_id=job_spec.job_id,
            status="started",
            message=f"Job {job_spec.job_id} has been created and started",
            details=result
        )

    except Exception as e:
        logger.error(f"Error creating job {job_spec.job_id}: {str(e)}")
        return ClaudeJobResponse(
            success=False,
            job_id=job_spec.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None
        )

@router.get("/list-jobs", response_model=List[Dict[str, Any]])
async def list_jobs(namespace: str = Query("development")):
    """
    List all jobs in the specified namespace.

    Returns a simplified list of jobs with their IDs and statuses.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        nomad_service.namespace = namespace

        # Get all jobs
        jobs = nomad_service.list_jobs()

        # Return a simplified list
        simplified_jobs = []
        for job in jobs:
            simplified_jobs.append({
                "id": job.get("ID"),
                "name": job.get("Name"),
                "status": job.get("Status"),
                "type": job.get("Type"),
                "namespace": namespace
            })

        return simplified_jobs

    except Exception as e:
        logger.error(f"Error listing jobs: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing jobs: {str(e)}")

@router.get("/job-logs/{job_id}", response_model=Dict[str, Any])
async def get_job_logs(job_id: str, namespace: str = Query("development")):
    """
    Get logs for a job.

    Returns logs from the latest allocation of the job.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        nomad_service.namespace = namespace

        # Get allocations for the job
        allocations = nomad_service.get_allocations(job_id)
        if not allocations:
            return {
                "success": False,
                "job_id": job_id,
                "message": f"No allocations found for job {job_id}",
                "logs": None
            }

        # Sort allocations by creation time (descending)
        sorted_allocations = sorted(
            allocations,
            key=lambda a: a.get("CreateTime", 0),
            reverse=True
        )
        latest_alloc = sorted_allocations[0]
        alloc_id = latest_alloc.get("ID")

        # Get the task name from the allocation
        task_name = None
        if "TaskStates" in latest_alloc:
            task_states = latest_alloc["TaskStates"]
            if task_states:
                task_name = next(iter(task_states.keys()))

        if not task_name:
            task_name = "app"  # Default task name

        # Get logs for the allocation
        stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout")
        stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr")

        return {
            "success": True,
            "job_id": job_id,
            "allocation_id": alloc_id,
            "task_name": task_name,
            "message": f"Retrieved logs for job {job_id}",
            "logs": {
                "stdout": stdout_logs,
                "stderr": stderr_logs
            }
        }

    except Exception as e:
        logger.error(f"Error getting logs for job {job_id}: {str(e)}")
        return {
            "success": False,
            "job_id": job_id,
            "message": f"Error getting logs: {str(e)}",
            "logs": None
        }
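Note: a minimal sketch of how a client might exercise the claude.py endpoints above. The base URL, the /api/claude mount prefix, and the exact ClaudeJobRequest fields accepted are assumptions not shown in this diff.

# Hypothetical client calls for the claude.py router above (assumed prefix /api/claude).
import requests

BASE = "http://localhost:8000/api/claude"

# Ask for the status of a job in the "development" namespace
resp = requests.post(f"{BASE}/jobs", json={
    "job_id": "example-api",      # hypothetical job ID
    "action": "status",
    "namespace": "development",
})
print(resp.json())

# List jobs in the same namespace
print(requests.get(f"{BASE}/list-jobs", params={"namespace": "development"}).json())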
80 app/routers/configs.py Normal file
@@ -0,0 +1,80 @@
from fastapi import APIRouter, HTTPException, Body, Path
from typing import List, Dict, Any
import json

from app.services.config_service import ConfigService
from app.schemas.config import ConfigCreate, ConfigUpdate, ConfigResponse

router = APIRouter()
config_service = ConfigService()

@router.get("/", response_model=List[ConfigResponse])
async def list_configs():
    """List all available configurations."""
    return config_service.list_configs()

@router.get("/{name}", response_model=ConfigResponse)
async def get_config(name: str = Path(..., description="Configuration name")):
    """Get a specific configuration by name."""
    return config_service.get_config(name)

@router.post("/", response_model=ConfigResponse, status_code=201)
async def create_config(config_data: ConfigCreate):
    """Create a new configuration."""
    return config_service.create_config(config_data.name, config_data.dict(exclude={"name"}))

@router.put("/{name}", response_model=ConfigResponse)
async def update_config(name: str, config_data: ConfigUpdate):
    """Update an existing configuration."""
    return config_service.update_config(name, config_data.dict(exclude_unset=True))

@router.delete("/{name}", response_model=Dict[str, Any])
async def delete_config(name: str = Path(..., description="Configuration name")):
    """Delete a configuration."""
    return config_service.delete_config(name)

@router.get("/repository/{repository}")
async def get_config_by_repository(repository: str):
    """Find configuration by repository."""
    configs = config_service.list_configs()

    for config in configs:
        if config.get("repository") == repository:
            return config

    raise HTTPException(status_code=404, detail=f"No configuration found for repository: {repository}")

@router.get("/job/{job_id}")
async def get_config_by_job(job_id: str):
    """Find configuration by job ID."""
    configs = config_service.list_configs()

    for config in configs:
        if config.get("job_id") == job_id:
            return config

    raise HTTPException(status_code=404, detail=f"No configuration found for job_id: {job_id}")

@router.post("/link")
async def link_repository_to_job(
    repository: str = Body(..., embed=True),
    job_id: str = Body(..., embed=True),
    name: str = Body(None, embed=True)
):
    """Link a repository to a job."""
    # Generate a name if not provided
    if not name:
        name = f"{job_id.lower().replace('/', '_').replace(' ', '_')}"

    # Create the config
    config = {
        "repository": repository,
        "job_id": job_id,
    }

    return config_service.create_config(name, config)

@router.post("/unlink/{name}")
async def unlink_repository_from_job(name: str):
    """Unlink a repository from a job by deleting the configuration."""
    return config_service.delete_config(name)
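Note: a minimal sketch of linking a repository to a job via the /link endpoint above. The base URL, the /api/configs prefix, and the repository URL are assumptions for illustration only.

# Hypothetical call to the configs router above (assumed prefix /api/configs).
import requests

resp = requests.post(
    "http://localhost:8000/api/configs/link",
    json={
        "repository": "https://gitea.example.com/org/example-api.git",  # hypothetical repo URL
        "job_id": "example-api",
        # "name" is optional; the endpoint derives one from job_id when omitted
    },
)
print(resp.json())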
396 app/routers/jobs.py Normal file
@@ -0,0 +1,396 @@
from fastapi import APIRouter, Depends, HTTPException, Body, Query
from typing import Dict, Any, List, Optional
import json
import logging

from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService
from app.schemas.job import JobResponse, JobOperation, JobSpecification

router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()

# Configure logging
logger = logging.getLogger(__name__)

@router.get("/", response_model=List[JobResponse])
async def list_jobs():
    """List all jobs."""
    jobs = nomad_service.list_jobs()
    # Enhance job responses with repository information if available
    for job in jobs:
        job_id = job.get("ID")
        if job_id:
            repository = config_service.get_repository_from_job(job_id)
            if repository:
                job["repository"] = repository
    return jobs

@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """Get a job by ID."""
    job = nomad_service.get_job(job_id)
    # Add repository information if available
    repository = config_service.get_repository_from_job(job_id)
    if repository:
        job["repository"] = repository
    return job

@router.post("/", response_model=JobOperation)
async def start_job(job_spec: JobSpecification = Body(...)):
    """Start a Nomad job with the provided specification."""
    return nomad_service.start_job(job_spec.dict())

@router.delete("/{job_id}", response_model=JobOperation)
async def stop_job(job_id: str, purge: bool = Query(False)):
    """Stop a job by ID."""
    return nomad_service.stop_job(job_id, purge)

@router.get("/{job_id}/allocations")
async def get_job_allocations(job_id: str):
    """Get all allocations for a job."""
    return nomad_service.get_allocations(job_id)

@router.get("/{job_id}/latest-allocation")
async def get_latest_allocation(job_id: str):
    """Get the latest allocation for a job."""
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    return sorted_allocations[0]

@router.get("/{job_id}/status")
async def get_job_status(job_id: str, namespace: str = Query(None, description="Nomad namespace")):
    """Get the current status of a job, including deployment and latest allocation."""
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job status for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)
        status = {
            "job_id": job_id,
            "namespace": namespace or custom_nomad.namespace,
            "status": job.get("Status", "unknown"),
            "stable": job.get("Stable", False),
            "submitted_at": job.get("SubmitTime", 0),
        }

        # Get the latest deployment if any
        try:
            deployment = custom_nomad.get_deployment_status(job_id)
            if deployment:
                status["deployment"] = {
                    "id": deployment.get("ID"),
                    "status": deployment.get("Status"),
                    "description": deployment.get("StatusDescription"),
                }
        except Exception as e:
            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")
            pass  # Deployment info is optional

        # Get the latest allocation if any
        try:
            allocations = custom_nomad.get_allocations(job_id)
            if allocations:
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]
                status["latest_allocation"] = {
                    "id": latest_alloc.get("ID"),
                    "status": latest_alloc.get("ClientStatus"),
                    "description": latest_alloc.get("ClientDescription", ""),
                    "created_at": latest_alloc.get("CreateTime", 0),
                }
        except Exception as e:
            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")
            pass  # Allocation info is optional

        return status
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}")

@router.get("/{job_id}/specification")
async def get_job_specification(job_id: str, namespace: str = Query(None, description="Nomad namespace"), raw: bool = Query(False)):
    """Get the job specification for a job."""
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job specification for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)

        if raw:
            return job

        # Extract just the job specification part if present
        if "JobID" in job:
            job_spec = {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "type": job.get("Type"),
                "status": job.get("Status"),
                "datacenters": job.get("Datacenters", []),
                "namespace": job.get("Namespace"),
                "task_groups": job.get("TaskGroups", []),
                "meta": job.get("Meta", {}),
            }
            return job_spec

        return job
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}")

@router.post("/{job_id}/restart")
async def restart_job(job_id: str):
    """Restart a job by stopping it and starting it again."""
    try:
        # Get the current job specification
        job_spec = nomad_service.get_job(job_id)

        # Stop the job
        nomad_service.stop_job(job_id)

        # Start the job with the original specification
        result = nomad_service.start_job(job_spec)

        return {
            "job_id": job_id,
            "status": "restarted",
            "eval_id": result.get("eval_id"),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}")

@router.get("/by-repository/{repository}")
async def get_job_by_repository(repository: str):
    """Get job information by repository URL or name."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Get the job using the specific namespace if provided
    try:
        if namespace:
            # Override the default namespace with the specific one
            custom_nomad = NomadService()
            custom_nomad.namespace = namespace
            job = custom_nomad.get_job(job_id)
        else:
            # Use the default namespace settings
            job = nomad_service.get_job(job_id)

        # Add repository information
        job["repository"] = repository
        return job
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}")

@router.post("/by-repository/{repository}/start")
async def start_job_by_repository(repository: str):
    """Start a job by its associated repository."""
    logger = logging.getLogger(__name__)

    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        logger.info(f"Setting custom_nomad.namespace to {namespace}")
        custom_nomad.namespace = namespace

    # Log the current namespace being used
    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")

    try:
        # Get the job specification from an existing job
        job_spec = custom_nomad.get_job(job_id)

        # Log the job specification
        logger.info(f"Retrieved job specification for {job_id} from existing job")

        # Ensure namespace is set in job spec
        if isinstance(job_spec, dict):
            # Ensure namespace is explicitly set
            if namespace:
                logger.info(f"Setting namespace in job spec to {namespace}")
                job_spec["Namespace"] = namespace

            # Log the keys in the job specification
            logger.info(f"Job spec keys: {job_spec.keys()}")

        # Start the job with the retrieved specification
        result = custom_nomad.start_job(job_spec)

        return {
            "job_id": job_id,
            "repository": repository,
            "status": "started",
            "eval_id": result.get("eval_id"),
            "namespace": namespace
        }
    except HTTPException as e:
        # If job not found, try to get spec from config
        if e.status_code == 404:
            logger.info(f"Job {job_id} not found, attempting to get specification from config")

            # Try to get job spec from repository config
            job_spec = config_service.get_job_spec_from_repository(repository)

            if not job_spec:
                logger.warning(f"No job specification found for repository {repository}, creating a default one")

                # Create a simple default job spec if none exists
                job_spec = {
                    "ID": job_id,
                    "Name": job_id,
                    "Type": "service",
                    "Datacenters": ["jm"],  # Default datacenter
                    "TaskGroups": [
                        {
                            "Name": "app",
                            "Count": 1,
                            "Tasks": [
                                {
                                    "Name": job_id.split('-')[0],  # Use first part of job ID as task name
                                    "Driver": "docker",
                                    "Config": {
                                        "image": f"registry.dev.meisheng.group/{repository}:latest",
                                        "force_pull": True,
                                        "ports": ["http"]
                                    },
                                    "Resources": {
                                        "CPU": 500,
                                        "MemoryMB": 512
                                    }
                                }
                            ],
                            "Networks": [
                                {
                                    "DynamicPorts": [
                                        {
                                            "Label": "http",
                                            "Value": 0,
                                            "To": 8000
                                        }
                                    ]
                                }
                            ]
                        }
                    ],
                    "Meta": {
                        "repository": repository
                    }
                }

            # Set the namespace explicitly in the job spec
            if namespace:
                logger.info(f"Setting namespace in default job spec to {namespace}")
                job_spec["Namespace"] = namespace

            logger.info(f"Starting job {job_id} with specification")

            # Log the job specification structure
            if isinstance(job_spec, dict):
                logger.info(f"Job spec keys: {job_spec.keys()}")
                if "Namespace" in job_spec:
                    logger.info(f"Job spec namespace: {job_spec['Namespace']}")

            # Start the job with the specification
            result = custom_nomad.start_job(job_spec)

            return {
                "job_id": job_id,
                "repository": repository,
                "status": "started",
                "eval_id": result.get("eval_id"),
                "namespace": namespace
            }

@router.post("/by-repository/{repository}/stop")
async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
    """Stop a job by its associated repository."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace

    # Stop the job
    result = custom_nomad.stop_job(job_id, purge)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "stopped",
        "eval_id": result.get("eval_id"),
        "namespace": namespace
    }

@router.post("/by-repository/{repository}/restart")
async def restart_job_by_repository(repository: str):
    """Restart a job by its associated repository."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace

    # Get the job specification
    job_spec = custom_nomad.get_job(job_id)

    # Stop the job first
    custom_nomad.stop_job(job_id)

    # Start the job with the original specification
    result = custom_nomad.start_job(job_spec)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "restarted",
        "eval_id": result.get("eval_id"),
        "namespace": namespace
    }
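Note: a minimal sketch of driving the jobs router above by repository alias. The base URL, the /api/jobs prefix, and the "example-api" alias/job ID are assumptions not shown in this diff.

# Hypothetical calls to the jobs router above (assumed prefix /api/jobs).
import requests

BASE = "http://localhost:8000/api/jobs"

# Start the job linked to a repository alias, then poll its status by job ID
print(requests.post(f"{BASE}/by-repository/example-api/start").json())
print(requests.get(f"{BASE}/example-api/status", params={"namespace": "development"}).json())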
293 app/routers/logs.py Normal file
@@ -0,0 +1,293 @@
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional
import logging

from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService

# Configure logging
logger = logging.getLogger(__name__)

router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()

# More specific routes first
@router.get("/repository/{repository}")
async def get_repository_logs(
    repository: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    limit: int = Query(1, description="Number of allocations to return logs for"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs for a repository's associated job."""
    # Get the job info for the repository
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Getting logs for job {job_id} in namespace {namespace}")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace

    # Get allocations for the job
    allocations = custom_nomad.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    logger.info(f"Found {len(allocations)} allocations for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    # Limit the number of allocations
    allocations_to_check = sorted_allocations[:limit]

    # Also get the job info to determine task names
    job = custom_nomad.get_job(job_id)

    # Collect logs for each allocation and task
    result = []
    error_messages = []

    for alloc in allocations_to_check:
        # Use the full UUID of the allocation
        alloc_id = alloc.get("ID")
        if not alloc_id:
            logger.warning(f"Allocation ID not found in allocation data")
            error_messages.append("Allocation ID not found in allocation data")
            continue

        logger.info(f"Processing allocation {alloc_id} for job {job_id}")

        # Get task name from the allocation's TaskStates
        task_states = alloc.get("TaskStates", {})
        if not task_states:
            logger.warning(f"No task states found in allocation {alloc_id}")
            error_messages.append(f"No task states found in allocation {alloc_id}")

        for task_name, task_state in task_states.items():
            try:
                logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")

                logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)

                # Check if logs is an error message
                if logs and isinstance(logs, str):
                    if logs.startswith("Error:") or logs.startswith("No "):
                        logger.warning(f"Error retrieving logs for {task_name}: {logs}")
                        error_messages.append(logs)
                        continue

                # Only add if we got some logs
                if logs:
                    result.append({
                        "alloc_id": alloc_id,
                        "task": task_name,
                        "type": log_type,
                        "create_time": alloc.get("CreateTime"),
                        "logs": logs
                    })
                    logger.info(f"Successfully retrieved logs for {task_name}")
                else:
                    error_msg = f"No logs found for {task_name}"
                    logger.warning(error_msg)
                    error_messages.append(error_msg)
            except Exception as e:
                # Log but continue to try other tasks
                error_msg = f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}"
                logger.error(error_msg)
                error_messages.append(error_msg)

    # Return as plain text if requested
    if plain_text:
        if not result:
            if error_messages:
                return f"No logs found for this job. Errors: {'; '.join(error_messages)}"
            return "No logs found for this job"
        return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])

    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "repository": repository,
        "namespace": namespace,
        "allocation_logs": result,
        "errors": error_messages if error_messages else None
    }

@router.get("/job/{job_id}")
async def get_job_logs(
    job_id: str,
    namespace: str = Query(None, description="Nomad namespace"),
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    limit: int = Query(1, description="Number of allocations to return logs for"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs for the most recent allocations of a job."""
    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace
        logger.info(f"Getting logs for job {job_id} in namespace {namespace}")
    else:
        logger.info(f"Getting logs for job {job_id} in default namespace")

    # Get all allocations for the job
    allocations = custom_nomad.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    logger.info(f"Found {len(allocations)} allocations for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    # Limit the number of allocations
    allocations_to_check = sorted_allocations[:limit]

    # Collect logs for each allocation and task
    result = []
    for alloc in allocations_to_check:
        alloc_id = alloc.get("ID")
        if not alloc_id:
            logger.warning(f"Allocation ID not found in allocation data")
            continue

        logger.info(f"Processing allocation {alloc_id} for job {job_id}")

        # Get task names from the allocation's TaskStates
        task_states = alloc.get("TaskStates", {})
        for task_name, task_state in task_states.items():
            try:
                logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")

                logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)
                # Only add if we got some logs and not an error message
                if logs and not logs.startswith("No") and not logs.startswith("Error"):
                    result.append({
                        "alloc_id": alloc_id,
                        "task": task_name,
                        "type": log_type,
                        "create_time": alloc.get("CreateTime"),
                        "logs": logs
                    })
                    logger.info(f"Successfully retrieved logs for {task_name}")
                else:
                    logger.warning(f"No logs found for {task_name}: {logs}")
            except Exception as e:
                # Log but continue to try other tasks
                logger.error(f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}")

    # Return as plain text if requested
    if plain_text:
        if not result:
            return "No logs found for this job"
        return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])

    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "namespace": namespace,
        "allocation_logs": result
    }

@router.get("/latest/{job_id}")
async def get_latest_allocation_logs(
    job_id: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs from the latest allocation of a job."""
    # Get all allocations for the job
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    # Get the latest allocation
    latest_alloc = sorted_allocations[0]
    alloc_id = latest_alloc.get("ID")

    # Get task group and task information
    job = nomad_service.get_job(job_id)
    task_groups = job.get("TaskGroups", [])

    # Collect logs for each task in the latest allocation
    result = []
    for task_group in task_groups:
        tasks = task_group.get("Tasks", [])
        for task in tasks:
            task_name = task.get("Name")
            try:
                logs = nomad_service.get_allocation_logs(alloc_id, task_name, log_type)
                result.append({
                    "alloc_id": alloc_id,
                    "task": task_name,
                    "type": log_type,
                    "create_time": latest_alloc.get("CreateTime"),
                    "logs": logs
                })
            except Exception as e:
                # Skip if logs cannot be retrieved for this task
                pass

    # Return as plain text if requested
    if plain_text:
        return "\n\n".join([f"=== {r['task']} ===\n{r['logs']}" for r in result])

    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "latest_allocation": alloc_id,
        "task_logs": result
    }

@router.get("/build/{job_id}")
async def get_build_logs(job_id: str, plain_text: bool = Query(False)):
    """Get build logs for a job (usually stderr logs from the latest allocation)."""
    # This is a convenience endpoint that returns stderr logs from the latest allocation
    return await get_latest_allocation_logs(job_id, "stderr", plain_text)

# Generic allocation logs route last
@router.get("/allocation/{alloc_id}/{task}")
async def get_allocation_logs(
    alloc_id: str,
    task: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs for a specific allocation and task."""
    # Validate log_type
    if log_type not in ["stdout", "stderr"]:
        raise HTTPException(status_code=400, detail="Log type must be stdout or stderr")

    # Get logs from Nomad
    logs = nomad_service.get_allocation_logs(alloc_id, task, log_type)

    # Return as plain text if requested
    if plain_text:
        return logs

    # Otherwise return as JSON
    return {"alloc_id": alloc_id, "task": task, "type": log_type, "logs": logs}
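Note: a minimal sketch of fetching plain-text stderr logs through the logs router above. The base URL, the /api/logs prefix, and the "example-api" job ID are assumptions for illustration.

# Hypothetical call to the logs router above (assumed prefix /api/logs).
import requests

resp = requests.get(
    "http://localhost:8000/api/logs/job/example-api",
    params={"namespace": "development", "log_type": "stderr", "limit": 2, "plain_text": True},
)
print(resp.text)  # with plain_text=True the endpoint returns joined log text rather than JSON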
89 app/routers/repositories.py Normal file
@@ -0,0 +1,89 @@
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional

from app.services.gitea_client import GiteaClient
from app.services.config_service import ConfigService

router = APIRouter()
gitea_client = GiteaClient()
config_service = ConfigService()

@router.get("/")
async def list_repositories(limit: int = Query(100, description="Maximum number of repositories to return")):
    """
    List all available repositories from Gitea.

    If Gitea integration is not configured, returns an empty list.
    """
    repositories = gitea_client.list_repositories(limit)

    # Enhance with linked job information
    for repo in repositories:
        # Create a URL from clone_url
        repo_url = repo.get("clone_url")
        if repo_url:
            # Check if repository is linked to a job
            configs = config_service.list_configs()
            for config in configs:
                if config.get("repository") == repo_url:
                    repo["linked_job"] = config.get("job_id")
                    repo["config_name"] = config.get("name")
                    break

    return repositories

@router.get("/{repository}")
async def get_repository_info(repository: str):
    """
    Get information about a specific repository.

    The repository parameter can be a repository URL or a repository alias.
    If it's a repository URL, we'll get the info directly from Gitea.
    If it's a repository alias, we'll get the info from the configuration and then from Gitea.
    """
    # First check if it's a repository URL
    repo_info = gitea_client.get_repository_info(repository)

    if repo_info:
        # Check if repository is linked to a job
        configs = config_service.list_configs()
        for config in configs:
            if config.get("repository") == repository:
                repo_info["linked_job"] = config.get("job_id")
                repo_info["config_name"] = config.get("name")
                repo_info["config"] = config
                break

        return repo_info
    else:
        # Check if it's a repository alias in our configs
        config = config_service.get_config_by_repository(repository)
        if config:
            repo_url = config.get("repository")
            repo_info = gitea_client.get_repository_info(repo_url)

            if repo_info:
                repo_info["linked_job"] = config.get("job_id")
                repo_info["config_name"] = config.get("name")
                repo_info["config"] = config
                return repo_info

        raise HTTPException(status_code=404, detail=f"Repository not found: {repository}")

@router.get("/{repository}/branches")
async def get_repository_branches(repository: str):
    """
    Get branches for a specific repository.

    The repository parameter can be a repository URL or a repository alias.
    """
    # If it's a repository alias, get the actual URL
    config = config_service.get_config_by_repository(repository)
    if config:
        repository = config.get("repository")

    branches = gitea_client.get_repository_branches(repository)
    if not branches:
        raise HTTPException(status_code=404, detail=f"No branches found for repository: {repository}")

    return branches
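Note: a minimal sketch of calling the repository routes above. The base URL, the /api/repositories prefix, and the "example-api" alias are assumptions not shown in this diff.

# Hypothetical calls to the repositories router above (assumed prefix /api/repositories).
import requests

BASE = "http://localhost:8000/api/repositories"
print(requests.get(f"{BASE}/", params={"limit": 10}).json())       # list repositories with linked jobs
print(requests.get(f"{BASE}/example-api/branches").json())          # branches for a repository alias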