Update README.md
2  app/__init__.py  Normal file
@@ -0,0 +1,2 @@
# Import version
__version__ = "0.1.0"
BIN  app/__pycache__/__init__.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/__pycache__/main.cpython-313.pyc  Normal file
Binary file not shown.
101  app/main.py  Normal file
@@ -0,0 +1,101 @@
from fastapi import FastAPI, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os
import logging
from dotenv import load_dotenv

from app.routers import jobs, logs, configs, repositories, claude
from app.services.nomad_client import get_nomad_client
from app.services.gitea_client import GiteaClient

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Initialize the FastAPI app
app = FastAPI(
    title="Nomad MCP",
    description="Service for AI agents to manage Nomad jobs via MCP protocol",
    version="0.1.0",
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Can be set to specific origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(jobs.router, prefix="/api/jobs", tags=["jobs"])
app.include_router(logs.router, prefix="/api/logs", tags=["logs"])
app.include_router(configs.router, prefix="/api/configs", tags=["configs"])
app.include_router(repositories.router, prefix="/api/repositories", tags=["repositories"])
app.include_router(claude.router, prefix="/api/claude", tags=["claude"])

@app.get("/api/health", tags=["health"])
async def health_check():
    """Health check endpoint."""
    health_status = {
        "status": "healthy",
        "services": {}
    }

    # Check Nomad connection
    try:
        client = get_nomad_client()
        nomad_status = client.agent.get_agent()
        health_status["services"]["nomad"] = {
            "status": "connected",
            "version": nomad_status.get("config", {}).get("Version", "unknown"),
        }
    except Exception as e:
        logger.error(f"Nomad health check failed: {str(e)}")
        health_status["services"]["nomad"] = {
            "status": "failed",
            "error": str(e),
        }

    # Check Gitea connection
    try:
        gitea_client = GiteaClient()
        if gitea_client.api_base_url:
            # Try to list repositories as a connection test
            repos = gitea_client.list_repositories(limit=1)
            health_status["services"]["gitea"] = {
                "status": "connected",
                "api_url": gitea_client.api_base_url,
            }
        else:
            health_status["services"]["gitea"] = {
                "status": "not_configured",
            }
    except Exception as e:
        logger.error(f"Gitea health check failed: {str(e)}")
        health_status["services"]["gitea"] = {
            "status": "failed",
            "error": str(e),
        }

    # Overall status is unhealthy if any service is failed
    if any(service["status"] == "failed" for service in health_status["services"].values()):
        health_status["status"] = "unhealthy"

    return health_status

# Mount static files
app.mount("/", StaticFiles(directory="static", html=True), name="static")

if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", "8000"))
    uvicorn.run("app.main:app", host="0.0.0.0", port=port, reload=True)
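Once the service is running, the aggregate health endpoint above can be exercised directly. A minimal sketch, assuming the app is reachable on localhost at the default PORT of 8000 and that the requests package is installed:

import requests

# Query the health endpoint defined in app/main.py
resp = requests.get("http://localhost:8000/api/health")
data = resp.json()
print(data["status"])                 # "healthy" or "unhealthy"
print(data["services"].get("nomad"))  # per-service connection details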
1  app/routers/__init__.py  Normal file
@@ -0,0 +1 @@
# Import routers
BIN  app/routers/__pycache__/__init__.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/routers/__pycache__/claude.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/routers/__pycache__/configs.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/routers/__pycache__/jobs.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/routers/__pycache__/logs.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/routers/__pycache__/repositories.cpython-313.pyc  Normal file
Binary file not shown.
230  app/routers/claude.py  Normal file
@@ -0,0 +1,230 @@
from fastapi import APIRouter, HTTPException, Body, Query, Depends
from typing import Dict, Any, List, Optional
import logging
import json

from app.services.nomad_client import NomadService
from app.schemas.claude_api import ClaudeJobRequest, ClaudeJobSpecification, ClaudeJobResponse

router = APIRouter()
logger = logging.getLogger(__name__)

@router.post("/jobs", response_model=ClaudeJobResponse)
async def manage_job(request: ClaudeJobRequest):
    """
    Endpoint for Claude to manage Nomad jobs with a simplified interface.

    This endpoint handles job operations like start, stop, restart, and status checks.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        if request.namespace:
            nomad_service.namespace = request.namespace

        # Handle different actions
        if request.action.lower() == "status":
            # Get job status
            job = nomad_service.get_job(request.job_id)

            # Get allocations for more detailed status
            allocations = nomad_service.get_allocations(request.job_id)
            latest_alloc = None
            if allocations:
                # Sort allocations by creation time (descending)
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status=job.get("Status", "unknown"),
                message=f"Job {request.job_id} is {job.get('Status', 'unknown')}",
                details={
                    "job": job,
                    "latest_allocation": latest_alloc
                }
            )

        elif request.action.lower() == "stop":
            # Stop the job
            result = nomad_service.stop_job(request.job_id, purge=request.purge)

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="stopped",
                message=f"Job {request.job_id} has been stopped" + (" and purged" if request.purge else ""),
                details=result
            )

        elif request.action.lower() == "restart":
            # Get the current job specification
            job_spec = nomad_service.get_job(request.job_id)

            # Stop the job
            nomad_service.stop_job(request.job_id)

            # Start the job with the original specification
            result = nomad_service.start_job(job_spec)

            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="restarted",
                message=f"Job {request.job_id} has been restarted",
                details=result
            )

        else:
            # Unknown action
            raise HTTPException(status_code=400, detail=f"Unknown action: {request.action}")

    except Exception as e:
        logger.error(f"Error managing job {request.job_id}: {str(e)}")
        return ClaudeJobResponse(
            success=False,
            job_id=request.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None
        )

@router.post("/create-job", response_model=ClaudeJobResponse)
async def create_job(job_spec: ClaudeJobSpecification):
    """
    Endpoint for Claude to create a new Nomad job with a simplified interface.

    This endpoint allows creating a job with minimal configuration.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        if job_spec.namespace:
            nomad_service.namespace = job_spec.namespace

        # Convert the simplified job spec to Nomad format
        nomad_job_spec = job_spec.to_nomad_job_spec()

        # Start the job
        result = nomad_service.start_job(nomad_job_spec)

        return ClaudeJobResponse(
            success=True,
            job_id=job_spec.job_id,
            status="started",
            message=f"Job {job_spec.job_id} has been created and started",
            details=result
        )

    except Exception as e:
        logger.error(f"Error creating job {job_spec.job_id}: {str(e)}")
        return ClaudeJobResponse(
            success=False,
            job_id=job_spec.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None
        )

@router.get("/list-jobs", response_model=List[Dict[str, Any]])
async def list_jobs(namespace: str = Query("development")):
    """
    List all jobs in the specified namespace.

    Returns a simplified list of jobs with their IDs and statuses.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        nomad_service.namespace = namespace

        # Get all jobs
        jobs = nomad_service.list_jobs()

        # Return a simplified list
        simplified_jobs = []
        for job in jobs:
            simplified_jobs.append({
                "id": job.get("ID"),
                "name": job.get("Name"),
                "status": job.get("Status"),
                "type": job.get("Type"),
                "namespace": namespace
            })

        return simplified_jobs

    except Exception as e:
        logger.error(f"Error listing jobs: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing jobs: {str(e)}")

@router.get("/job-logs/{job_id}", response_model=Dict[str, Any])
async def get_job_logs(job_id: str, namespace: str = Query("development")):
    """
    Get logs for a job.

    Returns logs from the latest allocation of the job.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        nomad_service.namespace = namespace

        # Get allocations for the job
        allocations = nomad_service.get_allocations(job_id)
        if not allocations:
            return {
                "success": False,
                "job_id": job_id,
                "message": f"No allocations found for job {job_id}",
                "logs": None
            }

        # Sort allocations by creation time (descending)
        sorted_allocations = sorted(
            allocations,
            key=lambda a: a.get("CreateTime", 0),
            reverse=True
        )
        latest_alloc = sorted_allocations[0]
        alloc_id = latest_alloc.get("ID")

        # Get the task name from the allocation
        task_name = None
        if "TaskStates" in latest_alloc:
            task_states = latest_alloc["TaskStates"]
            if task_states:
                task_name = next(iter(task_states.keys()))

        if not task_name:
            task_name = "app"  # Default task name

        # Get logs for the allocation
        stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout")
        stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr")

        return {
            "success": True,
            "job_id": job_id,
            "allocation_id": alloc_id,
            "task_name": task_name,
            "message": f"Retrieved logs for job {job_id}",
            "logs": {
                "stdout": stdout_logs,
                "stderr": stderr_logs
            }
        }

    except Exception as e:
        logger.error(f"Error getting logs for job {job_id}: {str(e)}")
        return {
            "success": False,
            "job_id": job_id,
            "message": f"Error getting logs: {str(e)}",
            "logs": None
        }
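The /jobs action endpoint above takes a single verb plus a job ID. A hedged example request, assuming the API from app/main.py runs on localhost:8000; the job ID "web-app" is a hypothetical placeholder:

import requests

# Ask for the status of a job via the Claude-facing management route
resp = requests.post(
    "http://localhost:8000/api/claude/jobs",
    json={
        "job_id": "web-app",   # hypothetical job ID
        "action": "status",
        "namespace": "development",
    },
)
print(resp.json()["message"])  # e.g. "Job web-app is running"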
80  app/routers/configs.py  Normal file
@@ -0,0 +1,80 @@
from fastapi import APIRouter, HTTPException, Body, Path
from typing import List, Dict, Any
import json

from app.services.config_service import ConfigService
from app.schemas.config import ConfigCreate, ConfigUpdate, ConfigResponse

router = APIRouter()
config_service = ConfigService()

@router.get("/", response_model=List[ConfigResponse])
async def list_configs():
    """List all available configurations."""
    return config_service.list_configs()

@router.get("/{name}", response_model=ConfigResponse)
async def get_config(name: str = Path(..., description="Configuration name")):
    """Get a specific configuration by name."""
    return config_service.get_config(name)

@router.post("/", response_model=ConfigResponse, status_code=201)
async def create_config(config_data: ConfigCreate):
    """Create a new configuration."""
    return config_service.create_config(config_data.name, config_data.dict(exclude={"name"}))

@router.put("/{name}", response_model=ConfigResponse)
async def update_config(name: str, config_data: ConfigUpdate):
    """Update an existing configuration."""
    return config_service.update_config(name, config_data.dict(exclude_unset=True))

@router.delete("/{name}", response_model=Dict[str, Any])
async def delete_config(name: str = Path(..., description="Configuration name")):
    """Delete a configuration."""
    return config_service.delete_config(name)

@router.get("/repository/{repository}")
async def get_config_by_repository(repository: str):
    """Find configuration by repository."""
    configs = config_service.list_configs()

    for config in configs:
        if config.get("repository") == repository:
            return config

    raise HTTPException(status_code=404, detail=f"No configuration found for repository: {repository}")

@router.get("/job/{job_id}")
async def get_config_by_job(job_id: str):
    """Find configuration by job ID."""
    configs = config_service.list_configs()

    for config in configs:
        if config.get("job_id") == job_id:
            return config

    raise HTTPException(status_code=404, detail=f"No configuration found for job_id: {job_id}")

@router.post("/link")
async def link_repository_to_job(
    repository: str = Body(..., embed=True),
    job_id: str = Body(..., embed=True),
    name: str = Body(None, embed=True)
):
    """Link a repository to a job."""
    # Generate a name if not provided
    if not name:
        name = f"{job_id.lower().replace('/', '_').replace(' ', '_')}"

    # Create the config
    config = {
        "repository": repository,
        "job_id": job_id,
    }

    return config_service.create_config(name, config)

@router.post("/unlink/{name}")
async def unlink_repository_from_job(name: str):
    """Unlink a repository from a job by deleting the configuration."""
    return config_service.delete_config(name)
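The /link route above is how a repository first gets associated with a job. A minimal sketch, assuming the service runs on localhost:8000; the repository URL and job ID are hypothetical placeholders:

import requests

# Link a Gitea repository to an existing Nomad job; "name" is optional
# and is derived from job_id by the route when omitted.
resp = requests.post(
    "http://localhost:8000/api/configs/link",
    json={
        "repository": "http://gitea.example.com/username/repo-name",  # hypothetical
        "job_id": "web-app",                                          # hypothetical
    },
)
print(resp.status_code, resp.json().get("name"))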
396  app/routers/jobs.py  Normal file
@@ -0,0 +1,396 @@
from fastapi import APIRouter, Depends, HTTPException, Body, Query
from typing import Dict, Any, List, Optional
import json
import logging

from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService
from app.schemas.job import JobResponse, JobOperation, JobSpecification

router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()

# Configure logging
logger = logging.getLogger(__name__)

@router.get("/", response_model=List[JobResponse])
async def list_jobs():
    """List all jobs."""
    jobs = nomad_service.list_jobs()
    # Enhance job responses with repository information if available
    for job in jobs:
        job_id = job.get("ID")
        if job_id:
            repository = config_service.get_repository_from_job(job_id)
            if repository:
                job["repository"] = repository
    return jobs

@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """Get a job by ID."""
    job = nomad_service.get_job(job_id)
    # Add repository information if available
    repository = config_service.get_repository_from_job(job_id)
    if repository:
        job["repository"] = repository
    return job

@router.post("/", response_model=JobOperation)
async def start_job(job_spec: JobSpecification = Body(...)):
    """Start a Nomad job with the provided specification."""
    return nomad_service.start_job(job_spec.dict())

@router.delete("/{job_id}", response_model=JobOperation)
async def stop_job(job_id: str, purge: bool = Query(False)):
    """Stop a job by ID."""
    return nomad_service.stop_job(job_id, purge)

@router.get("/{job_id}/allocations")
async def get_job_allocations(job_id: str):
    """Get all allocations for a job."""
    return nomad_service.get_allocations(job_id)

@router.get("/{job_id}/latest-allocation")
async def get_latest_allocation(job_id: str):
    """Get the latest allocation for a job."""
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    return sorted_allocations[0]

@router.get("/{job_id}/status")
async def get_job_status(job_id: str, namespace: str = Query(None, description="Nomad namespace")):
    """Get the current status of a job, including deployment and latest allocation."""
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job status for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)
        status = {
            "job_id": job_id,
            "namespace": namespace or custom_nomad.namespace,
            "status": job.get("Status", "unknown"),
            "stable": job.get("Stable", False),
            "submitted_at": job.get("SubmitTime", 0),
        }

        # Get the latest deployment if any
        try:
            deployment = custom_nomad.get_deployment_status(job_id)
            if deployment:
                status["deployment"] = {
                    "id": deployment.get("ID"),
                    "status": deployment.get("Status"),
                    "description": deployment.get("StatusDescription"),
                }
        except Exception as e:
            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")
            pass  # Deployment info is optional

        # Get the latest allocation if any
        try:
            allocations = custom_nomad.get_allocations(job_id)
            if allocations:
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]
                status["latest_allocation"] = {
                    "id": latest_alloc.get("ID"),
                    "status": latest_alloc.get("ClientStatus"),
                    "description": latest_alloc.get("ClientDescription", ""),
                    "created_at": latest_alloc.get("CreateTime", 0),
                }
        except Exception as e:
            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")
            pass  # Allocation info is optional

        return status
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}")

@router.get("/{job_id}/specification")
async def get_job_specification(job_id: str, namespace: str = Query(None, description="Nomad namespace"), raw: bool = Query(False)):
    """Get the job specification for a job."""
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job specification for {job_id} in default namespace (development)")

        job = custom_nomad.get_job(job_id)

        if raw:
            return job

        # Extract just the job specification part if present
        if "JobID" in job:
            job_spec = {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "type": job.get("Type"),
                "status": job.get("Status"),
                "datacenters": job.get("Datacenters", []),
                "namespace": job.get("Namespace"),
                "task_groups": job.get("TaskGroups", []),
                "meta": job.get("Meta", {}),
            }
            return job_spec

        return job
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}")

@router.post("/{job_id}/restart")
async def restart_job(job_id: str):
    """Restart a job by stopping it and starting it again."""
    try:
        # Get the current job specification
        job_spec = nomad_service.get_job(job_id)

        # Stop the job
        nomad_service.stop_job(job_id)

        # Start the job with the original specification
        result = nomad_service.start_job(job_spec)

        return {
            "job_id": job_id,
            "status": "restarted",
            "eval_id": result.get("eval_id"),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}")

@router.get("/by-repository/{repository}")
async def get_job_by_repository(repository: str):
    """Get job information by repository URL or name."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Get the job using the specific namespace if provided
    try:
        if namespace:
            # Override the default namespace with the specific one
            custom_nomad = NomadService()
            custom_nomad.namespace = namespace
            job = custom_nomad.get_job(job_id)
        else:
            # Use the default namespace settings
            job = nomad_service.get_job(job_id)

        # Add repository information
        job["repository"] = repository
        return job
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}")

@router.post("/by-repository/{repository}/start")
async def start_job_by_repository(repository: str):
    """Start a job by its associated repository."""
    logger = logging.getLogger(__name__)

    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        logger.info(f"Setting custom_nomad.namespace to {namespace}")
        custom_nomad.namespace = namespace

    # Log the current namespace being used
    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")

    try:
        # Get the job specification from an existing job
        job_spec = custom_nomad.get_job(job_id)

        # Log the job specification
        logger.info(f"Retrieved job specification for {job_id} from existing job")

        # Ensure namespace is set in job spec
        if isinstance(job_spec, dict):
            # Ensure namespace is explicitly set
            if namespace:
                logger.info(f"Setting namespace in job spec to {namespace}")
                job_spec["Namespace"] = namespace

            # Log the keys in the job specification
            logger.info(f"Job spec keys: {job_spec.keys()}")

        # Start the job with the retrieved specification
        result = custom_nomad.start_job(job_spec)

        return {
            "job_id": job_id,
            "repository": repository,
            "status": "started",
            "eval_id": result.get("eval_id"),
            "namespace": namespace
        }
    except HTTPException as e:
        # If job not found, try to get spec from config
        if e.status_code == 404:
            logger.info(f"Job {job_id} not found, attempting to get specification from config")

            # Try to get job spec from repository config
            job_spec = config_service.get_job_spec_from_repository(repository)

            if not job_spec:
                logger.warning(f"No job specification found for repository {repository}, creating a default one")

                # Create a simple default job spec if none exists
                job_spec = {
                    "ID": job_id,
                    "Name": job_id,
                    "Type": "service",
                    "Datacenters": ["jm"],  # Default datacenter
                    "TaskGroups": [
                        {
                            "Name": "app",
                            "Count": 1,
                            "Tasks": [
                                {
                                    "Name": job_id.split('-')[0],  # Use first part of job ID as task name
                                    "Driver": "docker",
                                    "Config": {
                                        "image": f"registry.dev.meisheng.group/{repository}:latest",
                                        "force_pull": True,
                                        "ports": ["http"]
                                    },
                                    "Resources": {
                                        "CPU": 500,
                                        "MemoryMB": 512
                                    }
                                }
                            ],
                            "Networks": [
                                {
                                    "DynamicPorts": [
                                        {
                                            "Label": "http",
                                            "Value": 0,
                                            "To": 8000
                                        }
                                    ]
                                }
                            ]
                        }
                    ],
                    "Meta": {
                        "repository": repository
                    }
                }

            # Set the namespace explicitly in the job spec
            if namespace:
                logger.info(f"Setting namespace in default job spec to {namespace}")
                job_spec["Namespace"] = namespace

            logger.info(f"Starting job {job_id} with specification")

            # Log the job specification structure
            if isinstance(job_spec, dict):
                logger.info(f"Job spec keys: {job_spec.keys()}")
                if "Namespace" in job_spec:
                    logger.info(f"Job spec namespace: {job_spec['Namespace']}")

            # Start the job with the specification
            result = custom_nomad.start_job(job_spec)

            return {
                "job_id": job_id,
                "repository": repository,
                "status": "started",
                "eval_id": result.get("eval_id"),
                "namespace": namespace
            }

@router.post("/by-repository/{repository}/stop")
async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
    """Stop a job by its associated repository."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace

    # Stop the job
    result = custom_nomad.stop_job(job_id, purge)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "stopped",
        "eval_id": result.get("eval_id"),
        "namespace": namespace
    }

@router.post("/by-repository/{repository}/restart")
async def restart_job_by_repository(repository: str):
    """Restart a job by its associated repository."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace

    # Get the job specification
    job_spec = custom_nomad.get_job(job_id)

    # Stop the job first
    custom_nomad.stop_job(job_id)

    # Start the job with the original specification
    result = custom_nomad.start_job(job_spec)

    return {
        "job_id": job_id,
        "repository": repository,
        "status": "restarted",
        "eval_id": result.get("eval_id"),
        "namespace": namespace
    }
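The by-repository routes above give a full lifecycle keyed on the stored configuration rather than the raw job ID. A sketch of driving them, assuming the service runs on localhost:8000 and "repo-name" is a hypothetical repository alias that already has a linked configuration:

import requests

base = "http://localhost:8000/api/jobs/by-repository/repo-name"

requests.post(f"{base}/start")                           # start, falling back to a default spec
requests.post(f"{base}/restart")                         # stop, then re-submit the same spec
requests.post(f"{base}/stop", params={"purge": "true"})  # stop and purge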
293  app/routers/logs.py  Normal file
@@ -0,0 +1,293 @@
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional
import logging

from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService

# Configure logging
logger = logging.getLogger(__name__)

router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()

# More specific routes first
@router.get("/repository/{repository}")
async def get_repository_logs(
    repository: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    limit: int = Query(1, description="Number of allocations to return logs for"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs for a repository's associated job."""
    # Get the job info for the repository
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")

    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")

    logger.info(f"Getting logs for job {job_id} in namespace {namespace}")

    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace

    # Get allocations for the job
    allocations = custom_nomad.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    logger.info(f"Found {len(allocations)} allocations for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    # Limit the number of allocations
    allocations_to_check = sorted_allocations[:limit]

    # Also get the job info to determine task names
    job = custom_nomad.get_job(job_id)

    # Collect logs for each allocation and task
    result = []
    error_messages = []

    for alloc in allocations_to_check:
        # Use the full UUID of the allocation
        alloc_id = alloc.get("ID")
        if not alloc_id:
            logger.warning(f"Allocation ID not found in allocation data")
            error_messages.append("Allocation ID not found in allocation data")
            continue

        logger.info(f"Processing allocation {alloc_id} for job {job_id}")

        # Get task name from the allocation's TaskStates
        task_states = alloc.get("TaskStates", {})
        if not task_states:
            logger.warning(f"No task states found in allocation {alloc_id}")
            error_messages.append(f"No task states found in allocation {alloc_id}")

        for task_name, task_state in task_states.items():
            try:
                logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")

                logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)

                # Check if logs is an error message
                if logs and isinstance(logs, str):
                    if logs.startswith("Error:") or logs.startswith("No "):
                        logger.warning(f"Error retrieving logs for {task_name}: {logs}")
                        error_messages.append(logs)
                        continue

                # Only add if we got some logs
                if logs:
                    result.append({
                        "alloc_id": alloc_id,
                        "task": task_name,
                        "type": log_type,
                        "create_time": alloc.get("CreateTime"),
                        "logs": logs
                    })
                    logger.info(f"Successfully retrieved logs for {task_name}")
                else:
                    error_msg = f"No logs found for {task_name}"
                    logger.warning(error_msg)
                    error_messages.append(error_msg)
            except Exception as e:
                # Log but continue to try other tasks
                error_msg = f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}"
                logger.error(error_msg)
                error_messages.append(error_msg)

    # Return as plain text if requested
    if plain_text:
        if not result:
            if error_messages:
                return f"No logs found for this job. Errors: {'; '.join(error_messages)}"
            return "No logs found for this job"
        return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])

    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "repository": repository,
        "namespace": namespace,
        "allocation_logs": result,
        "errors": error_messages if error_messages else None
    }

@router.get("/job/{job_id}")
async def get_job_logs(
    job_id: str,
    namespace: str = Query(None, description="Nomad namespace"),
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    limit: int = Query(1, description="Number of allocations to return logs for"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs for the most recent allocations of a job."""
    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace
        logger.info(f"Getting logs for job {job_id} in namespace {namespace}")
    else:
        logger.info(f"Getting logs for job {job_id} in default namespace")

    # Get all allocations for the job
    allocations = custom_nomad.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    logger.info(f"Found {len(allocations)} allocations for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    # Limit the number of allocations
    allocations_to_check = sorted_allocations[:limit]

    # Collect logs for each allocation and task
    result = []
    for alloc in allocations_to_check:
        alloc_id = alloc.get("ID")
        if not alloc_id:
            logger.warning(f"Allocation ID not found in allocation data")
            continue

        logger.info(f"Processing allocation {alloc_id} for job {job_id}")

        # Get task names from the allocation's TaskStates
        task_states = alloc.get("TaskStates", {})
        for task_name, task_state in task_states.items():
            try:
                logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")

                logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)
                # Only add if we got some logs and not an error message
                if logs and not logs.startswith("No") and not logs.startswith("Error"):
                    result.append({
                        "alloc_id": alloc_id,
                        "task": task_name,
                        "type": log_type,
                        "create_time": alloc.get("CreateTime"),
                        "logs": logs
                    })
                    logger.info(f"Successfully retrieved logs for {task_name}")
                else:
                    logger.warning(f"No logs found for {task_name}: {logs}")
            except Exception as e:
                # Log but continue to try other tasks
                logger.error(f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}")

    # Return as plain text if requested
    if plain_text:
        if not result:
            return "No logs found for this job"
        return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])

    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "namespace": namespace,
        "allocation_logs": result
    }

@router.get("/latest/{job_id}")
async def get_latest_allocation_logs(
    job_id: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs from the latest allocation of a job."""
    # Get all allocations for the job
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")

    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )

    # Get the latest allocation
    latest_alloc = sorted_allocations[0]
    alloc_id = latest_alloc.get("ID")

    # Get task group and task information
    job = nomad_service.get_job(job_id)
    task_groups = job.get("TaskGroups", [])

    # Collect logs for each task in the latest allocation
    result = []
    for task_group in task_groups:
        tasks = task_group.get("Tasks", [])
        for task in tasks:
            task_name = task.get("Name")
            try:
                logs = nomad_service.get_allocation_logs(alloc_id, task_name, log_type)
                result.append({
                    "alloc_id": alloc_id,
                    "task": task_name,
                    "type": log_type,
                    "create_time": latest_alloc.get("CreateTime"),
                    "logs": logs
                })
            except Exception as e:
                # Skip if logs cannot be retrieved for this task
                pass

    # Return as plain text if requested
    if plain_text:
        return "\n\n".join([f"=== {r['task']} ===\n{r['logs']}" for r in result])

    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "latest_allocation": alloc_id,
        "task_logs": result
    }

@router.get("/build/{job_id}")
async def get_build_logs(job_id: str, plain_text: bool = Query(False)):
    """Get build logs for a job (usually stderr logs from the latest allocation)."""
    # This is a convenience endpoint that returns stderr logs from the latest allocation
    return await get_latest_allocation_logs(job_id, "stderr", plain_text)

# Generic allocation logs route last
@router.get("/allocation/{alloc_id}/{task}")
async def get_allocation_logs(
    alloc_id: str,
    task: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Get logs for a specific allocation and task."""
    # Validate log_type
    if log_type not in ["stdout", "stderr"]:
        raise HTTPException(status_code=400, detail="Log type must be stdout or stderr")

    # Get logs from Nomad
    logs = nomad_service.get_allocation_logs(alloc_id, task, log_type)

    # Return as plain text if requested
    if plain_text:
        return logs

    # Otherwise return as JSON
    return {"alloc_id": alloc_id, "task": task, "type": log_type, "logs": logs}
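The repository log route above is the most convenient one for day-to-day debugging. A minimal sketch, assuming the service runs on localhost:8000 and "repo-name" is a hypothetical repository alias:

import requests

# Fetch stderr from the two most recent allocations as plain text
resp = requests.get(
    "http://localhost:8000/api/logs/repository/repo-name",
    params={"log_type": "stderr", "limit": 2, "plain_text": True},
)
print(resp.text)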
89  app/routers/repositories.py  Normal file
@@ -0,0 +1,89 @@
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional

from app.services.gitea_client import GiteaClient
from app.services.config_service import ConfigService

router = APIRouter()
gitea_client = GiteaClient()
config_service = ConfigService()

@router.get("/")
async def list_repositories(limit: int = Query(100, description="Maximum number of repositories to return")):
    """
    List all available repositories from Gitea.

    If Gitea integration is not configured, returns an empty list.
    """
    repositories = gitea_client.list_repositories(limit)

    # Enhance with linked job information
    for repo in repositories:
        # Create a URL from clone_url
        repo_url = repo.get("clone_url")
        if repo_url:
            # Check if repository is linked to a job
            configs = config_service.list_configs()
            for config in configs:
                if config.get("repository") == repo_url:
                    repo["linked_job"] = config.get("job_id")
                    repo["config_name"] = config.get("name")
                    break

    return repositories

@router.get("/{repository}")
async def get_repository_info(repository: str):
    """
    Get information about a specific repository.

    The repository parameter can be a repository URL or a repository alias.
    If it's a repository URL, we'll get the info directly from Gitea.
    If it's a repository alias, we'll get the info from the configuration and then from Gitea.
    """
    # First check if it's a repository URL
    repo_info = gitea_client.get_repository_info(repository)

    if repo_info:
        # Check if repository is linked to a job
        configs = config_service.list_configs()
        for config in configs:
            if config.get("repository") == repository:
                repo_info["linked_job"] = config.get("job_id")
                repo_info["config_name"] = config.get("name")
                repo_info["config"] = config
                break

        return repo_info
    else:
        # Check if it's a repository alias in our configs
        config = config_service.get_config_by_repository(repository)
        if config:
            repo_url = config.get("repository")
            repo_info = gitea_client.get_repository_info(repo_url)

            if repo_info:
                repo_info["linked_job"] = config.get("job_id")
                repo_info["config_name"] = config.get("name")
                repo_info["config"] = config
                return repo_info

    raise HTTPException(status_code=404, detail=f"Repository not found: {repository}")

@router.get("/{repository}/branches")
async def get_repository_branches(repository: str):
    """
    Get branches for a specific repository.

    The repository parameter can be a repository URL or a repository alias.
    """
    # If it's a repository alias, get the actual URL
    config = config_service.get_config_by_repository(repository)
    if config:
        repository = config.get("repository")

    branches = gitea_client.get_repository_branches(repository)
    if not branches:
        raise HTTPException(status_code=404, detail=f"No branches found for repository: {repository}")

    return branches
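A short sketch of consuming the repository listing above, assuming the service runs on localhost:8000:

import requests

# Each repository dict carries linked_job/config_name when a config exists
repos = requests.get(
    "http://localhost:8000/api/repositories/",
    params={"limit": 10},
).json()
for repo in repos:
    print(repo.get("clone_url"), "->", repo.get("linked_job"))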
1  app/schemas/__init__.py  Normal file
@@ -0,0 +1 @@
# Import schemas
BIN  app/schemas/__pycache__/__init__.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/schemas/__pycache__/claude_api.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/schemas/__pycache__/config.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/schemas/__pycache__/job.cpython-313.pyc  Normal file
Binary file not shown.
78  app/schemas/claude_api.py  Normal file
@@ -0,0 +1,78 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Optional, Union


class ClaudeJobRequest(BaseModel):
    """Request model for Claude to start or manage a job"""
    job_id: str = Field(..., description="The ID of the job to manage")
    action: str = Field(..., description="Action to perform: start, stop, restart, status")
    namespace: Optional[str] = Field("development", description="Nomad namespace")
    purge: Optional[bool] = Field(False, description="Whether to purge the job when stopping")


class ClaudeJobSpecification(BaseModel):
    """Simplified job specification for Claude to create a new job"""
    job_id: str = Field(..., description="The ID for the new job")
    name: Optional[str] = Field(None, description="Name of the job (defaults to job_id)")
    type: str = Field("service", description="Job type: service, batch, or system")
    datacenters: List[str] = Field(["jm"], description="List of datacenters")
    namespace: str = Field("development", description="Nomad namespace")
    docker_image: str = Field(..., description="Docker image to run")
    count: int = Field(1, description="Number of instances to run")
    cpu: int = Field(100, description="CPU resources in MHz")
    memory: int = Field(128, description="Memory in MB")
    ports: Optional[List[Dict[str, Any]]] = Field(None, description="Port mappings")
    env_vars: Optional[Dict[str, str]] = Field(None, description="Environment variables")

    def to_nomad_job_spec(self) -> Dict[str, Any]:
        """Convert to Nomad job specification format"""
        # Create a task with the specified Docker image
        task = {
            "Name": "app",
            "Driver": "docker",
            "Config": {
                "image": self.docker_image,
            },
            "Resources": {
                "CPU": self.cpu,
                "MemoryMB": self.memory
            }
        }

        # Add environment variables if specified
        if self.env_vars:
            task["Env"] = self.env_vars

        # Create network configuration
        network = {}
        if self.ports:
            network["DynamicPorts"] = self.ports
            task["Config"]["ports"] = [port["Label"] for port in self.ports]

        # Create the full job specification
        job_spec = {
            "ID": self.job_id,
            "Name": self.name or self.job_id,
            "Type": self.type,
            "Datacenters": self.datacenters,
            "Namespace": self.namespace,
            "TaskGroups": [
                {
                    "Name": "app",
                    "Count": self.count,
                    "Tasks": [task],
                    "Networks": [network] if network else []
                }
            ]
        }

        return job_spec


class ClaudeJobResponse(BaseModel):
    """Response model for Claude job operations"""
    success: bool = Field(..., description="Whether the operation was successful")
    job_id: str = Field(..., description="The ID of the job")
    status: str = Field(..., description="Current status of the job")
    message: str = Field(..., description="Human-readable message about the operation")
    details: Optional[Dict[str, Any]] = Field(None, description="Additional details about the job")
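To see what the conversion method above actually emits, here is a small sketch using only fields defined in the model; the job ID and image are hypothetical:

from app.schemas.claude_api import ClaudeJobSpecification

spec = ClaudeJobSpecification(
    job_id="hello-web",           # hypothetical
    docker_image="nginx:alpine",  # hypothetical
    ports=[{"Label": "http", "Value": 0, "To": 80}],
)
nomad_spec = spec.to_nomad_job_spec()
# The port label is mirrored into the Docker config's "ports" list
assert nomad_spec["TaskGroups"][0]["Tasks"][0]["Config"]["ports"] == ["http"]
assert nomad_spec["TaskGroups"][0]["Networks"][0]["DynamicPorts"][0]["To"] == 80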
56  app/schemas/config.py  Normal file
@@ -0,0 +1,56 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Optional


class ConfigBase(BaseModel):
    """Base class for configuration schemas."""
    repository: str = Field(..., description="Repository URL or identifier")
    job_id: str = Field(..., description="Nomad job ID")
    description: Optional[str] = Field(None, description="Description of this configuration")
    repository_alias: Optional[str] = Field(None, description="Short name or alias for the repository")

    # Additional metadata can be stored in the meta field
    meta: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")


class ConfigCreate(ConfigBase):
    """Schema for creating a new configuration."""
    name: str = Field(..., description="Configuration name (used as the file name)")


class ConfigUpdate(BaseModel):
    """Schema for updating an existing configuration."""
    repository: Optional[str] = Field(None, description="Repository URL or identifier")
    job_id: Optional[str] = Field(None, description="Nomad job ID")
    description: Optional[str] = Field(None, description="Description of this configuration")
    repository_alias: Optional[str] = Field(None, description="Short name or alias for the repository")
    meta: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")


class ConfigResponse(ConfigBase):
    """Schema for configuration response."""
    name: str = Field(..., description="Configuration name")
    repository_info: Optional[Dict[str, Any]] = Field(None, description="Repository information from Gitea if available")

    class Config:
        schema_extra = {
            "example": {
                "name": "my-web-app",
                "repository": "http://gitea.internal.example.com/username/repo-name",
                "repository_alias": "web-app",
                "job_id": "web-app",
                "description": "Web application running in Nomad",
                "meta": {
                    "owner": "devops-team",
                    "environment": "production"
                },
                "repository_info": {
                    "description": "A web application",
                    "default_branch": "main",
                    "stars": 5,
                    "forks": 2,
                    "owner": "username",
                    "html_url": "http://gitea.internal.example.com/username/repo-name"
                }
            }
        }
80  app/schemas/job.py  Normal file
@@ -0,0 +1,80 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Optional


class JobSpecification(BaseModel):
    """
    Nomad job specification. This is a simplified schema as the actual
    Nomad job spec is quite complex and varies by job type.
    """
    id: Optional[str] = Field(None, description="Job ID")
    ID: Optional[str] = Field(None, description="Job ID (Nomad format)")
    name: Optional[str] = Field(None, description="Job name")
    Name: Optional[str] = Field(None, description="Job name (Nomad format)")
    type: Optional[str] = Field(None, description="Job type (service, batch, system)")
    Type: Optional[str] = Field(None, description="Job type (Nomad format)")
    datacenters: Optional[List[str]] = Field(None, description="List of datacenters")
    Datacenters: Optional[List[str]] = Field(None, description="List of datacenters (Nomad format)")
    task_groups: Optional[List[Dict[str, Any]]] = Field(None, description="Task groups")
    TaskGroups: Optional[List[Dict[str, Any]]] = Field(None, description="Task groups (Nomad format)")
    meta: Optional[Dict[str, str]] = Field(None, description="Job metadata")
    Meta: Optional[Dict[str, str]] = Field(None, description="Job metadata (Nomad format)")

    # Allow additional fields (to handle the complete Nomad job spec)
    class Config:
        extra = "allow"


class JobOperation(BaseModel):
    """Response after a job operation (start, stop, etc.)"""
    job_id: str = Field(..., description="The ID of the job")
    eval_id: Optional[str] = Field(None, description="The evaluation ID")
    status: str = Field(..., description="The status of the operation")
    warnings: Optional[str] = Field(None, description="Any warnings from Nomad")


class JobResponse(BaseModel):
    """
    Job response schema. This is a simplified version as the actual
    Nomad job response is quite complex and varies by job type.
    """
    ID: str = Field(..., description="Job ID")
    Name: str = Field(..., description="Job name")
    Status: str = Field(..., description="Job status")
    Type: str = Field(..., description="Job type")
    repository: Optional[str] = Field(None, description="Associated repository if any")

    # Allow additional fields (to handle the complete Nomad job response)
    class Config:
        extra = "allow"


class TaskGroup(BaseModel):
    """Task group schema."""
    Name: str
    Count: int
    Tasks: List[Dict[str, Any]]

    class Config:
        extra = "allow"


class Task(BaseModel):
    """Task schema."""
    Name: str
    Driver: str
    Config: Dict[str, Any]

    class Config:
        extra = "allow"


class Allocation(BaseModel):
    """Allocation schema."""
    ID: str
    JobID: str
    TaskGroup: str
    ClientStatus: str

    class Config:
        extra = "allow"
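Because JobSpecification declares both casings and allows extra fields, the same model accepts a hand-written lowercase spec or a raw document returned by Nomad. A quick sketch (values hypothetical):

from app.schemas.job import JobSpecification

# A raw Nomad-style document validates directly...
nomad_style = JobSpecification(ID="web-app", Name="web-app", Type="service")
# ...and unknown keys such as "Priority" are kept thanks to extra = "allow"
with_extra = JobSpecification(ID="web-app", Priority=50)
print(nomad_style.dict(exclude_none=True))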
1  app/services/__init__.py  Normal file
@@ -0,0 +1 @@
# Import services
BIN  app/services/__pycache__/__init__.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/services/__pycache__/config_service.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/services/__pycache__/gitea_client.cpython-313.pyc  Normal file
Binary file not shown.
BIN  app/services/__pycache__/nomad_client.cpython-313.pyc  Normal file
Binary file not shown.
299
app/services/config_service.py
Normal file
299
app/services/config_service.py
Normal file
@ -0,0 +1,299 @@
import os
import yaml
import logging
import json
from typing import Dict, Any, Optional, List
from fastapi import HTTPException
from pathlib import Path

from app.services.gitea_client import GiteaClient

# Configure logging
logger = logging.getLogger(__name__)

# Default configs directory
CONFIG_DIR = os.getenv("CONFIG_DIR", "./configs")

class ConfigService:
    """Service for managing repository to job mappings."""

    def __init__(self, config_dir: str = CONFIG_DIR):
        self.config_dir = Path(config_dir)
        self._ensure_config_dir()
        self.gitea_client = GiteaClient()

    def _ensure_config_dir(self):
        """Ensure the config directory exists."""
        try:
            self.config_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            logger.error(f"Failed to create config directory {self.config_dir}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to create config directory: {str(e)}")

    def list_configs(self) -> List[Dict[str, Any]]:
        """List all available configurations."""
        configs = []
        try:
            for file_path in self.config_dir.glob("*.yaml"):
                with open(file_path, "r") as f:
                    config = yaml.safe_load(f)
                    config["name"] = file_path.stem
                    configs.append(config)
            return configs
        except Exception as e:
            logger.error(f"Failed to list configurations: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to list configurations: {str(e)}")

    def get_config(self, name: str) -> Dict[str, Any]:
        """Get a specific configuration by name."""
        file_path = self.config_dir / f"{name}.yaml"
        try:
            if not file_path.exists():
                raise HTTPException(status_code=404, detail=f"Configuration not found: {name}")

            with open(file_path, "r") as f:
                config = yaml.safe_load(f)
                config["name"] = name

            # Enrich with repository information if available
            if repository := config.get("repository"):
                repo_info = self.gitea_client.get_repository_info(repository)
                if repo_info:
                    config["repository_info"] = {
                        "description": repo_info.get("description"),
                        "default_branch": repo_info.get("default_branch"),
                        "stars": repo_info.get("stars_count"),
                        "forks": repo_info.get("forks_count"),
                        "owner": repo_info.get("owner", {}).get("login"),
                        "html_url": repo_info.get("html_url"),
                    }

            return config
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Failed to read configuration {name}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to read configuration: {str(e)}")

    def create_config(self, name: str, config: Dict[str, Any]) -> Dict[str, Any]:
        """Create a new configuration."""
        file_path = self.config_dir / f"{name}.yaml"
        try:
            if file_path.exists():
                raise HTTPException(status_code=409, detail=f"Configuration already exists: {name}")

            # Validate required fields
            required_fields = ["repository", "job_id"]
            for field in required_fields:
                if field not in config:
                    raise HTTPException(status_code=400, detail=f"Missing required field: {field}")

            # Validate repository exists if Gitea integration is configured
            if not self.gitea_client.check_repository_exists(config["repository"]):
                raise HTTPException(status_code=400, detail=f"Repository not found: {config['repository']}")

            # Add name to the config
            config["name"] = name

            # Derive a repository alias if not provided
            if "repository_alias" not in config:
                try:
                    owner, repo = self.gitea_client.parse_repo_url(config["repository"])
                    config["repository_alias"] = repo
                except ValueError:
                    # Use job_id as fallback
                    config["repository_alias"] = config["job_id"]

            # Write config to file
            with open(file_path, "w") as f:
                yaml.dump(config, f, default_flow_style=False)

            return config
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Failed to create configuration {name}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to create configuration: {str(e)}")

    def update_config(self, name: str, config: Dict[str, Any]) -> Dict[str, Any]:
        """Update an existing configuration."""
        file_path = self.config_dir / f"{name}.yaml"
        try:
            if not file_path.exists():
                raise HTTPException(status_code=404, detail=f"Configuration not found: {name}")

            # Read existing config and remember the pre-update repository,
            # so the "changed" check below compares against the stored value
            with open(file_path, "r") as f:
                existing_config = yaml.safe_load(f)
            original_repository = existing_config.get("repository")

            # Update with new values
            for key, value in config.items():
                existing_config[key] = value

            # Validate repository exists if changed and Gitea integration is configured
            if "repository" in config and config["repository"] != original_repository:
                if not self.gitea_client.check_repository_exists(config["repository"]):
                    raise HTTPException(status_code=400, detail=f"Repository not found: {config['repository']}")

            # Validate required fields
            required_fields = ["repository", "job_id"]
            for field in required_fields:
                if field not in existing_config:
                    raise HTTPException(status_code=400, detail=f"Missing required field: {field}")

            # Add name to the config
            existing_config["name"] = name

            # Update repository alias if repository changed
            if "repository" in config and "repository_alias" not in config:
                try:
                    owner, repo = self.gitea_client.parse_repo_url(existing_config["repository"])
                    existing_config["repository_alias"] = repo
                except ValueError:
                    pass

            # Write config to file
            with open(file_path, "w") as f:
                yaml.dump(existing_config, f, default_flow_style=False)

            return existing_config
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Failed to update configuration {name}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to update configuration: {str(e)}")

    def delete_config(self, name: str) -> Dict[str, Any]:
        """Delete a configuration."""
        file_path = self.config_dir / f"{name}.yaml"
        try:
            if not file_path.exists():
                raise HTTPException(status_code=404, detail=f"Configuration not found: {name}")

            # Get the config before deleting
            with open(file_path, "r") as f:
                config = yaml.safe_load(f)
                config["name"] = name

            # Delete the file
            file_path.unlink()

            return {"name": name, "status": "deleted"}
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Failed to delete configuration {name}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to delete configuration: {str(e)}")

    def get_job_from_repository(self, repository: str) -> Optional[Dict[str, str]]:
        """Find the job_id and namespace associated with a repository."""
        try:
            for config in self.list_configs():
                if config.get("repository") == repository or config.get("repository_alias") == repository:
                    return {
                        "job_id": config.get("job_id"),
                        "namespace": config.get("namespace")
                    }
            return None
        except Exception as e:
            logger.error(f"Failed to find job for repository {repository}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to find job for repository: {str(e)}")

    def get_repository_from_job(self, job_id: str) -> Optional[str]:
        """Find the repository associated with a job_id."""
        try:
            for config in self.list_configs():
                if config.get("job_id") == job_id:
                    return config.get("repository")
            return None
        except Exception as e:
            logger.error(f"Failed to find repository for job {job_id}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to find repository for job: {str(e)}")

    def get_config_by_repository(self, repository: str) -> Optional[Dict[str, Any]]:
        """Find a configuration by repository URL or alias."""
        try:
            for config in self.list_configs():
                if config.get("repository") == repository or config.get("repository_alias") == repository:
                    return self.get_config(config.get("name"))
            return None
        except Exception as e:
            logger.error(f"Failed to find config for repository {repository}: {str(e)}")
            return None

    def get_job_spec_from_repository(self, repository: str) -> Optional[Dict[str, Any]]:
        """Get a job specification from the repository config and its template."""
        try:
            # Get the repository configuration
            config = self.get_config_by_repository(repository)
            if not config:
                logger.error(f"No configuration found for repository: {repository}")
                return None

            # Check if a job template is specified
            job_template = config.get("job_template")
            if not job_template:
                logger.error(f"No job template specified for repository: {repository}")
                return None

            # Read the job template file
            template_path = self.config_dir / "templates" / f"{job_template}.json"
            if not template_path.exists():
                logger.error(f"Job template not found: {job_template}")
                return None

            try:
                with open(template_path, "r") as f:
                    job_spec = json.load(f)
            except Exception as e:
                logger.error(f"Failed to read job template {job_template}: {str(e)}")
                return None

            # Apply configuration parameters to the template
            job_spec["ID"] = config.get("job_id")
            job_spec["Name"] = config.get("job_id")

            # Apply other customizations from the config
            if env_vars := config.get("environment_variables"):
                for task_group in job_spec.get("TaskGroups", []):
                    for task in task_group.get("Tasks", []):
                        if "Env" not in task:
                            task["Env"] = {}
                        task["Env"].update(env_vars)

            if meta := config.get("metadata"):
                job_spec["Meta"] = meta

            # Add repository info to the metadata
            if "Meta" not in job_spec:
                job_spec["Meta"] = {}
            job_spec["Meta"]["repository"] = repository

            # Override specific job parameters if specified in the config
            if job_params := config.get("job_parameters"):
                for param_key, param_value in job_params.items():
                    # Handle nested parameters with dot notation (e.g., "TaskGroups.0.Tasks.0.Config.image")
                    if "." in param_key:
                        parts = param_key.split(".")
                        current = job_spec
                        for part in parts[:-1]:
                            # Handle list indices
                            if part.isdigit() and isinstance(current, list):
                                current = current[int(part)]
                            elif part in current:
                                current = current[part]
                            else:
                                break
                        else:
                            # Only set the value if we successfully navigated the full path
                            current[parts[-1]] = param_value
                    else:
                        # Direct top-level parameter
                        job_spec[param_key] = param_value

            logger.info(f"Generated job specification for repository {repository} using template {job_template}")
            return job_spec

        except Exception as e:
            logger.error(f"Failed to get job specification for repository {repository}: {str(e)}")
            return None
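Taken together, a mapping file consumed by this service might look like the following sketch. The file name and all values are invented, but each key is one the service actually reads; repository and job_id are required, the rest optional:

import yaml

# Illustrative contents of configs/my-web-app.yaml; values are placeholders.
example_yaml = """
repository: http://gitea.internal.example.com/username/my-web-app
repository_alias: my-web-app    # optional; derived from the URL when omitted
job_id: my-web-app              # required
namespace: development
job_template: service-template  # resolved to <CONFIG_DIR>/templates/service-template.json
environment_variables:
  PORT: "8080"                  # merged into each task's Env
metadata:
  team: platform                # becomes the job's Meta block
job_parameters:
  # Dot notation addresses nested fields, including list indices.
  TaskGroups.0.Tasks.0.Config.image: registry.example.com/my-web-app:latest
"""

config = yaml.safe_load(example_yaml)
print(config["job_id"])  # my-web-app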
180
app/services/gitea_client.py
Normal file
@ -0,0 +1,180 @@
import os
import logging
import requests
from typing import Dict, Any, List, Optional, Tuple
from urllib.parse import urlparse
from fastapi import HTTPException

# Configure logging
logger = logging.getLogger(__name__)

class GiteaClient:
    """Client for interacting with Gitea API."""

    def __init__(self):
        """Initialize Gitea client with configuration from environment variables."""
        self.api_base_url = os.getenv("GITEA_API_URL", "").rstrip("/")
        self.token = os.getenv("GITEA_API_TOKEN")
        self.username = os.getenv("GITEA_USERNAME")
        self.verify_ssl = os.getenv("GITEA_VERIFY_SSL", "true").lower() == "true"

        if not self.api_base_url:
            logger.warning("GITEA_API_URL is not configured. Gitea integration will not work.")

        if not self.token and (self.username and os.getenv("GITEA_PASSWORD")):
            self.token = self._get_token_from_credentials()

    def _get_token_from_credentials(self) -> Optional[str]:
        """Get a token using username and password if provided."""
        try:
            response = requests.post(
                f"{self.api_base_url}/users/{self.username}/tokens",
                auth=(self.username, os.getenv("GITEA_PASSWORD", "")),
                json={
                    "name": "nomad-mcp-service",
                    "scopes": ["repo", "read:org"]
                },
                verify=self.verify_ssl
            )

            if response.status_code == 201:
                return response.json().get("sha1")
            else:
                logger.error(f"Failed to get Gitea token: {response.text}")
                return None
        except Exception as e:
            logger.error(f"Failed to get Gitea token: {str(e)}")
            return None

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with authentication."""
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }

        if self.token:
            headers["Authorization"] = f"token {self.token}"

        return headers

    def parse_repo_url(self, repo_url: str) -> Tuple[str, str]:
        """
        Parse a Gitea repository URL to extract owner and repo name.

        Examples:
        - http://gitea.internal.example.com/username/repo-name -> (username, repo-name)
        - https://gitea.example.com/org/project -> (org, project)
        """
        try:
            # Parse the URL
            parsed_url = urlparse(repo_url)

            # Get the path and remove leading/trailing slashes
            path = parsed_url.path.strip("/")

            # Split the path
            parts = path.split("/")

            if len(parts) < 2:
                raise ValueError(f"Invalid repository URL: {repo_url}")

            # Extract owner and repo
            owner = parts[0]
            repo = parts[1]

            return owner, repo
        except Exception as e:
            logger.error(f"Failed to parse repository URL: {repo_url}, error: {str(e)}")
            raise ValueError(f"Invalid repository URL: {repo_url}")

    def check_repository_exists(self, repo_url: str) -> bool:
        """Check if a repository exists in Gitea."""
        if not self.api_base_url:
            # No Gitea integration configured, assume repository exists
            return True

        try:
            owner, repo = self.parse_repo_url(repo_url)

            response = requests.get(
                f"{self.api_base_url}/repos/{owner}/{repo}",
                headers=self._get_headers(),
                verify=self.verify_ssl
            )

            return response.status_code == 200
        except Exception as e:
            logger.error(f"Failed to check repository: {repo_url}, error: {str(e)}")
            return False

    def get_repository_info(self, repo_url: str) -> Optional[Dict[str, Any]]:
        """Get repository information from Gitea."""
        if not self.api_base_url:
            # No Gitea integration configured
            return None

        try:
            owner, repo = self.parse_repo_url(repo_url)

            response = requests.get(
                f"{self.api_base_url}/repos/{owner}/{repo}",
                headers=self._get_headers(),
                verify=self.verify_ssl
            )

            if response.status_code == 200:
                return response.json()
            else:
                logger.error(f"Failed to get repository info: {response.text}")
                return None
        except Exception as e:
            logger.error(f"Failed to get repository info: {repo_url}, error: {str(e)}")
            return None

    def list_repositories(self, limit: int = 100) -> List[Dict[str, Any]]:
        """List available repositories from Gitea."""
        if not self.api_base_url:
            # No Gitea integration configured
            return []

        try:
            response = requests.get(
                f"{self.api_base_url}/user/repos",
                headers=self._get_headers(),
                params={"limit": limit},
                verify=self.verify_ssl
            )

            if response.status_code == 200:
                return response.json()
            else:
                logger.error(f"Failed to list repositories: {response.text}")
                return []
        except Exception as e:
            logger.error(f"Failed to list repositories: {str(e)}")
            return []

    def get_repository_branches(self, repo_url: str) -> List[Dict[str, Any]]:
        """Get branches for a repository."""
        if not self.api_base_url:
            # No Gitea integration configured
            return []

        try:
            owner, repo = self.parse_repo_url(repo_url)

            response = requests.get(
                f"{self.api_base_url}/repos/{owner}/{repo}/branches",
                headers=self._get_headers(),
                verify=self.verify_ssl
            )

            if response.status_code == 200:
                return response.json()
            else:
                logger.error(f"Failed to get repository branches: {response.text}")
                return []
        except Exception as e:
            logger.error(f"Failed to get repository branches: {repo_url}, error: {str(e)}")
            return []
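The client is configured entirely through environment variables. Note that GITEA_API_URL must include the API prefix (e.g. /api/v1), since the client appends paths such as /repos/{owner}/{repo} directly. A minimal usage sketch, with the URL and token as placeholders:

import os

# Placeholders; set these before constructing the client.
os.environ["GITEA_API_URL"] = "https://gitea.example.com/api/v1"
os.environ["GITEA_API_TOKEN"] = "<token>"  # or GITEA_USERNAME + GITEA_PASSWORD

from app.services.gitea_client import GiteaClient

client = GiteaClient()
repo_url = "https://gitea.example.com/org/project"
if client.check_repository_exists(repo_url):
    info = client.get_repository_info(repo_url)
    print(info.get("default_branch"))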
505
app/services/nomad_client.py
Normal file
@ -0,0 +1,505 @@
import os
import logging
import nomad
from fastapi import HTTPException
from typing import Dict, Any, Optional, List
from dotenv import load_dotenv
import time

# Load environment variables
load_dotenv()

# Configure logging
logger = logging.getLogger(__name__)

def get_nomad_client():
    """
    Create and return a Nomad client using environment variables.
    """
    try:
        nomad_addr = os.getenv("NOMAD_ADDR", "http://localhost:4646").rstrip('/')
        nomad_token = os.getenv("NOMAD_TOKEN")
        # Use "development" as the default namespace since all jobs are likely to be in this namespace
        nomad_namespace = os.getenv("NOMAD_NAMESPACE", "development")

        # Ensure the namespace is never "*" (wildcard)
        if nomad_namespace == "*":
            nomad_namespace = "development"
            logger.info("Replaced wildcard namespace '*' with 'development'")

        # Extract host and port from the address
        host_with_port = nomad_addr.replace("http://", "").replace("https://", "")
        host = host_with_port.split(":")[0]

        # Safely extract the port
        port_part = host_with_port.split(":")[-1] if ":" in host_with_port else "4646"
        port = int(port_part.split('/')[0])  # Remove any path components

        logger.info(f"Creating Nomad client with host={host}, port={port}, namespace={nomad_namespace}")

        return nomad.Nomad(
            host=host,
            port=port,
            secure=nomad_addr.startswith("https"),
            token=nomad_token,
            timeout=10,
            namespace=nomad_namespace,  # Use the development namespace by default
            verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
        )
    except Exception as e:
        logger.error(f"Failed to create Nomad client: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to connect to Nomad: {str(e)}")
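The factory is driven entirely by environment variables; a minimal sketch of wiring it up, with the address and token as placeholders (an https:// scheme in NOMAD_ADDR is what flips the secure flag):

import os

os.environ["NOMAD_ADDR"] = "http://nomad.example.com:4646"  # placeholder
os.environ["NOMAD_TOKEN"] = "<acl-token>"                   # only needed on ACL-enabled clusters
os.environ["NOMAD_NAMESPACE"] = "development"

from app.services.nomad_client import get_nomad_client

client = get_nomad_client()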
class NomadService:
    """Service for interacting with Nomad API."""

    def __init__(self):
        self.client = get_nomad_client()
        self.namespace = os.getenv("NOMAD_NAMESPACE", "development")  # Use "development" namespace as default

    def get_job(self, job_id: str, max_retries: int = 3, retry_delay: int = 2) -> Dict[str, Any]:
        """
        Get a job by ID with retry logic.

        Args:
            job_id: The ID of the job to retrieve
            max_retries: Maximum number of retry attempts (default: 3)
            retry_delay: Delay between retries in seconds (default: 2)

        Returns:
            Dict containing job details
        """
        last_exception = None

        # Try multiple times to get the job
        for attempt in range(max_retries):
            try:
                # Get the Nomad address from the client
                nomad_addr = f"http://{self.client.host}:{self.client.port}"

                # Build the URL for the job endpoint
                url = f"{nomad_addr}/v1/job/{job_id}"

                # Set up headers
                headers = {}
                if hasattr(self.client, 'token') and self.client.token:
                    headers["X-Nomad-Token"] = self.client.token

                # Set up params with the correct namespace
                params = {"namespace": self.namespace}

                # Make the request directly
                import requests
                response = requests.get(
                    url=url,
                    headers=headers,
                    params=params,
                    verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
                )

                # Check if the request was successful
                if response.status_code == 200:
                    return response.json()
                elif response.status_code == 404:
                    # If not the last attempt, log and retry
                    if attempt < max_retries - 1:
                        logger.warning(f"Job {job_id} not found on attempt {attempt+1}/{max_retries}, retrying in {retry_delay}s...")
                        time.sleep(retry_delay)
                        continue
                    else:
                        raise ValueError(f"Job not found after {max_retries} attempts: {job_id}")
                else:
                    raise ValueError(f"Failed to get job: {response.text}")

            except Exception as e:
                last_exception = e
                # If not the last attempt, log and retry
                if attempt < max_retries - 1:
                    logger.warning(f"Error getting job {job_id} on attempt {attempt+1}/{max_retries}: {str(e)}, retrying in {retry_delay}s...")
                    time.sleep(retry_delay)
                    continue
                else:
                    logger.error(f"Failed to get job {job_id} after {max_retries} attempts: {str(e)}")
                    raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")

        # If we get here, all retries failed
        logger.error(f"Failed to get job {job_id} after {max_retries} attempts")
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")

    def list_jobs(self) -> List[Dict[str, Any]]:
        """List all jobs."""
        try:
            # Get the Nomad address from the client
            nomad_addr = f"http://{self.client.host}:{self.client.port}"

            # Build the URL for the jobs endpoint
            url = f"{nomad_addr}/v1/jobs"

            # Set up headers
            headers = {}
            if hasattr(self.client, 'token') and self.client.token:
                headers["X-Nomad-Token"] = self.client.token

            # Set up params with the correct namespace
            params = {"namespace": self.namespace}

            # Make the request directly
            import requests
            response = requests.get(
                url=url,
                headers=headers,
                params=params,
                verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
            )

            # Check if the request was successful
            if response.status_code == 200:
                return response.json()
            else:
                raise ValueError(f"Failed to list jobs: {response.text}")
        except Exception as e:
            logger.error(f"Failed to list jobs: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to list jobs: {str(e)}")

    def start_job(self, job_spec: Dict[str, Any]) -> Dict[str, Any]:
        """
        Start a job using the provided specification.

        Args:
            job_spec: The job specification to submit. Can be a raw job spec or wrapped in a "Job" key.

        Returns:
            Dict containing job_id, eval_id, status, and any warnings
        """
        try:
            # Extract the job ID from the specification
            job_id = None
            if "Job" in job_spec:
                job_id = job_spec["Job"].get("ID") or job_spec["Job"].get("id")
            else:
                job_id = job_spec.get("ID") or job_spec.get("id")

            if not job_id:
                raise ValueError("Job ID is required in the job specification")

            logger.info(f"Processing job start request for job ID: {job_id}")

            # Determine the namespace to use, with clear priorities:
            # 1. Explicitly provided in the job spec (highest priority)
            # 2. Service instance namespace
            # 3. Fallback to "development"
            namespace = self.namespace

            # Normalize the job structure to ensure it has a "Job" wrapper
            normalized_job_spec = {}
            if "Job" in job_spec:
                normalized_job_spec = job_spec
                # Check if a namespace is specified in the job spec
                if "Namespace" in job_spec["Job"]:
                    namespace = job_spec["Job"]["Namespace"]
                    logger.info(f"Using namespace from job spec: {namespace}")
            else:
                # Check if a namespace is specified in the job spec
                if "Namespace" in job_spec:
                    namespace = job_spec["Namespace"]
                    logger.info(f"Using namespace from job spec: {namespace}")

                # Wrap the job spec in a "Job" key
                normalized_job_spec = {"Job": job_spec}

            # Replace wildcard namespaces with the default
            if namespace == "*":
                namespace = "development"
                logger.info(f"Replaced wildcard namespace with default: {namespace}")

            # Always explicitly set the namespace in the job spec
            normalized_job_spec["Job"]["Namespace"] = namespace

            logger.info(f"Submitting job {job_id} to namespace {namespace}")
            logger.info(f"Job specification structure: {list(normalized_job_spec.keys())}")
            logger.info(f"Job keys: {list(normalized_job_spec['Job'].keys())}")

            # Submit the job - pass the job_id and job spec directly.
            # The namespace is already set in the job spec.
            response = self.client.job.register_job(job_id, normalized_job_spec)

            logger.info(f"Job registration response: {response}")

            return {
                "job_id": job_id,
                "eval_id": response.get("EvalID"),
                "status": "started",
                "warnings": response.get("Warnings"),
                "namespace": namespace
            }
        except Exception as e:
            logger.error(f"Failed to start job: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to start job: {str(e)}")
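Both spec shapes are accepted, per the docstring above. A minimal sketch of the two call styles; the specs are trimmed to their ID fields for illustration, so a real cluster would reject them without Datacenters, TaskGroups, and so on, and "qa" is a placeholder namespace:

from app.services.nomad_client import NomadService

service = NomadService()

# Raw spec: the service adds the {"Job": ...} wrapper and injects the namespace.
service.start_job({"ID": "my-web-app", "Name": "my-web-app", "Type": "service"})

# Pre-wrapped spec: an explicit Namespace takes priority over the service default.
service.start_job({"Job": {"ID": "my-web-app", "Name": "my-web-app", "Namespace": "qa"}})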
    def stop_job(self, job_id: str, purge: bool = False) -> Dict[str, Any]:
        """
        Stop a job by ID.

        Args:
            job_id: The ID of the job to stop
            purge: If true, the job will be purged from Nomad's state entirely

        Returns:
            Dict containing job_id, eval_id, and status
        """
        try:
            logger.info(f"Stopping job {job_id} in namespace {self.namespace} (purge={purge})")

            # Get the Nomad address from the client
            nomad_addr = f"http://{self.client.host}:{self.client.port}"

            # Build the URL for the job endpoint
            url = f"{nomad_addr}/v1/job/{job_id}"

            # Set up headers
            headers = {}
            if hasattr(self.client, 'token') and self.client.token:
                headers["X-Nomad-Token"] = self.client.token

            # Set up params with the correct namespace and purge option
            params = {
                "namespace": self.namespace,
                "purge": str(purge).lower()
            }

            # Make the request directly
            import requests
            response = requests.delete(
                url=url,
                headers=headers,
                params=params,
                verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
            )

            # Check if the request was successful
            if response.status_code == 200:
                response_data = response.json()
                logger.info(f"Job stop response: {response_data}")

                return {
                    "job_id": job_id,
                    "eval_id": response_data.get("EvalID"),
                    "status": "stopped",
                    "namespace": self.namespace
                }
            else:
                raise ValueError(f"Failed to stop job: {response.text}")

        except Exception as e:
            logger.error(f"Failed to stop job {job_id}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to stop job: {str(e)}")

    def get_allocations(self, job_id: str) -> List[Dict[str, Any]]:
        """Get all allocations for a job."""
        try:
            # Get the Nomad address from the client
            nomad_addr = f"http://{self.client.host}:{self.client.port}"

            # Build the URL for the job allocations endpoint
            url = f"{nomad_addr}/v1/job/{job_id}/allocations"

            # Set up headers
            headers = {}
            if hasattr(self.client, 'token') and self.client.token:
                headers["X-Nomad-Token"] = self.client.token

            # Set up params with the correct namespace
            params = {"namespace": self.namespace}

            # Make the request directly
            import requests
            response = requests.get(
                url=url,
                headers=headers,
                params=params,
                verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True
            )

            # Check if the request was successful
            if response.status_code == 200:
                return response.json()
            elif response.status_code == 404:
                logger.warning(f"No allocations found for job {job_id}")
                return []
            else:
                raise ValueError(f"Failed to get allocations: {response.text}")
        except Exception as e:
            logger.error(f"Failed to get allocations for job {job_id}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to get allocations: {str(e)}")

    def get_allocation_logs(self, alloc_id: str, task: str, log_type: str = "stderr") -> str:
        """Get logs for a specific allocation and task."""
        try:
            # Detailed logging to make failures easier to diagnose
            logger.info(f"Getting logs for allocation {alloc_id}, task {task}, type {log_type}")

            if alloc_id == "repository":
                logger.error("Invalid allocation ID 'repository' detected")
                return "Error: Invalid allocation ID 'repository'"

            # Verify the allocation ID is a valid UUID (must be 36 characters)
            if not alloc_id or len(alloc_id) != 36:
                logger.error(f"Invalid allocation ID format: {alloc_id} (length: {len(alloc_id) if alloc_id else 0})")
                return "Error: Invalid allocation ID format - must be a 36 character UUID"

            # Get allocation info to verify it exists
            try:
                allocation = self.client.allocation.get_allocation(alloc_id)
                if not allocation:
                    logger.warning(f"Allocation {alloc_id} not found")
                    return f"Allocation {alloc_id} not found"
            except Exception as e:
                logger.error(f"Error checking allocation: {str(e)}")
                return f"Error checking allocation: {str(e)}"

            # Try multiple approaches to get logs
            log_content = None
            error_messages = []

            # Approach 1: Standard API
            try:
                logger.info("Attempting to get logs using the standard API")
                logs = self.client.allocation.logs.get_logs(
                    alloc_id,
                    task,
                    log_type,
                    plain=True
                )

                if logs:
                    if isinstance(logs, dict) and logs.get("Data"):
                        log_content = logs.get("Data")
                        logger.info("Successfully retrieved logs using the standard API")
                    elif isinstance(logs, str):
                        log_content = logs
                        logger.info("Successfully retrieved logs as a string")
                    else:
                        error_messages.append(f"Unexpected log format: {type(logs)}")
                        logger.warning(f"Unexpected log format: {type(logs)}")
                else:
                    error_messages.append("No logs returned from standard API")
                    logger.warning("No logs returned from standard API")
            except Exception as e:
                error_str = str(e)
                error_messages.append(f"Standard API error: {error_str}")
                logger.warning(f"Standard API failed: {error_str}")

            # Approach 2: Try a raw HTTP request if the standard API didn't work
            if not log_content:
                try:
                    import requests

                    # Get the Nomad address from environment or use the default
                    nomad_addr = os.getenv("NOMAD_ADDR", "http://localhost:4646").rstrip('/')
                    nomad_token = os.getenv("NOMAD_TOKEN")

                    # Construct the URL for logs
                    logs_url = f"{nomad_addr}/v1/client/fs/logs/{alloc_id}"

                    # Set up headers
                    headers = {}
                    if nomad_token:
                        headers["X-Nomad-Token"] = nomad_token

                    # Set up query parameters
                    params = {
                        "task": task,
                        "type": log_type,
                        "plain": "true"
                    }

                    if self.namespace and self.namespace != "*":
                        params["namespace"] = self.namespace

                    logger.info(f"Attempting to get logs using a direct HTTP request to: {logs_url}")
                    response = requests.get(logs_url, headers=headers, params=params, verify=False)

                    if response.status_code == 200:
                        log_content = response.text
                        logger.info("Successfully retrieved logs using a direct HTTP request")
                    else:
                        error_messages.append(f"HTTP request failed with status {response.status_code}: {response.text}")
                        logger.warning(f"HTTP request failed: {response.status_code} - {response.text}")
                except ImportError:
                    error_messages.append("Requests library not available for fallback HTTP request")
                    logger.warning("Requests library not available for fallback HTTP request")
                except Exception as e:
                    error_str = str(e)
                    error_messages.append(f"HTTP request error: {error_str}")
                    logger.warning(f"HTTP request failed: {error_str}")

            # Approach 3: Direct system call as a last resort
            if not log_content:
                try:
                    import subprocess

                    # Path to the Nomad command-line client
                    nomad_cmd = "nomad"  # Default, assumes nomad is in PATH

                    # Build the command; flags must come before the positional arguments
                    cmd_parts = [
                        nomad_cmd,
                        "alloc", "logs",
                        "-verbose",
                    ]

                    # Add namespace if specified
                    if self.namespace and self.namespace != "*":
                        cmd_parts.extend(["-namespace", self.namespace])

                    # Select stderr explicitly; stdout is the CLI default
                    if log_type == "stderr":
                        cmd_parts.append("-stderr")

                    # Add the allocation and task as positional arguments
                    cmd_parts.extend([alloc_id, task])

                    logger.info(f"Attempting to get logs using command: {' '.join(cmd_parts)}")
                    process = subprocess.run(cmd_parts, capture_output=True, text=True)

                    if process.returncode == 0:
                        log_content = process.stdout
                        logger.info("Successfully retrieved logs using the command-line client")
                    else:
                        error_messages.append(f"Command-line client failed: {process.stderr}")
                        logger.warning(f"Command-line client failed: {process.stderr}")
                except Exception as e:
                    error_str = str(e)
                    error_messages.append(f"Command-line client error: {error_str}")
                    logger.warning(f"Command-line client failed: {error_str}")

            # Return the logs if we got them, otherwise return the accumulated errors
            if log_content:
                return log_content
            else:
                error_msg = "; ".join(error_messages)
                logger.error(f"Failed to get logs after multiple attempts: {error_msg}")
                return f"Error retrieving {log_type} logs: {error_msg}"

        except Exception as e:
            error_str = str(e)
            logger.error(f"Failed to get logs for allocation {alloc_id}, task {task}: {error_str}")
            raise HTTPException(status_code=500, detail=f"Failed to get logs: {error_str}")
    def get_deployment_status(self, job_id: str) -> Dict[str, Any]:
        """Get the deployment status for a job."""
        try:
            return self.client.job.get_deployment(job_id, namespace=self.namespace)
        except Exception as e:
            logger.error(f"Failed to get deployment status for job {job_id}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to get deployment status: {str(e)}")

    def get_job_evaluations(self, job_id: str) -> List[Dict[str, Any]]:
        """Get evaluations for a job."""
        try:
            return self.client.job.get_evaluations(job_id, namespace=self.namespace)
        except Exception as e:
            logger.error(f"Failed to get evaluations for job {job_id}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to get evaluations: {str(e)}")
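End to end, the service methods compose as in the sketch below. The job ID and task name are placeholders, and this assumes a reachable cluster with at least one running allocation for the job:

from app.services.nomad_client import NomadService

service = NomadService()

# Enumerate jobs in the configured namespace.
for job in service.list_jobs():
    print(job.get("ID"), job.get("Status"))

# Walk from a job to its allocations to its logs.
allocations = service.get_allocations("my-web-app")       # placeholder job ID
if allocations:
    alloc_id = allocations[0]["ID"]                        # 36-character UUID
    print(service.get_allocation_logs(alloc_id, task="web", log_type="stderr"))

# Stop the job; purge=True also removes it from Nomad's state.
service.stop_job("my-web-app", purge=True)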