Update README.md

This commit is contained in:
2025-02-26 15:25:39 +07:00
parent d6acf632e3
commit baf1723a50
69 changed files with 5525 additions and 0 deletions

1
app/routers/__init__.py Normal file
View File

@ -0,0 +1 @@
# Import routers

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

230
app/routers/claude.py Normal file
View File

@ -0,0 +1,230 @@
from fastapi import APIRouter, HTTPException, Body, Query, Depends
from typing import Dict, Any, List, Optional
import logging
import json
from app.services.nomad_client import NomadService
from app.schemas.claude_api import ClaudeJobRequest, ClaudeJobSpecification, ClaudeJobResponse
router = APIRouter()
logger = logging.getLogger(__name__)
@router.post("/jobs", response_model=ClaudeJobResponse)
async def manage_job(request: ClaudeJobRequest):
    """
    Endpoint for Claude to manage Nomad jobs with a simplified interface.

    Supported actions (case-insensitive): "status", "stop", "restart".
    Unknown actions yield an HTTP 400; unexpected failures are reported in
    the response body with success=False rather than raised.

    Args:
        request: Simplified job request carrying job_id, action, and the
            optional namespace and purge flags.

    Returns:
        ClaudeJobResponse describing the outcome of the requested action.

    Raises:
        HTTPException 400: when the action is not one of the supported verbs.
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        if request.namespace:
            nomad_service.namespace = request.namespace
        # Normalise once instead of lowering the string per branch.
        action = request.action.lower()
        if action == "status":
            # Get job status
            job = nomad_service.get_job(request.job_id)
            # Get allocations for more detailed status
            allocations = nomad_service.get_allocations(request.job_id)
            latest_alloc = None
            if allocations:
                # Sort allocations by creation time (descending)
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]
            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status=job.get("Status", "unknown"),
                message=f"Job {request.job_id} is {job.get('Status', 'unknown')}",
                details={
                    "job": job,
                    "latest_allocation": latest_alloc
                }
            )
        elif action == "stop":
            # Stop the job, optionally purging it from Nomad state.
            result = nomad_service.stop_job(request.job_id, purge=request.purge)
            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="stopped",
                message=f"Job {request.job_id} has been stopped" + (" and purged" if request.purge else ""),
                details=result
            )
        elif action == "restart":
            # Capture the current spec, stop the job, then resubmit the spec.
            job_spec = nomad_service.get_job(request.job_id)
            nomad_service.stop_job(request.job_id)
            result = nomad_service.start_job(job_spec)
            return ClaudeJobResponse(
                success=True,
                job_id=request.job_id,
                status="restarted",
                message=f"Job {request.job_id} has been restarted",
                details=result
            )
        else:
            # Unknown action
            raise HTTPException(status_code=400, detail=f"Unknown action: {request.action}")
    except HTTPException:
        # Bug fix: previously the generic handler below swallowed the 400
        # raised for unknown actions and returned it as a 200 error payload.
        raise
    except Exception as e:
        logger.error(f"Error managing job {request.job_id}: {str(e)}")
        return ClaudeJobResponse(
            success=False,
            job_id=request.job_id,
            status="error",
            message=f"Error: {str(e)}",
            details=None
        )
@router.post("/create-job", response_model=ClaudeJobResponse)
async def create_job(job_spec: ClaudeJobSpecification):
    """
    Create and start a new Nomad job from a simplified specification.

    The simplified spec is expanded into a full Nomad job definition and
    submitted; failures are reported in the response body with success=False
    rather than raised.
    """
    try:
        # Build a namespace-aware Nomad client for this request.
        service = NomadService()
        if job_spec.namespace:
            service.namespace = job_spec.namespace
        # Expand the simplified specification and submit it.
        expanded_spec = job_spec.to_nomad_job_spec()
        submission = service.start_job(expanded_spec)
        return ClaudeJobResponse(
            success=True,
            job_id=job_spec.job_id,
            status="started",
            message=f"Job {job_spec.job_id} has been created and started",
            details=submission,
        )
    except Exception as exc:
        logger.error(f"Error creating job {job_spec.job_id}: {str(exc)}")
        return ClaudeJobResponse(
            success=False,
            job_id=job_spec.job_id,
            status="error",
            message=f"Error: {str(exc)}",
            details=None,
        )
@router.get("/list-jobs", response_model=List[Dict[str, Any]])
async def list_jobs(namespace: str = Query("development")):
    """
    List all jobs in the specified namespace.

    Returns a simplified view of each job: id, name, status, type, namespace.
    """
    try:
        service = NomadService()
        service.namespace = namespace
        # Project each raw Nomad job onto the compact shape callers expect.
        return [
            {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "status": job.get("Status"),
                "type": job.get("Type"),
                "namespace": namespace,
            }
            for job in service.list_jobs()
        ]
    except Exception as exc:
        logger.error(f"Error listing jobs: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Error listing jobs: {str(exc)}")
@router.get("/job-logs/{job_id}", response_model=Dict[str, Any])
async def get_job_logs(job_id: str, namespace: str = Query("development")):
    """
    Get logs for a job.

    Fetches stdout and stderr from one task of the job's most recent
    allocation.

    Args:
        job_id: Nomad job identifier.
        namespace: Nomad namespace to look in (defaults to "development").

    Returns:
        Dict with a success flag, the allocation/task identifiers and the
        logs, or a failure payload when the job has no allocations or an
        error occurs (errors are reported in-band, not raised).
    """
    try:
        # Create a Nomad service instance with the specified namespace
        nomad_service = NomadService()
        nomad_service.namespace = namespace
        # Get allocations for the job
        allocations = nomad_service.get_allocations(job_id)
        if not allocations:
            return {
                "success": False,
                "job_id": job_id,
                "message": f"No allocations found for job {job_id}",
                "logs": None
            }
        # Sort allocations by creation time (descending)
        sorted_allocations = sorted(
            allocations,
            key=lambda a: a.get("CreateTime", 0),
            reverse=True
        )
        latest_alloc = sorted_allocations[0]
        alloc_id = latest_alloc.get("ID")
        # Get the task name from the allocation.
        # NOTE(review): only the first task in TaskStates is used — assumes
        # single-task allocations; confirm for multi-task groups.
        task_name = None
        if "TaskStates" in latest_alloc:
            task_states = latest_alloc["TaskStates"]
            if task_states:
                task_name = next(iter(task_states.keys()))
        if not task_name:
            task_name = "app"  # Default task name when none can be derived
        # Get both log streams for the chosen allocation/task.
        stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout")
        stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr")
        return {
            "success": True,
            "job_id": job_id,
            "allocation_id": alloc_id,
            "task_name": task_name,
            "message": f"Retrieved logs for job {job_id}",
            "logs": {
                "stdout": stdout_logs,
                "stderr": stderr_logs
            }
        }
    except Exception as e:
        logger.error(f"Error getting logs for job {job_id}: {str(e)}")
        return {
            "success": False,
            "job_id": job_id,
            "message": f"Error getting logs: {str(e)}",
            "logs": None
        }

80
app/routers/configs.py Normal file
View File

@ -0,0 +1,80 @@
from fastapi import APIRouter, HTTPException, Body, Path
from typing import List, Dict, Any
import json
from app.services.config_service import ConfigService
from app.schemas.config import ConfigCreate, ConfigUpdate, ConfigResponse
router = APIRouter()
config_service = ConfigService()
@router.get("/", response_model=List[ConfigResponse])
async def list_configs():
    """Return every stored configuration."""
    all_configs = config_service.list_configs()
    return all_configs
@router.get("/{name}", response_model=ConfigResponse)
async def get_config(name: str = Path(..., description="Configuration name")):
    """Fetch a single configuration by its name."""
    config = config_service.get_config(name)
    return config
@router.post("/", response_model=ConfigResponse, status_code=201)
async def create_config(config_data: ConfigCreate):
    """Create a new configuration; the payload's name becomes its key."""
    # Everything except the name is stored as the configuration body.
    payload = config_data.dict(exclude={"name"})
    return config_service.create_config(config_data.name, payload)
@router.put("/{name}", response_model=ConfigResponse)
async def update_config(name: str, config_data: ConfigUpdate):
    """Apply a partial update to an existing configuration."""
    # Only fields the caller actually supplied are forwarded.
    changes = config_data.dict(exclude_unset=True)
    return config_service.update_config(name, changes)
@router.delete("/{name}", response_model=Dict[str, Any])
async def delete_config(name: str = Path(..., description="Configuration name")):
    """Remove a configuration by name."""
    outcome = config_service.delete_config(name)
    return outcome
@router.get("/repository/{repository}")
async def get_config_by_repository(repository: str):
    """Find the configuration linked to the given repository."""
    # First config whose repository field matches, or None.
    match = next(
        (c for c in config_service.list_configs() if c.get("repository") == repository),
        None,
    )
    if match is not None:
        return match
    raise HTTPException(status_code=404, detail=f"No configuration found for repository: {repository}")
@router.get("/job/{job_id}")
async def get_config_by_job(job_id: str):
    """Find the configuration linked to the given job ID."""
    # First config whose job_id field matches, or None.
    match = next(
        (c for c in config_service.list_configs() if c.get("job_id") == job_id),
        None,
    )
    if match is not None:
        return match
    raise HTTPException(status_code=404, detail=f"No configuration found for job_id: {job_id}")
@router.post("/link")
async def link_repository_to_job(
    repository: str = Body(..., embed=True),
    job_id: str = Body(..., embed=True),
    name: str = Body(None, embed=True)
):
    """Link a repository to a job by creating a configuration entry."""
    # Derive a config name from the job ID when the caller omits one.
    if not name:
        name = job_id.lower().replace('/', '_').replace(' ', '_')
    # Store the minimal link between repository and job.
    link_config = {
        "repository": repository,
        "job_id": job_id,
    }
    return config_service.create_config(name, link_config)
@router.post("/unlink/{name}")
async def unlink_repository_from_job(name: str):
    """Unlink a repository from a job by deleting the named configuration."""
    deletion = config_service.delete_config(name)
    return deletion

396
app/routers/jobs.py Normal file
View File

@ -0,0 +1,396 @@
from fastapi import APIRouter, Depends, HTTPException, Body, Query
from typing import Dict, Any, List, Optional
import json
import logging
from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService
from app.schemas.job import JobResponse, JobOperation, JobSpecification
router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()
# Configure logging
logger = logging.getLogger(__name__)
@router.get("/", response_model=List[JobResponse])
async def list_jobs():
    """List all jobs, annotating each with its linked repository when known."""
    jobs = nomad_service.list_jobs()
    for job in jobs:
        identifier = job.get("ID")
        if not identifier:
            continue
        # Attach repository info when a config links this job to one.
        linked_repo = config_service.get_repository_from_job(identifier)
        if linked_repo:
            job["repository"] = linked_repo
    return jobs
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(job_id: str):
    """Fetch a job by ID, annotated with its linked repository when known."""
    job = nomad_service.get_job(job_id)
    linked_repo = config_service.get_repository_from_job(job_id)
    if linked_repo:
        job["repository"] = linked_repo
    return job
@router.post("/", response_model=JobOperation)
async def start_job(job_spec: JobSpecification = Body(...)):
    """Submit the provided job specification to Nomad and start it."""
    payload = job_spec.dict()
    return nomad_service.start_job(payload)
@router.delete("/{job_id}", response_model=JobOperation)
async def stop_job(job_id: str, purge: bool = Query(False)):
    """Stop a job by ID, optionally purging it from Nomad's state."""
    outcome = nomad_service.stop_job(job_id, purge)
    return outcome
@router.get("/{job_id}/allocations")
async def get_job_allocations(job_id: str):
    """Return every allocation belonging to the given job."""
    allocations = nomad_service.get_allocations(job_id)
    return allocations
@router.get("/{job_id}/latest-allocation")
async def get_latest_allocation(job_id: str):
    """Return the most recently created allocation for a job."""
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
    # Newest allocation is the one with the largest CreateTime; max() with a
    # key is equivalent to sorting descending and taking the first element.
    return max(allocations, key=lambda alloc: alloc.get("CreateTime", 0))
@router.get("/{job_id}/status")
async def get_job_status(job_id: str, namespace: str = Query(None, description="Nomad namespace")):
    """
    Get the current status of a job, including deployment and latest allocation.

    Args:
        job_id: Nomad job identifier.
        namespace: Optional Nomad namespace; when omitted the service default
            is used (logged here as "development").

    Returns:
        Dict with the job's status fields, plus optional "deployment" and
        "latest_allocation" sections when that data is retrievable.

    Raises:
        HTTPException 500: when the job itself cannot be fetched. Deployment
            and allocation lookups are best-effort and never fail the request.
    """
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job status for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job status for {job_id} in default namespace (development)")
        job = custom_nomad.get_job(job_id)
        status = {
            "job_id": job_id,
            "namespace": namespace or custom_nomad.namespace,
            "status": job.get("Status", "unknown"),
            "stable": job.get("Stable", False),
            "submitted_at": job.get("SubmitTime", 0),
        }
        # Get the latest deployment if any; failures here are non-fatal.
        try:
            deployment = custom_nomad.get_deployment_status(job_id)
            if deployment:
                status["deployment"] = {
                    "id": deployment.get("ID"),
                    "status": deployment.get("Status"),
                    "description": deployment.get("StatusDescription"),
                }
        except Exception as e:
            logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}")
            pass  # Deployment info is optional
        # Get the latest allocation if any; failures here are non-fatal.
        try:
            allocations = custom_nomad.get_allocations(job_id)
            if allocations:
                # Newest first by CreateTime
                sorted_allocations = sorted(
                    allocations,
                    key=lambda a: a.get("CreateTime", 0),
                    reverse=True
                )
                latest_alloc = sorted_allocations[0]
                status["latest_allocation"] = {
                    "id": latest_alloc.get("ID"),
                    "status": latest_alloc.get("ClientStatus"),
                    "description": latest_alloc.get("ClientDescription", ""),
                    "created_at": latest_alloc.get("CreateTime", 0),
                }
        except Exception as e:
            logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}")
            pass  # Allocation info is optional
        return status
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}")
@router.get("/{job_id}/specification")
async def get_job_specification(job_id: str, namespace: str = Query(None, description="Nomad namespace"), raw: bool = Query(False)):
    """
    Get the job specification for a job.

    Args:
        job_id: Nomad job identifier.
        namespace: Optional Nomad namespace override.
        raw: When True, return the job object exactly as Nomad sent it.

    Returns:
        The raw job dict, or a trimmed specification view of it.

    Raises:
        HTTPException 404: when the job cannot be fetched.
    """
    try:
        # Create a custom service with the specific namespace if provided
        custom_nomad = NomadService()
        if namespace:
            custom_nomad.namespace = namespace
            logger.info(f"Getting job specification for {job_id} in namespace {namespace}")
        else:
            logger.info(f"Getting job specification for {job_id} in default namespace (development)")
        job = custom_nomad.get_job(job_id)
        if raw:
            return job
        # Extract just the job specification part if present.
        # NOTE(review): Nomad job objects normally expose "ID", not "JobID" —
        # confirm this key is correct, otherwise this branch may never fire
        # and the raw job is always returned.
        if "JobID" in job:
            job_spec = {
                "id": job.get("ID"),
                "name": job.get("Name"),
                "type": job.get("Type"),
                "status": job.get("Status"),
                "datacenters": job.get("Datacenters", []),
                "namespace": job.get("Namespace"),
                "task_groups": job.get("TaskGroups", []),
                "meta": job.get("Meta", {}),
            }
            return job_spec
        return job
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}")
@router.post("/{job_id}/restart")
async def restart_job(job_id: str):
    """Restart a job: capture its spec, stop it, then resubmit the spec."""
    try:
        current_spec = nomad_service.get_job(job_id)
        nomad_service.stop_job(job_id)
        outcome = nomad_service.start_job(current_spec)
        return {
            "job_id": job_id,
            "status": "restarted",
            "eval_id": outcome.get("eval_id"),
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(exc)}")
@router.get("/by-repository/{repository}")
async def get_job_by_repository(repository: str):
    """Look up the job linked to a repository and return it with repo info."""
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")
    try:
        # Use a namespace-scoped client when the config names one; otherwise
        # fall back to the shared default client.
        if namespace:
            scoped = NomadService()
            scoped.namespace = namespace
            job = scoped.get_job(job_id)
        else:
            job = nomad_service.get_job(job_id)
        # Annotate the result with the originating repository.
        job["repository"] = repository
        return job
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}")
def _default_job_spec(job_id: str, repository: str) -> dict:
    """Build a minimal default Docker service job spec for *repository*."""
    return {
        "ID": job_id,
        "Name": job_id,
        "Type": "service",
        "Datacenters": ["jm"],  # Default datacenter
        "TaskGroups": [
            {
                "Name": "app",
                "Count": 1,
                "Tasks": [
                    {
                        "Name": job_id.split('-')[0],  # Use first part of job ID as task name
                        "Driver": "docker",
                        "Config": {
                            "image": f"registry.dev.meisheng.group/{repository}:latest",
                            "force_pull": True,
                            "ports": ["http"]
                        },
                        "Resources": {
                            "CPU": 500,
                            "MemoryMB": 512
                        }
                    }
                ],
                "Networks": [
                    {
                        "DynamicPorts": [
                            {
                                "Label": "http",
                                "Value": 0,
                                "To": 8000
                            }
                        ]
                    }
                ]
            }
        ],
        "Meta": {
            "repository": repository
        }
    }
@router.post("/by-repository/{repository}/start")
async def start_job_by_repository(repository: str):
    """
    Start the job linked to a repository.

    Looks up the job via the repository configuration. If the job is already
    registered in Nomad, its current specification is resubmitted; when Nomad
    reports 404 (job not registered yet), a specification is taken from the
    repository config or a minimal default Docker job is generated.

    Raises:
        HTTPException 404: no job is configured for the repository.
        HTTPException: any non-404 error from Nomad is propagated unchanged.
    """
    job_info = config_service.get_job_from_repository(repository)
    if not job_info:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
    job_id = job_info.get("job_id")
    namespace = job_info.get("namespace")
    logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}")
    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        logger.info(f"Setting custom_nomad.namespace to {namespace}")
        custom_nomad.namespace = namespace
    # Log the current namespace being used
    logger.info(f"Nomad client namespace: {custom_nomad.namespace}")
    try:
        # Prefer the specification of the already-registered job.
        job_spec = custom_nomad.get_job(job_id)
        logger.info(f"Retrieved job specification for {job_id} from existing job")
        if isinstance(job_spec, dict):
            # Ensure the namespace is explicitly set in the spec itself.
            if namespace:
                logger.info(f"Setting namespace in job spec to {namespace}")
                job_spec["Namespace"] = namespace
            logger.info(f"Job spec keys: {job_spec.keys()}")
        # Start the job with the retrieved specification
        result = custom_nomad.start_job(job_spec)
        return {
            "job_id": job_id,
            "repository": repository,
            "status": "started",
            "eval_id": result.get("eval_id"),
            "namespace": namespace
        }
    except HTTPException as e:
        # Bug fix: only a 404 (job not registered yet) is recoverable here.
        # Previously non-404 errors were swallowed and execution continued
        # with job_spec unbound, masking the real failure with a NameError.
        if e.status_code != 404:
            raise
        logger.info(f"Job {job_id} not found, attempting to get specification from config")
        # Try to get job spec from repository config
        job_spec = config_service.get_job_spec_from_repository(repository)
        if not job_spec:
            logger.warning(f"No job specification found for repository {repository}, creating a default one")
            job_spec = _default_job_spec(job_id, repository)
        # Set the namespace explicitly in the job spec
        if namespace:
            logger.info(f"Setting namespace in default job spec to {namespace}")
            job_spec["Namespace"] = namespace
        logger.info(f"Starting job {job_id} with specification")
        # Log the job specification structure
        if isinstance(job_spec, dict):
            logger.info(f"Job spec keys: {job_spec.keys()}")
            if "Namespace" in job_spec:
                logger.info(f"Job spec namespace: {job_spec['Namespace']}")
        # Start the job with the specification
        result = custom_nomad.start_job(job_spec)
        return {
            "job_id": job_id,
            "repository": repository,
            "status": "started",
            "eval_id": result.get("eval_id"),
            "namespace": namespace
        }
@router.post("/by-repository/{repository}/stop")
async def stop_job_by_repository(repository: str, purge: bool = Query(False)):
    """Stop the job linked to a repository, optionally purging it."""
    linked = config_service.get_job_from_repository(repository)
    if not linked:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
    job_id = linked.get("job_id")
    namespace = linked.get("namespace")
    # Scope the client to the configured namespace when one is present.
    scoped_nomad = NomadService()
    if namespace:
        scoped_nomad.namespace = namespace
    outcome = scoped_nomad.stop_job(job_id, purge)
    return {
        "job_id": job_id,
        "repository": repository,
        "status": "stopped",
        "eval_id": outcome.get("eval_id"),
        "namespace": namespace
    }
@router.post("/by-repository/{repository}/restart")
async def restart_job_by_repository(repository: str):
    """Restart the job linked to a repository (stop, then resubmit its spec)."""
    linked = config_service.get_job_from_repository(repository)
    if not linked:
        raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
    job_id = linked.get("job_id")
    namespace = linked.get("namespace")
    # Scope the client to the configured namespace when one is present.
    scoped_nomad = NomadService()
    if namespace:
        scoped_nomad.namespace = namespace
    # Capture the current spec, stop the job, then resubmit the spec.
    current_spec = scoped_nomad.get_job(job_id)
    scoped_nomad.stop_job(job_id)
    outcome = scoped_nomad.start_job(current_spec)
    return {
        "job_id": job_id,
        "repository": repository,
        "status": "restarted",
        "eval_id": outcome.get("eval_id"),
        "namespace": namespace
    }

293
app/routers/logs.py Normal file
View File

@ -0,0 +1,293 @@
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional
import logging
from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService
# Configure logging
logger = logging.getLogger(__name__)
router = APIRouter()
nomad_service = NomadService()
config_service = ConfigService()
# More specific routes first
@router.get("/repository/{repository}")
async def get_repository_logs(
repository: str,
log_type: str = Query("stderr", description="Log type: stdout or stderr"),
limit: int = Query(1, description="Number of allocations to return logs for"),
plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
"""Get logs for a repository's associated job."""
# Get the job info for the repository
job_info = config_service.get_job_from_repository(repository)
if not job_info:
raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
job_id = job_info.get("job_id")
namespace = job_info.get("namespace")
logger.info(f"Getting logs for job {job_id} in namespace {namespace}")
# Create a custom service with the specific namespace if provided
custom_nomad = NomadService()
if namespace:
custom_nomad.namespace = namespace
# Get allocations for the job
allocations = custom_nomad.get_allocations(job_id)
if not allocations:
raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
logger.info(f"Found {len(allocations)} allocations for job {job_id}")
# Sort allocations by creation time (descending)
sorted_allocations = sorted(
allocations,
key=lambda a: a.get("CreateTime", 0),
reverse=True
)
# Limit the number of allocations
allocations_to_check = sorted_allocations[:limit]
# Also get the job info to determine task names
job = custom_nomad.get_job(job_id)
# Collect logs for each allocation and task
result = []
error_messages = []
for alloc in allocations_to_check:
# Use the full UUID of the allocation
alloc_id = alloc.get("ID")
if not alloc_id:
logger.warning(f"Allocation ID not found in allocation data")
error_messages.append("Allocation ID not found in allocation data")
continue
logger.info(f"Processing allocation {alloc_id} for job {job_id}")
# Get task name from the allocation's TaskStates
task_states = alloc.get("TaskStates", {})
if not task_states:
logger.warning(f"No task states found in allocation {alloc_id}")
error_messages.append(f"No task states found in allocation {alloc_id}")
for task_name, task_state in task_states.items():
try:
logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")
logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)
# Check if logs is an error message
if logs and isinstance(logs, str):
if logs.startswith("Error:") or logs.startswith("No "):
logger.warning(f"Error retrieving logs for {task_name}: {logs}")
error_messages.append(logs)
continue
# Only add if we got some logs
if logs:
result.append({
"alloc_id": alloc_id,
"task": task_name,
"type": log_type,
"create_time": alloc.get("CreateTime"),
"logs": logs
})
logger.info(f"Successfully retrieved logs for {task_name}")
else:
error_msg = f"No logs found for {task_name}"
logger.warning(error_msg)
error_messages.append(error_msg)
except Exception as e:
# Log but continue to try other tasks
error_msg = f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}"
logger.error(error_msg)
error_messages.append(error_msg)
# Return as plain text if requested
if plain_text:
if not result:
if error_messages:
return f"No logs found for this job. Errors: {'; '.join(error_messages)}"
return "No logs found for this job"
return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])
# Otherwise return as JSON
return {
"job_id": job_id,
"repository": repository,
"namespace": namespace,
"allocation_logs": result,
"errors": error_messages if error_messages else None
}
@router.get("/job/{job_id}")
async def get_job_logs(
    job_id: str,
    namespace: str = Query(None, description="Nomad namespace"),
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    limit: int = Query(1, description="Number of allocations to return logs for"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """
    Get logs for the most recent allocations of a job.

    Collects logs from the newest *limit* allocations, one entry per task.
    Per-task failures are logged and skipped; unlike the repository variant,
    they are not reported back to the caller.

    Args:
        job_id: Nomad job identifier.
        namespace: Optional Nomad namespace override.
        log_type: Which stream to fetch, "stdout" or "stderr".
        limit: How many of the newest allocations to inspect.
        plain_text: When True, return logs as a single text blob.

    Returns:
        Plain text when plain_text=True, otherwise a JSON payload with
        per-allocation logs.

    Raises:
        HTTPException 404: when the job has no allocations.
    """
    # Create a custom service with the specific namespace if provided
    custom_nomad = NomadService()
    if namespace:
        custom_nomad.namespace = namespace
        logger.info(f"Getting logs for job {job_id} in namespace {namespace}")
    else:
        logger.info(f"Getting logs for job {job_id} in default namespace")
    # Get all allocations for the job
    allocations = custom_nomad.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
    logger.info(f"Found {len(allocations)} allocations for job {job_id}")
    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )
    # Limit the number of allocations
    allocations_to_check = sorted_allocations[:limit]
    # Collect logs for each allocation and task
    result = []
    for alloc in allocations_to_check:
        alloc_id = alloc.get("ID")
        if not alloc_id:
            logger.warning(f"Allocation ID not found in allocation data")
            continue
        logger.info(f"Processing allocation {alloc_id} for job {job_id}")
        # Get task names from the allocation's TaskStates
        task_states = alloc.get("TaskStates", {})
        for task_name, task_state in task_states.items():
            try:
                logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")
                logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)
                # The service reports some failures as sentinel strings
                # ("No ...", "Error...") — skip those as well as empty logs.
                if logs and not logs.startswith("No") and not logs.startswith("Error"):
                    result.append({
                        "alloc_id": alloc_id,
                        "task": task_name,
                        "type": log_type,
                        "create_time": alloc.get("CreateTime"),
                        "logs": logs
                    })
                    logger.info(f"Successfully retrieved logs for {task_name}")
                else:
                    logger.warning(f"No logs found for {task_name}: {logs}")
            except Exception as e:
                # Log but continue to try other tasks
                logger.error(f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}")
    # Return as plain text if requested
    if plain_text:
        if not result:
            return "No logs found for this job"
        return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])
    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "namespace": namespace,
        "allocation_logs": result
    }
@router.get("/latest/{job_id}")
async def get_latest_allocation_logs(
    job_id: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """
    Get logs from the latest allocation of a job.

    Task names are taken from the job specification's task groups (not from
    the allocation's TaskStates as the other log routes do), and logs are
    fetched for each task from the newest allocation.

    Args:
        job_id: Nomad job identifier.
        log_type: Which stream to fetch, "stdout" or "stderr".
        plain_text: When True, return logs as a single text blob.

    Returns:
        Plain text when plain_text=True, otherwise a JSON payload with
        per-task logs for the newest allocation.

    Raises:
        HTTPException 404: when the job has no allocations.
    """
    # Get all allocations for the job
    allocations = nomad_service.get_allocations(job_id)
    if not allocations:
        raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
    # Sort allocations by creation time (descending)
    sorted_allocations = sorted(
        allocations,
        key=lambda a: a.get("CreateTime", 0),
        reverse=True
    )
    # Get the latest allocation
    latest_alloc = sorted_allocations[0]
    alloc_id = latest_alloc.get("ID")
    # Get task group and task information from the job spec
    job = nomad_service.get_job(job_id)
    task_groups = job.get("TaskGroups", [])
    # Collect logs for each task in the latest allocation
    result = []
    for task_group in task_groups:
        tasks = task_group.get("Tasks", [])
        for task in tasks:
            task_name = task.get("Name")
            try:
                logs = nomad_service.get_allocation_logs(alloc_id, task_name, log_type)
                result.append({
                    "alloc_id": alloc_id,
                    "task": task_name,
                    "type": log_type,
                    "create_time": latest_alloc.get("CreateTime"),
                    "logs": logs
                })
            except Exception as e:
                # Skip if logs cannot be retrieved for this task.
                # NOTE(review): failures here are fully silent (not even
                # logged) — consider logging like the sibling routes do.
                pass
    # Return as plain text if requested
    if plain_text:
        return "\n\n".join([f"=== {r['task']} ===\n{r['logs']}" for r in result])
    # Otherwise return as JSON
    return {
        "job_id": job_id,
        "latest_allocation": alloc_id,
        "task_logs": result
    }
@router.get("/build/{job_id}")
async def get_build_logs(job_id: str, plain_text: bool = Query(False)):
    """Convenience endpoint: build output for a job."""
    # Build output conventionally lands on stderr, so delegate to the
    # latest-allocation route with log_type fixed to "stderr".
    return await get_latest_allocation_logs(job_id, "stderr", plain_text)
# Generic allocation logs route last
@router.get("/allocation/{alloc_id}/{task}")
async def get_allocation_logs(
    alloc_id: str,
    task: str,
    log_type: str = Query("stderr", description="Log type: stdout or stderr"),
    plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
):
    """Fetch logs for one specific allocation/task pair."""
    # Reject anything other than the two supported streams up front.
    if log_type not in ("stdout", "stderr"):
        raise HTTPException(status_code=400, detail="Log type must be stdout or stderr")
    logs = nomad_service.get_allocation_logs(alloc_id, task, log_type)
    if plain_text:
        return logs
    return {"alloc_id": alloc_id, "task": task, "type": log_type, "logs": logs}

View File

@ -0,0 +1,89 @@
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional
from app.services.gitea_client import GiteaClient
from app.services.config_service import ConfigService
router = APIRouter()
gitea_client = GiteaClient()
config_service = ConfigService()
@router.get("/")
async def list_repositories(limit: int = Query(100, description="Maximum number of repositories to return")):
    """
    List all available repositories from Gitea.

    If Gitea integration is not configured, returns an empty list. Each
    repository whose clone URL is linked to a job via a stored configuration
    is annotated with "linked_job" and "config_name".

    Args:
        limit: Maximum number of repositories to return.

    Returns:
        List of repository dicts from Gitea, annotated in place.
    """
    repositories = gitea_client.list_repositories(limit)
    # Perf fix: fetch configurations once and index them by repository URL.
    # Previously list_configs() was re-fetched inside the loop for every
    # repository. Keeping only the first config per URL preserves the old
    # first-match-then-break behaviour.
    config_by_repo = {}
    for config in config_service.list_configs():
        repo_key = config.get("repository")
        if repo_key is not None and repo_key not in config_by_repo:
            config_by_repo[repo_key] = config
    for repo in repositories:
        repo_url = repo.get("clone_url")
        if repo_url:
            linked = config_by_repo.get(repo_url)
            if linked:
                repo["linked_job"] = linked.get("job_id")
                repo["config_name"] = linked.get("name")
    return repositories
@router.get("/{repository}")
async def get_repository_info(repository: str):
    """
    Get information about a specific repository.

    The repository parameter can be a repository URL or a repository alias.
    A URL is resolved directly against Gitea; an alias is first resolved to
    its configured URL and then looked up in Gitea. When a configuration
    links the repository to a job, the response is annotated with
    "linked_job", "config_name" and the full "config".

    Raises:
        HTTPException 404: when neither lookup succeeds.
    """
    # First check if it's a repository URL
    repo_info = gitea_client.get_repository_info(repository)
    if repo_info:
        # Check if repository is linked to a job
        configs = config_service.list_configs()
        for config in configs:
            if config.get("repository") == repository:
                repo_info["linked_job"] = config.get("job_id")
                repo_info["config_name"] = config.get("name")
                repo_info["config"] = config
                break
        return repo_info
    else:
        # Check if it's a repository alias in our configs
        config = config_service.get_config_by_repository(repository)
        if config:
            repo_url = config.get("repository")
            repo_info = gitea_client.get_repository_info(repo_url)
            if repo_info:
                repo_info["linked_job"] = config.get("job_id")
                repo_info["config_name"] = config.get("name")
                repo_info["config"] = config
                return repo_info
    raise HTTPException(status_code=404, detail=f"Repository not found: {repository}")
@router.get("/{repository}/branches")
async def get_repository_branches(repository: str):
    """
    Get branches for a specific repository.

    Accepts either a repository URL or a configured repository alias.
    """
    # Resolve an alias to its configured URL when one exists.
    alias_config = config_service.get_config_by_repository(repository)
    if alias_config:
        repository = alias_config.get("repository")
    branches = gitea_client.get_repository_branches(repository)
    if not branches:
        raise HTTPException(status_code=404, detail=f"No branches found for repository: {repository}")
    return branches