Update README.md
This commit is contained in:
293
app/routers/logs.py
Normal file
293
app/routers/logs.py
Normal file
@ -0,0 +1,293 @@
|
||||
from fastapi import APIRouter, HTTPException, Query
from typing import List, Dict, Any, Optional
import logging

from app.services.nomad_client import NomadService
from app.services.config_service import ConfigService

# Configure logging
logger = logging.getLogger(__name__)

# Router for log-related endpoints; mounted by the application elsewhere.
router = APIRouter()
# Default Nomad client (default namespace). Endpoints that need a specific
# namespace construct their own NomadService per request.
nomad_service = NomadService()
# Maps repositories to their associated Nomad jobs.
config_service = ConfigService()

# More specific routes first
|
||||
@router.get("/repository/{repository}")
|
||||
async def get_repository_logs(
|
||||
repository: str,
|
||||
log_type: str = Query("stderr", description="Log type: stdout or stderr"),
|
||||
limit: int = Query(1, description="Number of allocations to return logs for"),
|
||||
plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
|
||||
):
|
||||
"""Get logs for a repository's associated job."""
|
||||
# Get the job info for the repository
|
||||
job_info = config_service.get_job_from_repository(repository)
|
||||
if not job_info:
|
||||
raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}")
|
||||
|
||||
job_id = job_info.get("job_id")
|
||||
namespace = job_info.get("namespace")
|
||||
|
||||
logger.info(f"Getting logs for job {job_id} in namespace {namespace}")
|
||||
|
||||
# Create a custom service with the specific namespace if provided
|
||||
custom_nomad = NomadService()
|
||||
if namespace:
|
||||
custom_nomad.namespace = namespace
|
||||
|
||||
# Get allocations for the job
|
||||
allocations = custom_nomad.get_allocations(job_id)
|
||||
if not allocations:
|
||||
raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
|
||||
|
||||
logger.info(f"Found {len(allocations)} allocations for job {job_id}")
|
||||
|
||||
# Sort allocations by creation time (descending)
|
||||
sorted_allocations = sorted(
|
||||
allocations,
|
||||
key=lambda a: a.get("CreateTime", 0),
|
||||
reverse=True
|
||||
)
|
||||
|
||||
# Limit the number of allocations
|
||||
allocations_to_check = sorted_allocations[:limit]
|
||||
|
||||
# Also get the job info to determine task names
|
||||
job = custom_nomad.get_job(job_id)
|
||||
|
||||
# Collect logs for each allocation and task
|
||||
result = []
|
||||
error_messages = []
|
||||
|
||||
for alloc in allocations_to_check:
|
||||
# Use the full UUID of the allocation
|
||||
alloc_id = alloc.get("ID")
|
||||
if not alloc_id:
|
||||
logger.warning(f"Allocation ID not found in allocation data")
|
||||
error_messages.append("Allocation ID not found in allocation data")
|
||||
continue
|
||||
|
||||
logger.info(f"Processing allocation {alloc_id} for job {job_id}")
|
||||
|
||||
# Get task name from the allocation's TaskStates
|
||||
task_states = alloc.get("TaskStates", {})
|
||||
if not task_states:
|
||||
logger.warning(f"No task states found in allocation {alloc_id}")
|
||||
error_messages.append(f"No task states found in allocation {alloc_id}")
|
||||
|
||||
for task_name, task_state in task_states.items():
|
||||
try:
|
||||
logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")
|
||||
|
||||
logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)
|
||||
|
||||
# Check if logs is an error message
|
||||
if logs and isinstance(logs, str):
|
||||
if logs.startswith("Error:") or logs.startswith("No "):
|
||||
logger.warning(f"Error retrieving logs for {task_name}: {logs}")
|
||||
error_messages.append(logs)
|
||||
continue
|
||||
|
||||
# Only add if we got some logs
|
||||
if logs:
|
||||
result.append({
|
||||
"alloc_id": alloc_id,
|
||||
"task": task_name,
|
||||
"type": log_type,
|
||||
"create_time": alloc.get("CreateTime"),
|
||||
"logs": logs
|
||||
})
|
||||
logger.info(f"Successfully retrieved logs for {task_name}")
|
||||
else:
|
||||
error_msg = f"No logs found for {task_name}"
|
||||
logger.warning(error_msg)
|
||||
error_messages.append(error_msg)
|
||||
except Exception as e:
|
||||
# Log but continue to try other tasks
|
||||
error_msg = f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
error_messages.append(error_msg)
|
||||
|
||||
# Return as plain text if requested
|
||||
if plain_text:
|
||||
if not result:
|
||||
if error_messages:
|
||||
return f"No logs found for this job. Errors: {'; '.join(error_messages)}"
|
||||
return "No logs found for this job"
|
||||
return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])
|
||||
|
||||
# Otherwise return as JSON
|
||||
return {
|
||||
"job_id": job_id,
|
||||
"repository": repository,
|
||||
"namespace": namespace,
|
||||
"allocation_logs": result,
|
||||
"errors": error_messages if error_messages else None
|
||||
}
|
||||
|
||||
@router.get("/job/{job_id}")
|
||||
async def get_job_logs(
|
||||
job_id: str,
|
||||
namespace: str = Query(None, description="Nomad namespace"),
|
||||
log_type: str = Query("stderr", description="Log type: stdout or stderr"),
|
||||
limit: int = Query(1, description="Number of allocations to return logs for"),
|
||||
plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
|
||||
):
|
||||
"""Get logs for the most recent allocations of a job."""
|
||||
# Create a custom service with the specific namespace if provided
|
||||
custom_nomad = NomadService()
|
||||
if namespace:
|
||||
custom_nomad.namespace = namespace
|
||||
logger.info(f"Getting logs for job {job_id} in namespace {namespace}")
|
||||
else:
|
||||
logger.info(f"Getting logs for job {job_id} in default namespace")
|
||||
|
||||
# Get all allocations for the job
|
||||
allocations = custom_nomad.get_allocations(job_id)
|
||||
if not allocations:
|
||||
raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
|
||||
|
||||
logger.info(f"Found {len(allocations)} allocations for job {job_id}")
|
||||
|
||||
# Sort allocations by creation time (descending)
|
||||
sorted_allocations = sorted(
|
||||
allocations,
|
||||
key=lambda a: a.get("CreateTime", 0),
|
||||
reverse=True
|
||||
)
|
||||
|
||||
# Limit the number of allocations
|
||||
allocations_to_check = sorted_allocations[:limit]
|
||||
|
||||
# Collect logs for each allocation and task
|
||||
result = []
|
||||
for alloc in allocations_to_check:
|
||||
alloc_id = alloc.get("ID")
|
||||
if not alloc_id:
|
||||
logger.warning(f"Allocation ID not found in allocation data")
|
||||
continue
|
||||
|
||||
logger.info(f"Processing allocation {alloc_id} for job {job_id}")
|
||||
|
||||
# Get task names from the allocation's TaskStates
|
||||
task_states = alloc.get("TaskStates", {})
|
||||
for task_name, task_state in task_states.items():
|
||||
try:
|
||||
logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}")
|
||||
|
||||
logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type)
|
||||
# Only add if we got some logs and not an error message
|
||||
if logs and not logs.startswith("No") and not logs.startswith("Error"):
|
||||
result.append({
|
||||
"alloc_id": alloc_id,
|
||||
"task": task_name,
|
||||
"type": log_type,
|
||||
"create_time": alloc.get("CreateTime"),
|
||||
"logs": logs
|
||||
})
|
||||
logger.info(f"Successfully retrieved logs for {task_name}")
|
||||
else:
|
||||
logger.warning(f"No logs found for {task_name}: {logs}")
|
||||
except Exception as e:
|
||||
# Log but continue to try other tasks
|
||||
logger.error(f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}")
|
||||
|
||||
# Return as plain text if requested
|
||||
if plain_text:
|
||||
if not result:
|
||||
return "No logs found for this job"
|
||||
return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result])
|
||||
|
||||
# Otherwise return as JSON
|
||||
return {
|
||||
"job_id": job_id,
|
||||
"namespace": namespace,
|
||||
"allocation_logs": result
|
||||
}
|
||||
|
||||
@router.get("/latest/{job_id}")
|
||||
async def get_latest_allocation_logs(
|
||||
job_id: str,
|
||||
log_type: str = Query("stderr", description="Log type: stdout or stderr"),
|
||||
plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
|
||||
):
|
||||
"""Get logs from the latest allocation of a job."""
|
||||
# Get all allocations for the job
|
||||
allocations = nomad_service.get_allocations(job_id)
|
||||
if not allocations:
|
||||
raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}")
|
||||
|
||||
# Sort allocations by creation time (descending)
|
||||
sorted_allocations = sorted(
|
||||
allocations,
|
||||
key=lambda a: a.get("CreateTime", 0),
|
||||
reverse=True
|
||||
)
|
||||
|
||||
# Get the latest allocation
|
||||
latest_alloc = sorted_allocations[0]
|
||||
alloc_id = latest_alloc.get("ID")
|
||||
|
||||
# Get task group and task information
|
||||
job = nomad_service.get_job(job_id)
|
||||
task_groups = job.get("TaskGroups", [])
|
||||
|
||||
# Collect logs for each task in the latest allocation
|
||||
result = []
|
||||
for task_group in task_groups:
|
||||
tasks = task_group.get("Tasks", [])
|
||||
for task in tasks:
|
||||
task_name = task.get("Name")
|
||||
try:
|
||||
logs = nomad_service.get_allocation_logs(alloc_id, task_name, log_type)
|
||||
result.append({
|
||||
"alloc_id": alloc_id,
|
||||
"task": task_name,
|
||||
"type": log_type,
|
||||
"create_time": latest_alloc.get("CreateTime"),
|
||||
"logs": logs
|
||||
})
|
||||
except Exception as e:
|
||||
# Skip if logs cannot be retrieved for this task
|
||||
pass
|
||||
|
||||
# Return as plain text if requested
|
||||
if plain_text:
|
||||
return "\n\n".join([f"=== {r['task']} ===\n{r['logs']}" for r in result])
|
||||
|
||||
# Otherwise return as JSON
|
||||
return {
|
||||
"job_id": job_id,
|
||||
"latest_allocation": alloc_id,
|
||||
"task_logs": result
|
||||
}
|
||||
|
||||
@router.get("/build/{job_id}")
|
||||
async def get_build_logs(job_id: str, plain_text: bool = Query(False)):
|
||||
"""Get build logs for a job (usually stderr logs from the latest allocation)."""
|
||||
# This is a convenience endpoint that returns stderr logs from the latest allocation
|
||||
return await get_latest_allocation_logs(job_id, "stderr", plain_text)
|
||||
|
||||
# Generic allocation logs route last
|
||||
@router.get("/allocation/{alloc_id}/{task}")
|
||||
async def get_allocation_logs(
|
||||
alloc_id: str,
|
||||
task: str,
|
||||
log_type: str = Query("stderr", description="Log type: stdout or stderr"),
|
||||
plain_text: bool = Query(False, description="Return plain text logs instead of JSON")
|
||||
):
|
||||
"""Get logs for a specific allocation and task."""
|
||||
# Validate log_type
|
||||
if log_type not in ["stdout", "stderr"]:
|
||||
raise HTTPException(status_code=400, detail="Log type must be stdout or stderr")
|
||||
|
||||
# Get logs from Nomad
|
||||
logs = nomad_service.get_allocation_logs(alloc_id, task, log_type)
|
||||
|
||||
# Return as plain text if requested
|
||||
if plain_text:
|
||||
return logs
|
||||
|
||||
# Otherwise return as JSON
|
||||
return {"alloc_id": alloc_id, "task": task, "type": log_type, "logs": logs}
|
Reference in New Issue
Block a user