diff --git a/.env b/.env new file mode 100644 index 0000000..85250b5 --- /dev/null +++ b/.env @@ -0,0 +1,26 @@ +# Nomad connection settings +NOMAD_ADDR=http://pjmldk01.ds.meisheng.group:4646 +NOMAD_TOKEN= +NOMAD_SKIP_VERIFY=true +NOMAD_NAMESPACE=development + +# Gitea API configuration +GITEA_API_URL=https://gitea.dev.meisheng.group/api/v1 +GITEA_API_TOKEN=a2de6c0014e6d0108edb94fb7d524777bb75d33a +# Alternative authentication (uncomment if needed) +# GITEA_USERNAME=your-gitea-username +# GITEA_PASSWORD=your-gitea-password +GITEA_VERIFY_SSL=false + +# API settings +PORT=8000 +HOST=0.0.0.0 + +# Configuration directory +CONFIG_DIR=./configs + +# Logging level +LOG_LEVEL=INFO + +# Enable to make development easier +RELOAD=true \ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b1f0627 --- /dev/null +++ b/.env.example @@ -0,0 +1,22 @@ +# Nomad connection settings +NOMAD_ADDR=http://localhost:4646 +NOMAD_TOKEN= +NOMAD_SKIP_VERIFY=false + +# Gitea API configuration +GITEA_API_URL=http://gitea.internal.example.com/api/v1 +GITEA_API_TOKEN= +# Alternative authentication (if token is not available) +# GITEA_USERNAME= +# GITEA_PASSWORD= +GITEA_VERIFY_SSL=false + +# API settings +PORT=8000 +HOST=0.0.0.0 + +# Configuration directory +CONFIG_DIR=./configs + +# Optional: Logging level +LOG_LEVEL=INFO \ No newline at end of file diff --git a/CLAUDE_API_INTEGRATION.md b/CLAUDE_API_INTEGRATION.md new file mode 100644 index 0000000..72fefc8 --- /dev/null +++ b/CLAUDE_API_INTEGRATION.md @@ -0,0 +1,249 @@ +# Claude Integration with Nomad MCP + +This document explains how to configure Claude to connect to the Nomad MCP service and manage jobs. + +## Overview + +The Nomad MCP service provides a simplified REST API specifically designed for Claude to interact with Nomad jobs. This API allows Claude to: + +1. List all jobs in a namespace +2. Get the status of a specific job +3. Start, stop, and restart jobs +4. Create new jobs with a simplified specification +5. Retrieve logs from jobs + +## API Endpoints + +The Claude-specific API is available at the `/api/claude` prefix. The following endpoints are available: + +### List Jobs + +``` +GET /api/claude/list-jobs?namespace=development +``` + +Returns a list of all jobs in the specified namespace with their IDs, names, statuses, and types. + +### Manage Jobs + +``` +POST /api/claude/jobs +``` + +Manages existing jobs with operations like status check, stop, and restart. + +Request body: +```json +{ + "job_id": "example-job", + "action": "status|stop|restart", + "namespace": "development", + "purge": false +} +``` + +### Create Jobs + +``` +POST /api/claude/create-job +``` + +Creates a new job with a simplified specification. + +Request body: +```json +{ + "job_id": "example-job", + "name": "Example Job", + "type": "service", + "datacenters": ["jm"], + "namespace": "development", + "docker_image": "nginx:latest", + "count": 1, + "cpu": 100, + "memory": 128, + "ports": [ + { + "Label": "http", + "Value": 0, + "To": 80 + } + ], + "env_vars": { + "ENV_VAR1": "value1", + "ENV_VAR2": "value2" + } +} +``` + +### Get Job Logs + +``` +GET /api/claude/job-logs/{job_id}?namespace=development +``` + +Retrieves logs from the latest allocation of the specified job. + +## Configuring Claude Desktop Application + +To configure Claude to connect to the Nomad MCP service, follow these steps: + +### 1. Set Up API Access + +Claude needs to be configured with the base URL of your Nomad MCP service. 
This is typically: + +``` +http://your-server-address:8000 +``` + +### 2. Create a Claude Tool Configuration + +In the Claude desktop application, you can create a custom tool configuration that allows Claude to interact with the Nomad MCP API. Here's a sample configuration: + +```json +{ + "tools": [ + { + "name": "nomad_mcp", + "description": "Manage Nomad jobs through the MCP service", + "api_endpoints": [ + { + "name": "list_jobs", + "description": "List all jobs in a namespace", + "method": "GET", + "url": "http://your-server-address:8000/api/claude/list-jobs", + "params": [ + { + "name": "namespace", + "type": "string", + "description": "Nomad namespace", + "required": false, + "default": "development" + } + ] + }, + { + "name": "manage_job", + "description": "Manage a job (status, stop, restart)", + "method": "POST", + "url": "http://your-server-address:8000/api/claude/jobs", + "body": { + "job_id": "string", + "action": "string", + "namespace": "string", + "purge": "boolean" + } + }, + { + "name": "create_job", + "description": "Create a new job", + "method": "POST", + "url": "http://your-server-address:8000/api/claude/create-job", + "body": { + "job_id": "string", + "name": "string", + "type": "string", + "datacenters": "array", + "namespace": "string", + "docker_image": "string", + "count": "integer", + "cpu": "integer", + "memory": "integer", + "ports": "array", + "env_vars": "object" + } + }, + { + "name": "get_job_logs", + "description": "Get logs for a job", + "method": "GET", + "url": "http://your-server-address:8000/api/claude/job-logs/{job_id}", + "params": [ + { + "name": "namespace", + "type": "string", + "description": "Nomad namespace", + "required": false, + "default": "development" + } + ] + } + ] + } + ] +} +``` + +### 3. Import the Tool Configuration + +1. Open the Claude desktop application +2. Go to Settings > Tools +3. Click "Import Tool Configuration" +4. Select the JSON file with the above configuration +5. Click "Save" + +### 4. Test the Connection + +You can test the connection by asking Claude to list all jobs: + +``` +Please list all jobs in the development namespace using the Nomad MCP service. +``` + +Claude should use the configured tool to make an API request to the Nomad MCP service and return the list of jobs. + +## Example Prompts for Claude + +Here are some example prompts you can use with Claude to interact with the Nomad MCP service: + +### List Jobs + +``` +Please list all jobs in the development namespace. +``` + +### Check Job Status + +``` +What is the status of the job "example-job"? +``` + +### Start a New Job + +``` +Please create a new job with the following specifications: +- Job ID: test-nginx +- Docker image: nginx:latest +- Memory: 256MB +- CPU: 200MHz +- Port mapping: HTTP port 80 +``` + +### Stop a Job + +``` +Please stop the job "test-nginx" and purge it from Nomad. +``` + +### Get Job Logs + +``` +Show me the logs for the job "example-job". +``` + +## Troubleshooting + +If Claude is unable to connect to the Nomad MCP service, check the following: + +1. Ensure the Nomad MCP service is running and accessible from Claude's network +2. Verify the base URL in the tool configuration is correct +3. Check that the Nomad MCP service has proper connectivity to the Nomad server +4. Review the logs of the Nomad MCP service for any errors + +## Security Considerations + +The Claude API integration does not include authentication by default. If you need to secure the API: + +1. Add an API key requirement to the FastAPI application +2. 
Include the API key in the Claude tool configuration +3. Consider using HTTPS for all communications between Claude and the Nomad MCP service \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..24956fc --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Copy requirements first for better layer caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Create configs directory +RUN mkdir -p configs + +# Expose the API port +EXPOSE 8000 + +# Run the application +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/QUICK_START.md b/QUICK_START.md new file mode 100644 index 0000000..c207998 --- /dev/null +++ b/QUICK_START.md @@ -0,0 +1,122 @@ +# Nomad MCP - Quick Start Guide + +This guide will help you quickly set up and start using the Nomad MCP service for managing Nomad jobs. + +## 1. Installation + +### Clone the Repository + +```bash +git clone https://github.com/your-org/nomad-mcp.git +cd nomad-mcp +``` + +### Install Dependencies + +```bash +pip install -r requirements.txt +``` + +## 2. Configuration + +### Set Up Environment Variables + +Create a `.env` file in the project root: + +``` +# Nomad connection settings +NOMAD_ADDR=http://your-nomad-server:4646 +NOMAD_TOKEN=your-nomad-token +NOMAD_NAMESPACE=development +NOMAD_SKIP_VERIFY=true + +# API settings +PORT=8000 +HOST=0.0.0.0 + +# Logging level +LOG_LEVEL=INFO +``` + +Replace `your-nomad-server` and `your-nomad-token` with your actual Nomad server address and token. + +## 3. Start the Service + +```bash +python -m uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 +``` + +The service will be available at `http://localhost:8000`. + +## 4. Access the Web UI + +Open your browser and navigate to: + +``` +http://localhost:8000 +``` + +You should see the Nomad Job Manager UI with a list of jobs in your default namespace. + +## 5. Basic Operations + +### View Jobs + +1. Select a namespace from the dropdown in the header +2. Browse the list of jobs with their statuses + +### Manage a Job + +1. Click the "View" button next to a job to see its details +2. Use the "Restart" button to restart a job +3. Use the "Stop" button to stop a job + +### View Logs + +1. Select a job to view its details +2. Scroll down to the "Logs" section +3. Switch between stdout and stderr using the tabs + +## 6. API Usage + +### List Jobs + +```bash +curl http://localhost:8000/api/claude/list-jobs?namespace=development +``` + +### Get Job Status + +```bash +curl -X POST http://localhost:8000/api/claude/jobs \ + -H "Content-Type: application/json" \ + -d '{"job_id": "example-job", "action": "status", "namespace": "development"}' +``` + +### Stop a Job + +```bash +curl -X POST http://localhost:8000/api/claude/jobs \ + -H "Content-Type: application/json" \ + -d '{"job_id": "example-job", "action": "stop", "namespace": "development", "purge": false}' +``` + +## 7. Claude AI Integration + +To set up Claude AI integration: + +1. Configure Claude with the provided `claude_nomad_tool.json` file +2. Update the URLs in the configuration to point to your Nomad MCP service +3. Use natural language to ask Claude to manage your Nomad jobs + +Example prompt for Claude: +``` +Please list all jobs in the development namespace using the Nomad MCP service. 
+``` + +## Next Steps + +- Read the full [README.md](README.md) for detailed information +- Check out the [User Guide](USER_GUIDE.md) for the web UI +- Explore the [Claude API Integration Documentation](CLAUDE_API_INTEGRATION.md) for AI integration +- Review the API documentation at `http://localhost:8000/docs` \ No newline at end of file diff --git a/README.md b/README.md index e9fa580..2c0e375 100644 Binary files a/README.md and b/README.md differ diff --git a/README_NOMAD_API.md b/README_NOMAD_API.md new file mode 100644 index 0000000..c124159 --- /dev/null +++ b/README_NOMAD_API.md @@ -0,0 +1,116 @@ +# Nomad API Integration + +This document explains how the Nomad API integration works in this application, the recent improvements made, and how to test the functionality. + +## Overview + +This application uses Hashicorp Nomad for job orchestration, interacting with Nomad through its HTTP API. The integration allows starting, stopping, and monitoring jobs in Nomad. + +## Recent Improvements + +The following improvements have been made to the Nomad service integration: + +1. **Simplified Namespace Handling**: + - Clear priority order for determining which namespace to use: + 1. Explicitly specified in job spec (highest priority) + 2. Service instance namespace (default: "development") + - Consistent namespace handling across all API operations + - Better logging of namespace resolution + +2. **Standardized Job Specification Formatting**: + - Consistent normalization of job specifications to ensure proper structure + - Always ensures job specs are wrapped in a "Job" key as required by Nomad + - Maintains any existing structure while normalizing as needed + +3. **Enhanced Error Handling**: + - Improved error messages with more context + - Added logging of API responses for better troubleshooting + - Returns namespace information in responses + +4. **Automated Testing**: + - Added pytest tests to verify job start/stop functionality + - Tests cover different job specification formats + - Auto-cleanup of test jobs + +## How to Run Tests + +### Prerequisites + +1. Set up the environment variables: + - `NOMAD_ADDR`: URL of your Nomad server (e.g., `http://pjmldk01.ds.meisheng.group:4646`) + - `NOMAD_TOKEN`: Authentication token (if your Nomad cluster uses ACLs) + - `NOMAD_NAMESPACE`: Default namespace to use (defaults to "development") + +2. 
Install test dependencies: + ``` + pip install pytest pytest-cov + ``` + +### Running the Tests + +From the project root directory: + +```bash +python -m pytest tests/test_nomad_service.py -v +``` + +Add coverage reporting: + +```bash +python -m pytest tests/test_nomad_service.py --cov=app.services.nomad_client -v +``` + +## Manual API Testing + +You can use PowerShell to test Nomad API operations directly: + +### List Jobs + +```powershell +Invoke-RestMethod -Uri "http://pjmldk01.ds.meisheng.group:4646/v1/jobs?namespace=development" -Method GET +``` + +### Get Job Details + +```powershell +Invoke-RestMethod -Uri "http://pjmldk01.ds.meisheng.group:4646/v1/job/example-job?namespace=development" -Method GET +``` + +### Start a Job + +```powershell +$jobSpec = @{ + "Job" = @{ + "ID" = "example-job" + "Name" = "example-job" + "Namespace" = "development" + # Other job properties + } +} | ConvertTo-Json -Depth 20 + +Invoke-RestMethod -Uri "http://pjmldk01.ds.meisheng.group:4646/v1/jobs" -Method POST -Body $jobSpec -ContentType "application/json" +``` + +### Stop a Job + +```powershell +Invoke-RestMethod -Uri "http://pjmldk01.ds.meisheng.group:4646/v1/job/example-job?namespace=development" -Method DELETE +``` + +## API Documentation + +For more comprehensive documentation on the Nomad API integration, refer to the `nomad_job_api_docs.md` file. + +## Troubleshooting + +### Common Issues + +1. **Job Not Found**: Ensure you're specifying the correct namespace +2. **Failed to Start Job**: Check job specification format and resource requirements +3. **Permission Denied**: Verify ACL token has appropriate permissions + +### Debugging Tips + +1. Check the application logs for detailed error messages +2. Use the `-v` flag with pytest to see more verbose output +3. Try direct API requests to isolate application vs. Nomad API issues \ No newline at end of file diff --git a/USER_GUIDE.md b/USER_GUIDE.md new file mode 100644 index 0000000..84a5110 --- /dev/null +++ b/USER_GUIDE.md @@ -0,0 +1,135 @@ +# Nomad Job Manager UI - User Guide + +This guide provides instructions on how to use the Nomad Job Manager web interface to monitor and manage your Nomad jobs. + +## Accessing the UI + +The Nomad Job Manager UI is available at the root URL of the Nomad MCP service: + +``` +http://your-server-address:8000 +``` + +## Interface Overview + +The UI is divided into two main sections: + +1. **Job List** (left panel): Displays all jobs in the selected namespace +2. **Job Details** (right panel): Shows detailed information about the selected job and its logs + +### Header Controls + +- **Namespace Selector**: Dropdown to switch between different Nomad namespaces +- **Refresh Button**: Updates the job list with the latest information from Nomad + +## Managing Jobs + +### Viewing Jobs + +1. Select the desired namespace from the dropdown in the header +2. The job list will display all jobs in that namespace with their: + - Job ID + - Type (service, batch, system) + - Status (running, pending, dead) + - Action buttons + +### Job Actions + +For each job in the list, you can perform the following actions: + +- **View**: Display detailed information about the job and its logs +- **Restart**: Stop and restart the job with its current configuration +- **Stop**: Stop the job (with an option to purge it) + +### Viewing Job Details + +When you click the "View" button for a job, the right panel will display: + +1. **Job Information**: + - Job ID + - Status + - Type + - Namespace + - Datacenters + +2. 
**Allocation Information** (if available): + - Allocation ID + - Status + - Description + +3. **Logs**: + - Tabs to switch between stdout and stderr logs + - Scrollable log content + +## Working with Logs + +The logs section allows you to view the output from your job's tasks: + +1. Click on a job to view its details +2. Scroll down to the "Logs" section +3. Use the tabs to switch between: + - **stdout**: Standard output logs + - **stderr**: Standard error logs + +The logs are automatically retrieved from the most recent allocation of the job. + +## Common Tasks + +### Restarting a Failed Job + +1. Find the job in the job list +2. Click the "Restart" button +3. Confirm the restart when prompted +4. The job status will update once the restart is complete + +### Stopping a Job + +1. Find the job in the job list +2. Click the "Stop" button +3. Choose whether to purge the job when prompted +4. Confirm the stop operation +5. The job will be removed from the list if purged, or shown as "dead" if not purged + +### Troubleshooting a Job + +1. Select the job to view its details +2. Check the status and any error messages in the job details +3. Review the stderr logs for error information +4. If needed, restart the job to attempt recovery + +## Tips and Tricks + +- **Regular Refreshes**: Use the refresh button to get the latest job status +- **Log Navigation**: For large log files, use your browser's search function (Ctrl+F) to find specific messages +- **Multiple Namespaces**: Switch between namespaces to manage different environments (development, production, etc.) +- **Job Status Colors**: + - Green: Running jobs + - Orange: Pending jobs + - Red: Dead or failed jobs + +## Troubleshooting the UI + +If you encounter issues with the UI: + +1. **UI Doesn't Load**: + - Check that the Nomad MCP service is running + - Verify your browser can reach the server + - Check browser console for JavaScript errors + +2. **Jobs Not Appearing**: + - Ensure you've selected the correct namespace + - Verify that your Nomad server is accessible + - Check that your Nomad token has permission to list jobs + +3. **Cannot Perform Actions**: + - Verify that your Nomad token has appropriate permissions + - Check the browser console for API errors + - Review the Nomad MCP service logs for backend errors + +## Next Steps + +For more advanced operations or programmatic access, consider: + +1. Using the REST API directly (see the API documentation) +2. Setting up Claude AI integration for natural language job management +3. 
Creating job configuration mappings for repository-based job management \ No newline at end of file diff --git a/__pycache__/test_gitea_integration.cpython-313.pyc b/__pycache__/test_gitea_integration.cpython-313.pyc new file mode 100644 index 0000000..3be7d4e Binary files /dev/null and b/__pycache__/test_gitea_integration.cpython-313.pyc differ diff --git a/__pycache__/test_gitea_repos.cpython-313.pyc b/__pycache__/test_gitea_repos.cpython-313.pyc new file mode 100644 index 0000000..d02624a Binary files /dev/null and b/__pycache__/test_gitea_repos.cpython-313.pyc differ diff --git a/__pycache__/test_nomad_connection.cpython-313.pyc b/__pycache__/test_nomad_connection.cpython-313.pyc new file mode 100644 index 0000000..3ecd494 Binary files /dev/null and b/__pycache__/test_nomad_connection.cpython-313.pyc differ diff --git a/__pycache__/test_nomad_namespaces.cpython-313.pyc b/__pycache__/test_nomad_namespaces.cpython-313.pyc new file mode 100644 index 0000000..5744ae2 Binary files /dev/null and b/__pycache__/test_nomad_namespaces.cpython-313.pyc differ diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..df85467 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,2 @@ +# Import version +__version__ = "0.1.0" \ No newline at end of file diff --git a/app/__pycache__/__init__.cpython-313.pyc b/app/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..a0b6897 Binary files /dev/null and b/app/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/__pycache__/main.cpython-313.pyc b/app/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..c0ec8b1 Binary files /dev/null and b/app/__pycache__/main.cpython-313.pyc differ diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..638824d --- /dev/null +++ b/app/main.py @@ -0,0 +1,101 @@ +from fastapi import FastAPI, HTTPException, Depends +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +import os +import logging +from dotenv import load_dotenv + +from app.routers import jobs, logs, configs, repositories, claude +from app.services.nomad_client import get_nomad_client +from app.services.gitea_client import GiteaClient + +# Load environment variables +load_dotenv() + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + +# Initialize the FastAPI app +app = FastAPI( + title="Nomad MCP", + description="Service for AI agents to manage Nomad jobs via MCP protocol", + version="0.1.0", +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # Can be set to specific origins in production + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Include routers +app.include_router(jobs.router, prefix="/api/jobs", tags=["jobs"]) +app.include_router(logs.router, prefix="/api/logs", tags=["logs"]) +app.include_router(configs.router, prefix="/api/configs", tags=["configs"]) +app.include_router(repositories.router, prefix="/api/repositories", tags=["repositories"]) +app.include_router(claude.router, prefix="/api/claude", tags=["claude"]) + +@app.get("/api/health", tags=["health"]) +async def health_check(): + """Health check endpoint.""" + health_status = { + "status": "healthy", + "services": {} + } + + # Check Nomad connection + try: + client = get_nomad_client() + nomad_status = client.agent.get_agent() + health_status["services"]["nomad"] = { + "status": 
"connected", + "version": nomad_status.get("config", {}).get("Version", "unknown"), + } + except Exception as e: + logger.error(f"Nomad health check failed: {str(e)}") + health_status["services"]["nomad"] = { + "status": "failed", + "error": str(e), + } + + # Check Gitea connection + try: + gitea_client = GiteaClient() + if gitea_client.api_base_url: + # Try to list repositories as a connection test + repos = gitea_client.list_repositories(limit=1) + health_status["services"]["gitea"] = { + "status": "connected", + "api_url": gitea_client.api_base_url, + } + else: + health_status["services"]["gitea"] = { + "status": "not_configured", + } + except Exception as e: + logger.error(f"Gitea health check failed: {str(e)}") + health_status["services"]["gitea"] = { + "status": "failed", + "error": str(e), + } + + # Overall status is unhealthy if any service is failed + if any(service["status"] == "failed" for service in health_status["services"].values()): + health_status["status"] = "unhealthy" + + return health_status + +# Mount static files +app.mount("/", StaticFiles(directory="static", html=True), name="static") + +if __name__ == "__main__": + import uvicorn + port = int(os.getenv("PORT", "8000")) + uvicorn.run("app.main:app", host="0.0.0.0", port=port, reload=True) \ No newline at end of file diff --git a/app/routers/__init__.py b/app/routers/__init__.py new file mode 100644 index 0000000..e7e31a5 --- /dev/null +++ b/app/routers/__init__.py @@ -0,0 +1 @@ +# Import routers \ No newline at end of file diff --git a/app/routers/__pycache__/__init__.cpython-313.pyc b/app/routers/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..c5d4423 Binary files /dev/null and b/app/routers/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/routers/__pycache__/claude.cpython-313.pyc b/app/routers/__pycache__/claude.cpython-313.pyc new file mode 100644 index 0000000..7093613 Binary files /dev/null and b/app/routers/__pycache__/claude.cpython-313.pyc differ diff --git a/app/routers/__pycache__/configs.cpython-313.pyc b/app/routers/__pycache__/configs.cpython-313.pyc new file mode 100644 index 0000000..930f073 Binary files /dev/null and b/app/routers/__pycache__/configs.cpython-313.pyc differ diff --git a/app/routers/__pycache__/jobs.cpython-313.pyc b/app/routers/__pycache__/jobs.cpython-313.pyc new file mode 100644 index 0000000..786ca60 Binary files /dev/null and b/app/routers/__pycache__/jobs.cpython-313.pyc differ diff --git a/app/routers/__pycache__/logs.cpython-313.pyc b/app/routers/__pycache__/logs.cpython-313.pyc new file mode 100644 index 0000000..c478cec Binary files /dev/null and b/app/routers/__pycache__/logs.cpython-313.pyc differ diff --git a/app/routers/__pycache__/repositories.cpython-313.pyc b/app/routers/__pycache__/repositories.cpython-313.pyc new file mode 100644 index 0000000..a2c4c0a Binary files /dev/null and b/app/routers/__pycache__/repositories.cpython-313.pyc differ diff --git a/app/routers/claude.py b/app/routers/claude.py new file mode 100644 index 0000000..1a03b73 --- /dev/null +++ b/app/routers/claude.py @@ -0,0 +1,230 @@ +from fastapi import APIRouter, HTTPException, Body, Query, Depends +from typing import Dict, Any, List, Optional +import logging +import json + +from app.services.nomad_client import NomadService +from app.schemas.claude_api import ClaudeJobRequest, ClaudeJobSpecification, ClaudeJobResponse + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.post("/jobs", response_model=ClaudeJobResponse) +async def 
manage_job(request: ClaudeJobRequest): + """ + Endpoint for Claude to manage Nomad jobs with a simplified interface. + + This endpoint handles job operations like start, stop, restart, and status checks. + """ + try: + # Create a Nomad service instance with the specified namespace + nomad_service = NomadService() + if request.namespace: + nomad_service.namespace = request.namespace + + # Handle different actions + if request.action.lower() == "status": + # Get job status + job = nomad_service.get_job(request.job_id) + + # Get allocations for more detailed status + allocations = nomad_service.get_allocations(request.job_id) + latest_alloc = None + if allocations: + # Sort allocations by creation time (descending) + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + latest_alloc = sorted_allocations[0] + + return ClaudeJobResponse( + success=True, + job_id=request.job_id, + status=job.get("Status", "unknown"), + message=f"Job {request.job_id} is {job.get('Status', 'unknown')}", + details={ + "job": job, + "latest_allocation": latest_alloc + } + ) + + elif request.action.lower() == "stop": + # Stop the job + result = nomad_service.stop_job(request.job_id, purge=request.purge) + + return ClaudeJobResponse( + success=True, + job_id=request.job_id, + status="stopped", + message=f"Job {request.job_id} has been stopped" + (" and purged" if request.purge else ""), + details=result + ) + + elif request.action.lower() == "restart": + # Get the current job specification + job_spec = nomad_service.get_job(request.job_id) + + # Stop the job + nomad_service.stop_job(request.job_id) + + # Start the job with the original specification + result = nomad_service.start_job(job_spec) + + return ClaudeJobResponse( + success=True, + job_id=request.job_id, + status="restarted", + message=f"Job {request.job_id} has been restarted", + details=result + ) + + else: + # Unknown action + raise HTTPException(status_code=400, detail=f"Unknown action: {request.action}") + + except Exception as e: + logger.error(f"Error managing job {request.job_id}: {str(e)}") + return ClaudeJobResponse( + success=False, + job_id=request.job_id, + status="error", + message=f"Error: {str(e)}", + details=None + ) + +@router.post("/create-job", response_model=ClaudeJobResponse) +async def create_job(job_spec: ClaudeJobSpecification): + """ + Endpoint for Claude to create a new Nomad job with a simplified interface. + + This endpoint allows creating a job with minimal configuration. + """ + try: + # Create a Nomad service instance with the specified namespace + nomad_service = NomadService() + if job_spec.namespace: + nomad_service.namespace = job_spec.namespace + + # Convert the simplified job spec to Nomad format + nomad_job_spec = job_spec.to_nomad_job_spec() + + # Start the job + result = nomad_service.start_job(nomad_job_spec) + + return ClaudeJobResponse( + success=True, + job_id=job_spec.job_id, + status="started", + message=f"Job {job_spec.job_id} has been created and started", + details=result + ) + + except Exception as e: + logger.error(f"Error creating job {job_spec.job_id}: {str(e)}") + return ClaudeJobResponse( + success=False, + job_id=job_spec.job_id, + status="error", + message=f"Error: {str(e)}", + details=None + ) + +@router.get("/list-jobs", response_model=List[Dict[str, Any]]) +async def list_jobs(namespace: str = Query("development")): + """ + List all jobs in the specified namespace. + + Returns a simplified list of jobs with their IDs and statuses. 
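+
+    Each item is a flat dictionary of the form (illustrative values):
+    {"id": "example-job", "name": "example-job", "status": "running", "type": "service", "namespace": "development"}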
+ """ + try: + # Create a Nomad service instance with the specified namespace + nomad_service = NomadService() + nomad_service.namespace = namespace + + # Get all jobs + jobs = nomad_service.list_jobs() + + # Return a simplified list + simplified_jobs = [] + for job in jobs: + simplified_jobs.append({ + "id": job.get("ID"), + "name": job.get("Name"), + "status": job.get("Status"), + "type": job.get("Type"), + "namespace": namespace + }) + + return simplified_jobs + + except Exception as e: + logger.error(f"Error listing jobs: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error listing jobs: {str(e)}") + +@router.get("/job-logs/{job_id}", response_model=Dict[str, Any]) +async def get_job_logs(job_id: str, namespace: str = Query("development")): + """ + Get logs for a job. + + Returns logs from the latest allocation of the job. + """ + try: + # Create a Nomad service instance with the specified namespace + nomad_service = NomadService() + nomad_service.namespace = namespace + + # Get allocations for the job + allocations = nomad_service.get_allocations(job_id) + if not allocations: + return { + "success": False, + "job_id": job_id, + "message": f"No allocations found for job {job_id}", + "logs": None + } + + # Sort allocations by creation time (descending) + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + latest_alloc = sorted_allocations[0] + alloc_id = latest_alloc.get("ID") + + # Get the task name from the allocation + task_name = None + if "TaskStates" in latest_alloc: + task_states = latest_alloc["TaskStates"] + if task_states: + task_name = next(iter(task_states.keys())) + + if not task_name: + task_name = "app" # Default task name + + # Get logs for the allocation + stdout_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stdout") + stderr_logs = nomad_service.get_allocation_logs(alloc_id, task_name, "stderr") + + return { + "success": True, + "job_id": job_id, + "allocation_id": alloc_id, + "task_name": task_name, + "message": f"Retrieved logs for job {job_id}", + "logs": { + "stdout": stdout_logs, + "stderr": stderr_logs + } + } + + except Exception as e: + logger.error(f"Error getting logs for job {job_id}: {str(e)}") + return { + "success": False, + "job_id": job_id, + "message": f"Error getting logs: {str(e)}", + "logs": None + } \ No newline at end of file diff --git a/app/routers/configs.py b/app/routers/configs.py new file mode 100644 index 0000000..ee03de1 --- /dev/null +++ b/app/routers/configs.py @@ -0,0 +1,80 @@ +from fastapi import APIRouter, HTTPException, Body, Path +from typing import List, Dict, Any +import json + +from app.services.config_service import ConfigService +from app.schemas.config import ConfigCreate, ConfigUpdate, ConfigResponse + +router = APIRouter() +config_service = ConfigService() + +@router.get("/", response_model=List[ConfigResponse]) +async def list_configs(): + """List all available configurations.""" + return config_service.list_configs() + +@router.get("/{name}", response_model=ConfigResponse) +async def get_config(name: str = Path(..., description="Configuration name")): + """Get a specific configuration by name.""" + return config_service.get_config(name) + +@router.post("/", response_model=ConfigResponse, status_code=201) +async def create_config(config_data: ConfigCreate): + """Create a new configuration.""" + return config_service.create_config(config_data.name, config_data.dict(exclude={"name"})) + +@router.put("/{name}", response_model=ConfigResponse) 
+async def update_config(name: str, config_data: ConfigUpdate): + """Update an existing configuration.""" + return config_service.update_config(name, config_data.dict(exclude_unset=True)) + +@router.delete("/{name}", response_model=Dict[str, Any]) +async def delete_config(name: str = Path(..., description="Configuration name")): + """Delete a configuration.""" + return config_service.delete_config(name) + +@router.get("/repository/{repository}") +async def get_config_by_repository(repository: str): + """Find configuration by repository.""" + configs = config_service.list_configs() + + for config in configs: + if config.get("repository") == repository: + return config + + raise HTTPException(status_code=404, detail=f"No configuration found for repository: {repository}") + +@router.get("/job/{job_id}") +async def get_config_by_job(job_id: str): + """Find configuration by job ID.""" + configs = config_service.list_configs() + + for config in configs: + if config.get("job_id") == job_id: + return config + + raise HTTPException(status_code=404, detail=f"No configuration found for job_id: {job_id}") + +@router.post("/link") +async def link_repository_to_job( + repository: str = Body(..., embed=True), + job_id: str = Body(..., embed=True), + name: str = Body(None, embed=True) +): + """Link a repository to a job.""" + # Generate a name if not provided + if not name: + name = f"{job_id.lower().replace('/', '_').replace(' ', '_')}" + + # Create the config + config = { + "repository": repository, + "job_id": job_id, + } + + return config_service.create_config(name, config) + +@router.post("/unlink/{name}") +async def unlink_repository_from_job(name: str): + """Unlink a repository from a job by deleting the configuration.""" + return config_service.delete_config(name) \ No newline at end of file diff --git a/app/routers/jobs.py b/app/routers/jobs.py new file mode 100644 index 0000000..5dffca6 --- /dev/null +++ b/app/routers/jobs.py @@ -0,0 +1,396 @@ +from fastapi import APIRouter, Depends, HTTPException, Body, Query +from typing import Dict, Any, List, Optional +import json +import logging + +from app.services.nomad_client import NomadService +from app.services.config_service import ConfigService +from app.schemas.job import JobResponse, JobOperation, JobSpecification + +router = APIRouter() +nomad_service = NomadService() +config_service = ConfigService() + +# Configure logging +logger = logging.getLogger(__name__) + +@router.get("/", response_model=List[JobResponse]) +async def list_jobs(): + """List all jobs.""" + jobs = nomad_service.list_jobs() + # Enhance job responses with repository information if available + for job in jobs: + job_id = job.get("ID") + if job_id: + repository = config_service.get_repository_from_job(job_id) + if repository: + job["repository"] = repository + return jobs + +@router.get("/{job_id}", response_model=JobResponse) +async def get_job(job_id: str): + """Get a job by ID.""" + job = nomad_service.get_job(job_id) + # Add repository information if available + repository = config_service.get_repository_from_job(job_id) + if repository: + job["repository"] = repository + return job + +@router.post("/", response_model=JobOperation) +async def start_job(job_spec: JobSpecification = Body(...)): + """Start a Nomad job with the provided specification.""" + return nomad_service.start_job(job_spec.dict()) + +@router.delete("/{job_id}", response_model=JobOperation) +async def stop_job(job_id: str, purge: bool = Query(False)): + """Stop a job by ID.""" + return 
nomad_service.stop_job(job_id, purge) + +@router.get("/{job_id}/allocations") +async def get_job_allocations(job_id: str): + """Get all allocations for a job.""" + return nomad_service.get_allocations(job_id) + +@router.get("/{job_id}/latest-allocation") +async def get_latest_allocation(job_id: str): + """Get the latest allocation for a job.""" + allocations = nomad_service.get_allocations(job_id) + if not allocations: + raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}") + + # Sort allocations by creation time (descending) + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + + return sorted_allocations[0] + +@router.get("/{job_id}/status") +async def get_job_status(job_id: str, namespace: str = Query(None, description="Nomad namespace")): + """Get the current status of a job, including deployment and latest allocation.""" + try: + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + custom_nomad.namespace = namespace + logger.info(f"Getting job status for {job_id} in namespace {namespace}") + else: + logger.info(f"Getting job status for {job_id} in default namespace (development)") + + job = custom_nomad.get_job(job_id) + status = { + "job_id": job_id, + "namespace": namespace or custom_nomad.namespace, + "status": job.get("Status", "unknown"), + "stable": job.get("Stable", False), + "submitted_at": job.get("SubmitTime", 0), + } + + # Get the latest deployment if any + try: + deployment = custom_nomad.get_deployment_status(job_id) + if deployment: + status["deployment"] = { + "id": deployment.get("ID"), + "status": deployment.get("Status"), + "description": deployment.get("StatusDescription"), + } + except Exception as e: + logger.warning(f"Failed to get deployment for job {job_id}: {str(e)}") + pass # Deployment info is optional + + # Get the latest allocation if any + try: + allocations = custom_nomad.get_allocations(job_id) + if allocations: + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + latest_alloc = sorted_allocations[0] + status["latest_allocation"] = { + "id": latest_alloc.get("ID"), + "status": latest_alloc.get("ClientStatus"), + "description": latest_alloc.get("ClientDescription", ""), + "created_at": latest_alloc.get("CreateTime", 0), + } + except Exception as e: + logger.warning(f"Failed to get allocations for job {job_id}: {str(e)}") + pass # Allocation info is optional + + return status + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to get job status: {str(e)}") + +@router.get("/{job_id}/specification") +async def get_job_specification(job_id: str, namespace: str = Query(None, description="Nomad namespace"), raw: bool = Query(False)): + """Get the job specification for a job.""" + try: + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + custom_nomad.namespace = namespace + logger.info(f"Getting job specification for {job_id} in namespace {namespace}") + else: + logger.info(f"Getting job specification for {job_id} in default namespace (development)") + + job = custom_nomad.get_job(job_id) + + if raw: + return job + + # Extract just the job specification part if present + if "JobID" in job: + job_spec = { + "id": job.get("ID"), + "name": job.get("Name"), + "type": job.get("Type"), + "status": job.get("Status"), + "datacenters": job.get("Datacenters", []), + "namespace": 
job.get("Namespace"), + "task_groups": job.get("TaskGroups", []), + "meta": job.get("Meta", {}), + } + return job_spec + + return job + except Exception as e: + raise HTTPException(status_code=404, detail=f"Failed to get job specification: {str(e)}") + +@router.post("/{job_id}/restart") +async def restart_job(job_id: str): + """Restart a job by stopping it and starting it again.""" + try: + # Get the current job specification + job_spec = nomad_service.get_job(job_id) + + # Stop the job + nomad_service.stop_job(job_id) + + # Start the job with the original specification + result = nomad_service.start_job(job_spec) + + return { + "job_id": job_id, + "status": "restarted", + "eval_id": result.get("eval_id"), + } + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to restart job: {str(e)}") + +@router.get("/by-repository/{repository}") +async def get_job_by_repository(repository: str): + """Get job information by repository URL or name.""" + job_info = config_service.get_job_from_repository(repository) + if not job_info: + raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}") + + job_id = job_info.get("job_id") + namespace = job_info.get("namespace") + + # Get the job using the specific namespace if provided + try: + if namespace: + # Override the default namespace with the specific one + custom_nomad = NomadService() + custom_nomad.namespace = namespace + job = custom_nomad.get_job(job_id) + else: + # Use the default namespace settings + job = nomad_service.get_job(job_id) + + # Add repository information + job["repository"] = repository + return job + except Exception as e: + raise HTTPException(status_code=404, detail=f"Job not found: {job_id}, Error: {str(e)}") + +@router.post("/by-repository/{repository}/start") +async def start_job_by_repository(repository: str): + """Start a job by its associated repository.""" + logger = logging.getLogger(__name__) + + job_info = config_service.get_job_from_repository(repository) + if not job_info: + raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}") + + job_id = job_info.get("job_id") + namespace = job_info.get("namespace") + + logger.info(f"Starting job for repository {repository}, job_id: {job_id}, namespace: {namespace}") + + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + logger.info(f"Setting custom_nomad.namespace to {namespace}") + custom_nomad.namespace = namespace + + # Log the current namespace being used + logger.info(f"Nomad client namespace: {custom_nomad.namespace}") + + try: + # Get the job specification from an existing job + job_spec = custom_nomad.get_job(job_id) + + # Log the job specification + logger.info(f"Retrieved job specification for {job_id} from existing job") + + # Ensure namespace is set in job spec + if isinstance(job_spec, dict): + # Ensure namespace is explicitly set + if namespace: + logger.info(f"Setting namespace in job spec to {namespace}") + job_spec["Namespace"] = namespace + + # Log the keys in the job specification + logger.info(f"Job spec keys: {job_spec.keys()}") + + # Start the job with the retrieved specification + result = custom_nomad.start_job(job_spec) + + return { + "job_id": job_id, + "repository": repository, + "status": "started", + "eval_id": result.get("eval_id"), + "namespace": namespace + } + except HTTPException as e: + # If job not found, try to get spec from config + if e.status_code == 404: + logger.info(f"Job {job_id} not 
found, attempting to get specification from config") + + # Try to get job spec from repository config + job_spec = config_service.get_job_spec_from_repository(repository) + + if not job_spec: + logger.warning(f"No job specification found for repository {repository}, creating a default one") + + # Create a simple default job spec if none exists + job_spec = { + "ID": job_id, + "Name": job_id, + "Type": "service", + "Datacenters": ["jm"], # Default datacenter + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Tasks": [ + { + "Name": job_id.split('-')[0], # Use first part of job ID as task name + "Driver": "docker", + "Config": { + "image": f"registry.dev.meisheng.group/{repository}:latest", + "force_pull": True, + "ports": ["http"] + }, + "Resources": { + "CPU": 500, + "MemoryMB": 512 + } + } + ], + "Networks": [ + { + "DynamicPorts": [ + { + "Label": "http", + "Value": 0, + "To": 8000 + } + ] + } + ] + } + ], + "Meta": { + "repository": repository + } + } + + # Set the namespace explicitly in the job spec + if namespace: + logger.info(f"Setting namespace in default job spec to {namespace}") + job_spec["Namespace"] = namespace + + logger.info(f"Starting job {job_id} with specification") + + # Log the job specification structure + if isinstance(job_spec, dict): + logger.info(f"Job spec keys: {job_spec.keys()}") + if "Namespace" in job_spec: + logger.info(f"Job spec namespace: {job_spec['Namespace']}") + + # Start the job with the specification + result = custom_nomad.start_job(job_spec) + + return { + "job_id": job_id, + "repository": repository, + "status": "started", + "eval_id": result.get("eval_id"), + "namespace": namespace + } + +@router.post("/by-repository/{repository}/stop") +async def stop_job_by_repository(repository: str, purge: bool = Query(False)): + """Stop a job by its associated repository.""" + job_info = config_service.get_job_from_repository(repository) + if not job_info: + raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}") + + job_id = job_info.get("job_id") + namespace = job_info.get("namespace") + + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + custom_nomad.namespace = namespace + + # Stop the job + result = custom_nomad.stop_job(job_id, purge) + + return { + "job_id": job_id, + "repository": repository, + "status": "stopped", + "eval_id": result.get("eval_id"), + "namespace": namespace + } + +@router.post("/by-repository/{repository}/restart") +async def restart_job_by_repository(repository: str): + """Restart a job by its associated repository.""" + job_info = config_service.get_job_from_repository(repository) + if not job_info: + raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}") + + job_id = job_info.get("job_id") + namespace = job_info.get("namespace") + + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + custom_nomad.namespace = namespace + + # Get the job specification + job_spec = custom_nomad.get_job(job_id) + + # Stop the job first + custom_nomad.stop_job(job_id) + + # Start the job with the original specification + result = custom_nomad.start_job(job_spec) + + return { + "job_id": job_id, + "repository": repository, + "status": "restarted", + "eval_id": result.get("eval_id"), + "namespace": namespace + } \ No newline at end of file diff --git a/app/routers/logs.py b/app/routers/logs.py new file mode 100644 index 0000000..e14a094 --- /dev/null 
+++ b/app/routers/logs.py @@ -0,0 +1,293 @@ +from fastapi import APIRouter, HTTPException, Query +from typing import List, Dict, Any, Optional +import logging + +from app.services.nomad_client import NomadService +from app.services.config_service import ConfigService + +# Configure logging +logger = logging.getLogger(__name__) + +router = APIRouter() +nomad_service = NomadService() +config_service = ConfigService() + +# More specific routes first +@router.get("/repository/{repository}") +async def get_repository_logs( + repository: str, + log_type: str = Query("stderr", description="Log type: stdout or stderr"), + limit: int = Query(1, description="Number of allocations to return logs for"), + plain_text: bool = Query(False, description="Return plain text logs instead of JSON") +): + """Get logs for a repository's associated job.""" + # Get the job info for the repository + job_info = config_service.get_job_from_repository(repository) + if not job_info: + raise HTTPException(status_code=404, detail=f"No job found for repository: {repository}") + + job_id = job_info.get("job_id") + namespace = job_info.get("namespace") + + logger.info(f"Getting logs for job {job_id} in namespace {namespace}") + + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + custom_nomad.namespace = namespace + + # Get allocations for the job + allocations = custom_nomad.get_allocations(job_id) + if not allocations: + raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}") + + logger.info(f"Found {len(allocations)} allocations for job {job_id}") + + # Sort allocations by creation time (descending) + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + + # Limit the number of allocations + allocations_to_check = sorted_allocations[:limit] + + # Also get the job info to determine task names + job = custom_nomad.get_job(job_id) + + # Collect logs for each allocation and task + result = [] + error_messages = [] + + for alloc in allocations_to_check: + # Use the full UUID of the allocation + alloc_id = alloc.get("ID") + if not alloc_id: + logger.warning(f"Allocation ID not found in allocation data") + error_messages.append("Allocation ID not found in allocation data") + continue + + logger.info(f"Processing allocation {alloc_id} for job {job_id}") + + # Get task name from the allocation's TaskStates + task_states = alloc.get("TaskStates", {}) + if not task_states: + logger.warning(f"No task states found in allocation {alloc_id}") + error_messages.append(f"No task states found in allocation {alloc_id}") + + for task_name, task_state in task_states.items(): + try: + logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}") + + logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type) + + # Check if logs is an error message + if logs and isinstance(logs, str): + if logs.startswith("Error:") or logs.startswith("No "): + logger.warning(f"Error retrieving logs for {task_name}: {logs}") + error_messages.append(logs) + continue + + # Only add if we got some logs + if logs: + result.append({ + "alloc_id": alloc_id, + "task": task_name, + "type": log_type, + "create_time": alloc.get("CreateTime"), + "logs": logs + }) + logger.info(f"Successfully retrieved logs for {task_name}") + else: + error_msg = f"No logs found for {task_name}" + logger.warning(error_msg) + error_messages.append(error_msg) + except Exception as e: + # Log but continue to try other tasks + 
error_msg = f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}" + logger.error(error_msg) + error_messages.append(error_msg) + + # Return as plain text if requested + if plain_text: + if not result: + if error_messages: + return f"No logs found for this job. Errors: {'; '.join(error_messages)}" + return "No logs found for this job" + return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result]) + + # Otherwise return as JSON + return { + "job_id": job_id, + "repository": repository, + "namespace": namespace, + "allocation_logs": result, + "errors": error_messages if error_messages else None + } + +@router.get("/job/{job_id}") +async def get_job_logs( + job_id: str, + namespace: str = Query(None, description="Nomad namespace"), + log_type: str = Query("stderr", description="Log type: stdout or stderr"), + limit: int = Query(1, description="Number of allocations to return logs for"), + plain_text: bool = Query(False, description="Return plain text logs instead of JSON") +): + """Get logs for the most recent allocations of a job.""" + # Create a custom service with the specific namespace if provided + custom_nomad = NomadService() + if namespace: + custom_nomad.namespace = namespace + logger.info(f"Getting logs for job {job_id} in namespace {namespace}") + else: + logger.info(f"Getting logs for job {job_id} in default namespace") + + # Get all allocations for the job + allocations = custom_nomad.get_allocations(job_id) + if not allocations: + raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}") + + logger.info(f"Found {len(allocations)} allocations for job {job_id}") + + # Sort allocations by creation time (descending) + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + + # Limit the number of allocations + allocations_to_check = sorted_allocations[:limit] + + # Collect logs for each allocation and task + result = [] + for alloc in allocations_to_check: + alloc_id = alloc.get("ID") + if not alloc_id: + logger.warning(f"Allocation ID not found in allocation data") + continue + + logger.info(f"Processing allocation {alloc_id} for job {job_id}") + + # Get task names from the allocation's TaskStates + task_states = alloc.get("TaskStates", {}) + for task_name, task_state in task_states.items(): + try: + logger.info(f"Retrieving logs for allocation {alloc_id}, task {task_name}") + + logs = custom_nomad.get_allocation_logs(alloc_id, task_name, log_type) + # Only add if we got some logs and not an error message + if logs and not logs.startswith("No") and not logs.startswith("Error"): + result.append({ + "alloc_id": alloc_id, + "task": task_name, + "type": log_type, + "create_time": alloc.get("CreateTime"), + "logs": logs + }) + logger.info(f"Successfully retrieved logs for {task_name}") + else: + logger.warning(f"No logs found for {task_name}: {logs}") + except Exception as e: + # Log but continue to try other tasks + logger.error(f"Failed to get logs for {alloc_id}/{task_name}: {str(e)}") + + # Return as plain text if requested + if plain_text: + if not result: + return "No logs found for this job" + return "\n\n".join([f"=== {r.get('task')} ===\n{r.get('logs')}" for r in result]) + + # Otherwise return as JSON + return { + "job_id": job_id, + "namespace": namespace, + "allocation_logs": result + } + +@router.get("/latest/{job_id}") +async def get_latest_allocation_logs( + job_id: str, + log_type: str = Query("stderr", description="Log type: stdout or stderr"), + plain_text: bool = 
Query(False, description="Return plain text logs instead of JSON") +): + """Get logs from the latest allocation of a job.""" + # Get all allocations for the job + allocations = nomad_service.get_allocations(job_id) + if not allocations: + raise HTTPException(status_code=404, detail=f"No allocations found for job {job_id}") + + # Sort allocations by creation time (descending) + sorted_allocations = sorted( + allocations, + key=lambda a: a.get("CreateTime", 0), + reverse=True + ) + + # Get the latest allocation + latest_alloc = sorted_allocations[0] + alloc_id = latest_alloc.get("ID") + + # Get task group and task information + job = nomad_service.get_job(job_id) + task_groups = job.get("TaskGroups", []) + + # Collect logs for each task in the latest allocation + result = [] + for task_group in task_groups: + tasks = task_group.get("Tasks", []) + for task in tasks: + task_name = task.get("Name") + try: + logs = nomad_service.get_allocation_logs(alloc_id, task_name, log_type) + result.append({ + "alloc_id": alloc_id, + "task": task_name, + "type": log_type, + "create_time": latest_alloc.get("CreateTime"), + "logs": logs + }) + except Exception as e: + # Skip if logs cannot be retrieved for this task + pass + + # Return as plain text if requested + if plain_text: + return "\n\n".join([f"=== {r['task']} ===\n{r['logs']}" for r in result]) + + # Otherwise return as JSON + return { + "job_id": job_id, + "latest_allocation": alloc_id, + "task_logs": result + } + +@router.get("/build/{job_id}") +async def get_build_logs(job_id: str, plain_text: bool = Query(False)): + """Get build logs for a job (usually stderr logs from the latest allocation).""" + # This is a convenience endpoint that returns stderr logs from the latest allocation + return await get_latest_allocation_logs(job_id, "stderr", plain_text) + +# Generic allocation logs route last +@router.get("/allocation/{alloc_id}/{task}") +async def get_allocation_logs( + alloc_id: str, + task: str, + log_type: str = Query("stderr", description="Log type: stdout or stderr"), + plain_text: bool = Query(False, description="Return plain text logs instead of JSON") +): + """Get logs for a specific allocation and task.""" + # Validate log_type + if log_type not in ["stdout", "stderr"]: + raise HTTPException(status_code=400, detail="Log type must be stdout or stderr") + + # Get logs from Nomad + logs = nomad_service.get_allocation_logs(alloc_id, task, log_type) + + # Return as plain text if requested + if plain_text: + return logs + + # Otherwise return as JSON + return {"alloc_id": alloc_id, "task": task, "type": log_type, "logs": logs} \ No newline at end of file diff --git a/app/routers/repositories.py b/app/routers/repositories.py new file mode 100644 index 0000000..816ff5a --- /dev/null +++ b/app/routers/repositories.py @@ -0,0 +1,89 @@ +from fastapi import APIRouter, HTTPException, Query +from typing import List, Dict, Any, Optional + +from app.services.gitea_client import GiteaClient +from app.services.config_service import ConfigService + +router = APIRouter() +gitea_client = GiteaClient() +config_service = ConfigService() + +@router.get("/") +async def list_repositories(limit: int = Query(100, description="Maximum number of repositories to return")): + """ + List all available repositories from Gitea. + + If Gitea integration is not configured, returns an empty list. 
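+
+    Repositories that are linked to a Nomad job also carry "linked_job" and
+    "config_name" fields, added from the matching configuration below.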
+ """ + repositories = gitea_client.list_repositories(limit) + + # Enhance with linked job information + for repo in repositories: + # Create a URL from clone_url + repo_url = repo.get("clone_url") + if repo_url: + # Check if repository is linked to a job + configs = config_service.list_configs() + for config in configs: + if config.get("repository") == repo_url: + repo["linked_job"] = config.get("job_id") + repo["config_name"] = config.get("name") + break + + return repositories + +@router.get("/{repository}") +async def get_repository_info(repository: str): + """ + Get information about a specific repository. + + The repository parameter can be a repository URL or a repository alias. + If it's a repository URL, we'll get the info directly from Gitea. + If it's a repository alias, we'll get the info from the configuration and then from Gitea. + """ + # First check if it's a repository URL + repo_info = gitea_client.get_repository_info(repository) + + if repo_info: + # Check if repository is linked to a job + configs = config_service.list_configs() + for config in configs: + if config.get("repository") == repository: + repo_info["linked_job"] = config.get("job_id") + repo_info["config_name"] = config.get("name") + repo_info["config"] = config + break + + return repo_info + else: + # Check if it's a repository alias in our configs + config = config_service.get_config_by_repository(repository) + if config: + repo_url = config.get("repository") + repo_info = gitea_client.get_repository_info(repo_url) + + if repo_info: + repo_info["linked_job"] = config.get("job_id") + repo_info["config_name"] = config.get("name") + repo_info["config"] = config + return repo_info + + raise HTTPException(status_code=404, detail=f"Repository not found: {repository}") + +@router.get("/{repository}/branches") +async def get_repository_branches(repository: str): + """ + Get branches for a specific repository. + + The repository parameter can be a repository URL or a repository alias. 
+ """ + # If it's a repository alias, get the actual URL + config = config_service.get_config_by_repository(repository) + if config: + repository = config.get("repository") + + branches = gitea_client.get_repository_branches(repository) + if not branches: + raise HTTPException(status_code=404, detail=f"No branches found for repository: {repository}") + + return branches \ No newline at end of file diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py new file mode 100644 index 0000000..911fe69 --- /dev/null +++ b/app/schemas/__init__.py @@ -0,0 +1 @@ +# Import schemas \ No newline at end of file diff --git a/app/schemas/__pycache__/__init__.cpython-313.pyc b/app/schemas/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..6588d33 Binary files /dev/null and b/app/schemas/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/schemas/__pycache__/claude_api.cpython-313.pyc b/app/schemas/__pycache__/claude_api.cpython-313.pyc new file mode 100644 index 0000000..add979e Binary files /dev/null and b/app/schemas/__pycache__/claude_api.cpython-313.pyc differ diff --git a/app/schemas/__pycache__/config.cpython-313.pyc b/app/schemas/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000..f216bb4 Binary files /dev/null and b/app/schemas/__pycache__/config.cpython-313.pyc differ diff --git a/app/schemas/__pycache__/job.cpython-313.pyc b/app/schemas/__pycache__/job.cpython-313.pyc new file mode 100644 index 0000000..5aab022 Binary files /dev/null and b/app/schemas/__pycache__/job.cpython-313.pyc differ diff --git a/app/schemas/claude_api.py b/app/schemas/claude_api.py new file mode 100644 index 0000000..74050e1 --- /dev/null +++ b/app/schemas/claude_api.py @@ -0,0 +1,78 @@ +from pydantic import BaseModel, Field +from typing import Dict, Any, List, Optional, Union + + +class ClaudeJobRequest(BaseModel): + """Request model for Claude to start or manage a job""" + job_id: str = Field(..., description="The ID of the job to manage") + action: str = Field(..., description="Action to perform: start, stop, restart, status") + namespace: Optional[str] = Field("development", description="Nomad namespace") + purge: Optional[bool] = Field(False, description="Whether to purge the job when stopping") + + +class ClaudeJobSpecification(BaseModel): + """Simplified job specification for Claude to create a new job""" + job_id: str = Field(..., description="The ID for the new job") + name: Optional[str] = Field(None, description="Name of the job (defaults to job_id)") + type: str = Field("service", description="Job type: service, batch, or system") + datacenters: List[str] = Field(["jm"], description="List of datacenters") + namespace: str = Field("development", description="Nomad namespace") + docker_image: str = Field(..., description="Docker image to run") + count: int = Field(1, description="Number of instances to run") + cpu: int = Field(100, description="CPU resources in MHz") + memory: int = Field(128, description="Memory in MB") + ports: Optional[List[Dict[str, Any]]] = Field(None, description="Port mappings") + env_vars: Optional[Dict[str, str]] = Field(None, description="Environment variables") + + def to_nomad_job_spec(self) -> Dict[str, Any]: + """Convert to Nomad job specification format""" + # Create a task with the specified Docker image + task = { + "Name": "app", + "Driver": "docker", + "Config": { + "image": self.docker_image, + }, + "Resources": { + "CPU": self.cpu, + "MemoryMB": self.memory + } + } + + # Add environment variables if specified + if 
self.env_vars: + task["Env"] = self.env_vars + + # Create network configuration + network = {} + if self.ports: + network["DynamicPorts"] = self.ports + task["Config"]["ports"] = [port["Label"] for port in self.ports] + + # Create the full job specification + job_spec = { + "ID": self.job_id, + "Name": self.name or self.job_id, + "Type": self.type, + "Datacenters": self.datacenters, + "Namespace": self.namespace, + "TaskGroups": [ + { + "Name": "app", + "Count": self.count, + "Tasks": [task], + "Networks": [network] if network else [] + } + ] + } + + return job_spec + + +class ClaudeJobResponse(BaseModel): + """Response model for Claude job operations""" + success: bool = Field(..., description="Whether the operation was successful") + job_id: str = Field(..., description="The ID of the job") + status: str = Field(..., description="Current status of the job") + message: str = Field(..., description="Human-readable message about the operation") + details: Optional[Dict[str, Any]] = Field(None, description="Additional details about the job") \ No newline at end of file diff --git a/app/schemas/config.py b/app/schemas/config.py new file mode 100644 index 0000000..36a8506 --- /dev/null +++ b/app/schemas/config.py @@ -0,0 +1,56 @@ +from pydantic import BaseModel, Field +from typing import Dict, Any, Optional + + +class ConfigBase(BaseModel): + """Base class for configuration schemas.""" + repository: str = Field(..., description="Repository URL or identifier") + job_id: str = Field(..., description="Nomad job ID") + description: Optional[str] = Field(None, description="Description of this configuration") + repository_alias: Optional[str] = Field(None, description="Short name or alias for the repository") + + # Additional metadata can be stored in the meta field + meta: Optional[Dict[str, Any]] = Field(None, description="Additional metadata") + + +class ConfigCreate(ConfigBase): + """Schema for creating a new configuration.""" + name: str = Field(..., description="Configuration name (used as the file name)") + + +class ConfigUpdate(BaseModel): + """Schema for updating an existing configuration.""" + repository: Optional[str] = Field(None, description="Repository URL or identifier") + job_id: Optional[str] = Field(None, description="Nomad job ID") + description: Optional[str] = Field(None, description="Description of this configuration") + repository_alias: Optional[str] = Field(None, description="Short name or alias for the repository") + meta: Optional[Dict[str, Any]] = Field(None, description="Additional metadata") + + +class ConfigResponse(ConfigBase): + """Schema for configuration response.""" + name: str = Field(..., description="Configuration name") + repository_info: Optional[Dict[str, Any]] = Field(None, description="Repository information from Gitea if available") + + class Config: + schema_extra = { + "example": { + "name": "my-web-app", + "repository": "http://gitea.internal.example.com/username/repo-name", + "repository_alias": "web-app", + "job_id": "web-app", + "description": "Web application running in Nomad", + "meta": { + "owner": "devops-team", + "environment": "production" + }, + "repository_info": { + "description": "A web application", + "default_branch": "main", + "stars": 5, + "forks": 2, + "owner": "username", + "html_url": "http://gitea.internal.example.com/username/repo-name" + } + } + } \ No newline at end of file diff --git a/app/schemas/job.py b/app/schemas/job.py new file mode 100644 index 0000000..ef0407e --- /dev/null +++ b/app/schemas/job.py @@ -0,0 +1,80 @@ 
+from pydantic import BaseModel, Field +from typing import Dict, Any, List, Optional + + +class JobSpecification(BaseModel): + """ + Nomad job specification. This is a simplified schema as the actual + Nomad job spec is quite complex and varies by job type. + """ + id: Optional[str] = Field(None, description="Job ID") + ID: Optional[str] = Field(None, description="Job ID (Nomad format)") + name: Optional[str] = Field(None, description="Job name") + Name: Optional[str] = Field(None, description="Job name (Nomad format)") + type: Optional[str] = Field(None, description="Job type (service, batch, system)") + Type: Optional[str] = Field(None, description="Job type (Nomad format)") + datacenters: Optional[List[str]] = Field(None, description="List of datacenters") + Datacenters: Optional[List[str]] = Field(None, description="List of datacenters (Nomad format)") + task_groups: Optional[List[Dict[str, Any]]] = Field(None, description="Task groups") + TaskGroups: Optional[List[Dict[str, Any]]] = Field(None, description="Task groups (Nomad format)") + meta: Optional[Dict[str, str]] = Field(None, description="Job metadata") + Meta: Optional[Dict[str, str]] = Field(None, description="Job metadata (Nomad format)") + + # Allow additional fields (to handle the complete Nomad job spec) + class Config: + extra = "allow" + + +class JobOperation(BaseModel): + """Response after a job operation (start, stop, etc.)""" + job_id: str = Field(..., description="The ID of the job") + eval_id: Optional[str] = Field(None, description="The evaluation ID") + status: str = Field(..., description="The status of the operation") + warnings: Optional[str] = Field(None, description="Any warnings from Nomad") + + +class JobResponse(BaseModel): + """ + Job response schema. This is a simplified version as the actual + Nomad job response is quite complex and varies by job type. 
+ """ + ID: str = Field(..., description="Job ID") + Name: str = Field(..., description="Job name") + Status: str = Field(..., description="Job status") + Type: str = Field(..., description="Job type") + repository: Optional[str] = Field(None, description="Associated repository if any") + + # Allow additional fields (to handle the complete Nomad job response) + class Config: + extra = "allow" + + +class TaskGroup(BaseModel): + """Task group schema.""" + Name: str + Count: int + Tasks: List[Dict[str, Any]] + + class Config: + extra = "allow" + + +class Task(BaseModel): + """Task schema.""" + Name: str + Driver: str + Config: Dict[str, Any] + + class Config: + extra = "allow" + + +class Allocation(BaseModel): + """Allocation schema.""" + ID: str + JobID: str + TaskGroup: str + ClientStatus: str + + class Config: + extra = "allow" \ No newline at end of file diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..5d17f52 --- /dev/null +++ b/app/services/__init__.py @@ -0,0 +1 @@ +# Import services \ No newline at end of file diff --git a/app/services/__pycache__/__init__.cpython-313.pyc b/app/services/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..2ea0473 Binary files /dev/null and b/app/services/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/services/__pycache__/config_service.cpython-313.pyc b/app/services/__pycache__/config_service.cpython-313.pyc new file mode 100644 index 0000000..79990c9 Binary files /dev/null and b/app/services/__pycache__/config_service.cpython-313.pyc differ diff --git a/app/services/__pycache__/gitea_client.cpython-313.pyc b/app/services/__pycache__/gitea_client.cpython-313.pyc new file mode 100644 index 0000000..fa644c1 Binary files /dev/null and b/app/services/__pycache__/gitea_client.cpython-313.pyc differ diff --git a/app/services/__pycache__/nomad_client.cpython-313.pyc b/app/services/__pycache__/nomad_client.cpython-313.pyc new file mode 100644 index 0000000..81b60a5 Binary files /dev/null and b/app/services/__pycache__/nomad_client.cpython-313.pyc differ diff --git a/app/services/config_service.py b/app/services/config_service.py new file mode 100644 index 0000000..e520d43 --- /dev/null +++ b/app/services/config_service.py @@ -0,0 +1,299 @@ +import os +import yaml +import logging +import json +from typing import Dict, Any, Optional, List +from fastapi import HTTPException +from pathlib import Path + +from app.services.gitea_client import GiteaClient + +# Configure logging +logger = logging.getLogger(__name__) + +# Default configs directory +CONFIG_DIR = os.getenv("CONFIG_DIR", "./configs") + +class ConfigService: + """Service for managing repository to job mappings.""" + + def __init__(self, config_dir: str = CONFIG_DIR): + self.config_dir = Path(config_dir) + self._ensure_config_dir() + self.gitea_client = GiteaClient() + + def _ensure_config_dir(self): + """Ensure the config directory exists.""" + try: + self.config_dir.mkdir(parents=True, exist_ok=True) + except Exception as e: + logger.error(f"Failed to create config directory {self.config_dir}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to create config directory: {str(e)}") + + def list_configs(self) -> List[Dict[str, Any]]: + """List all available configurations.""" + configs = [] + try: + for file_path in self.config_dir.glob("*.yaml"): + with open(file_path, "r") as f: + config = yaml.safe_load(f) + config["name"] = file_path.stem + configs.append(config) + return configs + except Exception as 
e: + logger.error(f"Failed to list configurations: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to list configurations: {str(e)}") + + def get_config(self, name: str) -> Dict[str, Any]: + """Get a specific configuration by name.""" + file_path = self.config_dir / f"{name}.yaml" + try: + if not file_path.exists(): + raise HTTPException(status_code=404, detail=f"Configuration not found: {name}") + + with open(file_path, "r") as f: + config = yaml.safe_load(f) + config["name"] = name + + # Enrich with repository information if available + if repository := config.get("repository"): + repo_info = self.gitea_client.get_repository_info(repository) + if repo_info: + config["repository_info"] = { + "description": repo_info.get("description"), + "default_branch": repo_info.get("default_branch"), + "stars": repo_info.get("stars_count"), + "forks": repo_info.get("forks_count"), + "owner": repo_info.get("owner", {}).get("login"), + "html_url": repo_info.get("html_url"), + } + + return config + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to read configuration {name}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to read configuration: {str(e)}") + + def create_config(self, name: str, config: Dict[str, Any]) -> Dict[str, Any]: + """Create a new configuration.""" + file_path = self.config_dir / f"{name}.yaml" + try: + if file_path.exists(): + raise HTTPException(status_code=409, detail=f"Configuration already exists: {name}") + + # Validate required fields + required_fields = ["repository", "job_id"] + for field in required_fields: + if field not in config: + raise HTTPException(status_code=400, detail=f"Missing required field: {field}") + + # Validate repository exists if Gitea integration is configured + if not self.gitea_client.check_repository_exists(config["repository"]): + raise HTTPException(status_code=400, detail=f"Repository not found: {config['repository']}") + + # Add name to the config + config["name"] = name + + # Get repository alias if not provided + if "repository_alias" not in config: + try: + owner, repo = self.gitea_client.parse_repo_url(config["repository"]) + config["repository_alias"] = repo + except Exception: + # Use job_id as fallback + config["repository_alias"] = config["job_id"] + + # Write config to file + with open(file_path, "w") as f: + yaml.dump(config, f, default_flow_style=False) + + return config + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to create configuration {name}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to create configuration: {str(e)}") + + def update_config(self, name: str, config: Dict[str, Any]) -> Dict[str, Any]: + """Update an existing configuration.""" + file_path = self.config_dir / f"{name}.yaml" + try: + if not file_path.exists(): + raise HTTPException(status_code=404, detail=f"Configuration not found: {name}") + + # Read existing config + with open(file_path, "r") as f: + existing_config = yaml.safe_load(f) + + # Validate repository exists if changed and Gitea integration is configured + # (compare against the on-disk value before merging; after the merge the values are always equal) + if "repository" in config and config["repository"] != existing_config.get("repository"): + if not self.gitea_client.check_repository_exists(config["repository"]): + raise HTTPException(status_code=400, detail=f"Repository not found: {config['repository']}") + + # Update with new values + for key, value in config.items(): + existing_config[key] = value + + # Validate required fields + required_fields = ["repository", "job_id"] + for field in 
required_fields: + if field not in existing_config: + raise HTTPException(status_code=400, detail=f"Missing required field: {field}") + + # Add name to the config + existing_config["name"] = name + + # Update repository alias if repository changed + if "repository" in config and "repository_alias" not in config: + try: + owner, repo = self.gitea_client.parse_repo_url(existing_config["repository"]) + existing_config["repository_alias"] = repo + except: + pass + + # Write config to file + with open(file_path, "w") as f: + yaml.dump(existing_config, f, default_flow_style=False) + + return existing_config + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to update configuration {name}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to update configuration: {str(e)}") + + def delete_config(self, name: str) -> Dict[str, Any]: + """Delete a configuration.""" + file_path = self.config_dir / f"{name}.yaml" + try: + if not file_path.exists(): + raise HTTPException(status_code=404, detail=f"Configuration not found: {name}") + + # Get the config before deleting + with open(file_path, "r") as f: + config = yaml.safe_load(f) + config["name"] = name + + # Delete the file + file_path.unlink() + + return {"name": name, "status": "deleted"} + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to delete configuration {name}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to delete configuration: {str(e)}") + + def get_job_from_repository(self, repository: str) -> Optional[Dict[str, str]]: + """Find job_id and namespace associated with a repository.""" + try: + for config in self.list_configs(): + if config.get("repository") == repository or config.get("repository_alias") == repository: + return { + "job_id": config.get("job_id"), + "namespace": config.get("namespace") + } + return None + except Exception as e: + logger.error(f"Failed to find job for repository {repository}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to find job for repository: {str(e)}") + + def get_repository_from_job(self, job_id: str) -> Optional[str]: + """Find repository associated with a job_id.""" + try: + for config in self.list_configs(): + if config.get("job_id") == job_id: + return config.get("repository") + return None + except Exception as e: + logger.error(f"Failed to find repository for job {job_id}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to find repository for job: {str(e)}") + + def get_config_by_repository(self, repository: str) -> Optional[Dict[str, Any]]: + """Find configuration by repository URL or alias.""" + try: + for config in self.list_configs(): + if config.get("repository") == repository or config.get("repository_alias") == repository: + return self.get_config(config.get("name")) + return None + except Exception as e: + logger.error(f"Failed to find config for repository {repository}: {str(e)}") + return None + + def get_job_spec_from_repository(self, repository: str) -> Optional[Dict[str, Any]]: + """Get job specification from repository config and template.""" + try: + # Get the repository configuration + config = self.get_config_by_repository(repository) + if not config: + logger.error(f"No configuration found for repository: {repository}") + return None + + # Check if the job template is specified + job_template = config.get("job_template") + if not job_template: + logger.error(f"No job template specified for repository: {repository}") + return None + + # Read the 
job template file + template_path = Path(self.config_dir) / "templates" / f"{job_template}.json" + if not template_path.exists(): + logger.error(f"Job template not found: {job_template}") + return None + + try: + with open(template_path, "r") as f: + job_spec = json.load(f) + except Exception as e: + logger.error(f"Failed to read job template {job_template}: {str(e)}") + return None + + # Apply configuration parameters to the template + job_spec["ID"] = config.get("job_id") + job_spec["Name"] = config.get("job_id") + + # Apply other customizations from config + if env_vars := config.get("environment_variables"): + for task_group in job_spec.get("TaskGroups", []): + for task in task_group.get("Tasks", []): + if "Env" not in task: + task["Env"] = {} + task["Env"].update(env_vars) + + if meta := config.get("metadata"): + job_spec["Meta"] = meta + + # Add repository info to the metadata + if "Meta" not in job_spec: + job_spec["Meta"] = {} + job_spec["Meta"]["repository"] = repository + + # Override specific job parameters if specified in config + if job_params := config.get("job_parameters"): + for param_key, param_value in job_params.items(): + # Handle nested parameters with dot notation (e.g., "TaskGroups.0.Tasks.0.Config.image") + if "." in param_key: + parts = param_key.split(".") + current = job_spec + for part in parts[:-1]: + # Handle array indices + if part.isdigit() and isinstance(current, list): + current = current[int(part)] + elif part in current: + current = current[part] + else: + break + else: + # Only set the value if we successfully navigated the path + current[parts[-1]] = param_value + else: + # Direct parameter + job_spec[param_key] = param_value + + logger.info(f"Generated job specification for repository {repository} using template {job_template}") + return job_spec + + except Exception as e: + logger.error(f"Failed to get job specification for repository {repository}: {str(e)}") + return None \ No newline at end of file diff --git a/app/services/gitea_client.py b/app/services/gitea_client.py new file mode 100644 index 0000000..66b5d19 --- /dev/null +++ b/app/services/gitea_client.py @@ -0,0 +1,180 @@ +import os +import logging +import requests +from typing import Dict, Any, List, Optional, Tuple +from urllib.parse import urlparse +from fastapi import HTTPException + +# Configure logging +logger = logging.getLogger(__name__) + +class GiteaClient: + """Client for interacting with Gitea API.""" + + def __init__(self): + """Initialize Gitea client with configuration from environment variables.""" + self.api_base_url = os.getenv("GITEA_API_URL", "").rstrip("/") + self.token = os.getenv("GITEA_API_TOKEN") + self.username = os.getenv("GITEA_USERNAME") + self.verify_ssl = os.getenv("GITEA_VERIFY_SSL", "true").lower() == "true" + + if not self.api_base_url: + logger.warning("GITEA_API_URL is not configured. 
Gitea integration will not work.") + + if not self.token and (self.username and os.getenv("GITEA_PASSWORD")): + self.token = self._get_token_from_credentials() + + def _get_token_from_credentials(self) -> Optional[str]: + """Get a token using username and password if provided.""" + try: + response = requests.post( + f"{self.api_base_url}/users/{self.username}/tokens", + auth=(self.username, os.getenv("GITEA_PASSWORD", "")), + json={ + "name": "nomad-mcp-service", + "scopes": ["repo", "read:org"] + }, + verify=self.verify_ssl + ) + + if response.status_code == 201: + return response.json().get("sha1") + else: + logger.error(f"Failed to get Gitea token: {response.text}") + return None + except Exception as e: + logger.error(f"Failed to get Gitea token: {str(e)}") + return None + + def _get_headers(self) -> Dict[str, str]: + """Get request headers with authentication.""" + headers = { + "Content-Type": "application/json", + "Accept": "application/json" + } + + if self.token: + headers["Authorization"] = f"token {self.token}" + + return headers + + def parse_repo_url(self, repo_url: str) -> Tuple[str, str]: + """ + Parse a Gitea repository URL to extract owner and repo name. + + Examples: + - http://gitea.internal.example.com/username/repo-name -> (username, repo-name) + - https://gitea.example.com/org/project -> (org, project) + """ + try: + # Parse the URL + parsed_url = urlparse(repo_url) + + # Get the path and remove leading/trailing slashes + path = parsed_url.path.strip("/") + + # Split the path + parts = path.split("/") + + if len(parts) < 2: + raise ValueError(f"Invalid repository URL: {repo_url}") + + # Extract owner and repo + owner = parts[0] + repo = parts[1] + + return owner, repo + except Exception as e: + logger.error(f"Failed to parse repository URL: {repo_url}, error: {str(e)}") + raise ValueError(f"Invalid repository URL: {repo_url}") + + def check_repository_exists(self, repo_url: str) -> bool: + """Check if a repository exists in Gitea.""" + if not self.api_base_url: + # No Gitea integration configured, assume repository exists + return True + + try: + owner, repo = self.parse_repo_url(repo_url) + + response = requests.get( + f"{self.api_base_url}/repos/{owner}/{repo}", + headers=self._get_headers(), + verify=self.verify_ssl + ) + + return response.status_code == 200 + except Exception as e: + logger.error(f"Failed to check repository: {repo_url}, error: {str(e)}") + return False + + def get_repository_info(self, repo_url: str) -> Optional[Dict[str, Any]]: + """Get repository information from Gitea.""" + if not self.api_base_url: + # No Gitea integration configured + return None + + try: + owner, repo = self.parse_repo_url(repo_url) + + response = requests.get( + f"{self.api_base_url}/repos/{owner}/{repo}", + headers=self._get_headers(), + verify=self.verify_ssl + ) + + if response.status_code == 200: + return response.json() + else: + logger.error(f"Failed to get repository info: {response.text}") + return None + except Exception as e: + logger.error(f"Failed to get repository info: {repo_url}, error: {str(e)}") + return None + + def list_repositories(self, limit: int = 100) -> List[Dict[str, Any]]: + """List available repositories from Gitea.""" + if not self.api_base_url: + # No Gitea integration configured + return [] + + try: + response = requests.get( + f"{self.api_base_url}/user/repos", + headers=self._get_headers(), + params={"limit": limit}, + verify=self.verify_ssl + ) + + if response.status_code == 200: + return response.json() + else: + logger.error(f"Failed to 
list repositories: {response.text}") + return [] + except Exception as e: + logger.error(f"Failed to list repositories: {str(e)}") + return [] + + def get_repository_branches(self, repo_url: str) -> List[Dict[str, Any]]: + """Get branches for a repository.""" + if not self.api_base_url: + # No Gitea integration configured + return [] + + try: + owner, repo = self.parse_repo_url(repo_url) + + response = requests.get( + f"{self.api_base_url}/repos/{owner}/{repo}/branches", + headers=self._get_headers(), + verify=self.verify_ssl + ) + + if response.status_code == 200: + return response.json() + else: + logger.error(f"Failed to get repository branches: {response.text}") + return [] + except Exception as e: + logger.error(f"Failed to get repository branches: {repo_url}, error: {str(e)}") + return [] \ No newline at end of file diff --git a/app/services/nomad_client.py b/app/services/nomad_client.py new file mode 100644 index 0000000..4f79505 --- /dev/null +++ b/app/services/nomad_client.py @@ -0,0 +1,505 @@ +import os +import logging +import nomad +from fastapi import HTTPException +from typing import Dict, Any, Optional, List +from dotenv import load_dotenv +import time + +# Load environment variables +load_dotenv() + +# Configure logging +logger = logging.getLogger(__name__) + +def get_nomad_client(): + """ + Create and return a Nomad client using environment variables. + """ + try: + nomad_addr = os.getenv("NOMAD_ADDR", "http://localhost:4646").rstrip('/') + nomad_token = os.getenv("NOMAD_TOKEN") + # Use "development" as the default namespace since all jobs are likely to be in this namespace + nomad_namespace = os.getenv("NOMAD_NAMESPACE", "development") + + # Ensure namespace is never "*" (wildcard) + if nomad_namespace == "*": + nomad_namespace = "development" + logger.info("Replaced wildcard namespace '*' with 'development'") + + # Extract host and port from the address + host_with_port = nomad_addr.replace("http://", "").replace("https://", "") + host = host_with_port.split(":")[0] + + # Safely extract port + port_part = host_with_port.split(":")[-1] if ":" in host_with_port else "4646" + port = int(port_part.split('/')[0]) # Remove any path components + + logger.info(f"Creating Nomad client with host={host}, port={port}, namespace={nomad_namespace}") + + return nomad.Nomad( + host=host, + port=port, + secure=nomad_addr.startswith("https"), + token=nomad_token, + timeout=10, + namespace=nomad_namespace, # Query across development namespace by default + verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True + ) + except Exception as e: + logger.error(f"Failed to create Nomad client: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to connect to Nomad: {str(e)}") + +class NomadService: + """Service for interacting with Nomad API.""" + + def __init__(self): + self.client = get_nomad_client() + self.namespace = os.getenv("NOMAD_NAMESPACE", "development") # Use "development" namespace as default + + def get_job(self, job_id: str, max_retries: int = 3, retry_delay: int = 2) -> Dict[str, Any]: + """ + Get a job by ID with retry logic. 
+ + Args: + job_id: The ID of the job to retrieve + max_retries: Maximum number of retry attempts (default: 3) + retry_delay: Delay between retries in seconds (default: 2) + + Returns: + Dict containing job details + """ + last_exception = None + + # Try multiple times to get the job + for attempt in range(max_retries): + try: + # Get the Nomad address from the client + nomad_addr = f"http://{self.client.host}:{self.client.port}" + + # Build the URL for the job endpoint + url = f"{nomad_addr}/v1/job/{job_id}" + + # Set up headers + headers = {} + if hasattr(self.client, 'token') and self.client.token: + headers["X-Nomad-Token"] = self.client.token + + # Set up params with the correct namespace + params = {"namespace": self.namespace} + + # Make the request directly + import requests + response = requests.get( + url=url, + headers=headers, + params=params, + verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True + ) + + # Check if the request was successful + if response.status_code == 200: + return response.json() + elif response.status_code == 404: + # If not the last attempt, log and retry + if attempt < max_retries - 1: + logger.warning(f"Job {job_id} not found on attempt {attempt+1}/{max_retries}, retrying in {retry_delay}s...") + time.sleep(retry_delay) + continue + else: + raise ValueError(f"Job not found after {max_retries} attempts: {job_id}") + else: + raise ValueError(f"Failed to get job: {response.text}") + + except Exception as e: + last_exception = e + # If not the last attempt, log and retry + if attempt < max_retries - 1: + logger.warning(f"Error getting job {job_id} on attempt {attempt+1}/{max_retries}: {str(e)}, retrying in {retry_delay}s...") + time.sleep(retry_delay) + continue + else: + logger.error(f"Failed to get job {job_id} after {max_retries} attempts: {str(e)}") + raise HTTPException(status_code=404, detail=f"Job not found: {job_id}") + + # If we get here, all retries failed + logger.error(f"Failed to get job {job_id} after {max_retries} attempts") + raise HTTPException(status_code=404, detail=f"Job not found: {job_id}") + + def list_jobs(self) -> List[Dict[str, Any]]: + """List all jobs.""" + try: + # Get the Nomad address from the client + nomad_addr = f"http://{self.client.host}:{self.client.port}" + + # Build the URL for the jobs endpoint + url = f"{nomad_addr}/v1/jobs" + + # Set up headers + headers = {} + if hasattr(self.client, 'token') and self.client.token: + headers["X-Nomad-Token"] = self.client.token + + # Set up params with the correct namespace + params = {"namespace": self.namespace} + + # Make the request directly + import requests + response = requests.get( + url=url, + headers=headers, + params=params, + verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True + ) + + # Check if the request was successful + if response.status_code == 200: + return response.json() + else: + raise ValueError(f"Failed to list jobs: {response.text}") + except Exception as e: + logger.error(f"Failed to list jobs: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to list jobs: {str(e)}") + + def start_job(self, job_spec: Dict[str, Any]) -> Dict[str, Any]: + """ + Start a job using the provided specification. + + Args: + job_spec: The job specification to submit. Can be a raw job spec or wrapped in a "Job" key. 
+ + Returns: + Dict containing job_id, eval_id, status, and any warnings + """ + try: + # Extract job ID from specification + job_id = None + if "Job" in job_spec: + job_id = job_spec["Job"].get("ID") or job_spec["Job"].get("id") + else: + job_id = job_spec.get("ID") or job_spec.get("id") + + if not job_id: + raise ValueError("Job ID is required in the job specification") + + logger.info(f"Processing job start request for job ID: {job_id}") + + # Determine the namespace to use, with clear priorities: + # 1. Explicitly provided in the job spec (highest priority) + # 2. Service instance namespace + # 3. Fallback to "development" + namespace = self.namespace + + # Normalize the job structure to ensure it has a "Job" wrapper + normalized_job_spec = {} + if "Job" in job_spec: + normalized_job_spec = job_spec + # Check if namespace is specified in the job spec + if "Namespace" in job_spec["Job"]: + namespace = job_spec["Job"]["Namespace"] + logger.info(f"Using namespace from job spec: {namespace}") + else: + # Check if namespace is specified in the job spec + if "Namespace" in job_spec: + namespace = job_spec["Namespace"] + logger.info(f"Using namespace from job spec: {namespace}") + + # Wrap the job spec in a "Job" key + normalized_job_spec = {"Job": job_spec} + + # Replace wildcard namespaces with the default + if namespace == "*": + namespace = "development" + logger.info(f"Replaced wildcard namespace with default: {namespace}") + + # Always explicitly set the namespace in the job spec + normalized_job_spec["Job"]["Namespace"] = namespace + + logger.info(f"Submitting job {job_id} to namespace {namespace}") + logger.info(f"Job specification structure: {list(normalized_job_spec.keys())}") + logger.info(f"Job keys: {list(normalized_job_spec['Job'].keys())}") + + # Submit the job - pass the job_id and job spec directly + # The namespace is already set in the job spec + response = self.client.job.register_job(job_id, normalized_job_spec) + + logger.info(f"Job registration response: {response}") + + return { + "job_id": job_id, + "eval_id": response.get("EvalID"), + "status": "started", + "warnings": response.get("Warnings"), + "namespace": namespace + } + except Exception as e: + logger.error(f"Failed to start job: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to start job: {str(e)}") + + def stop_job(self, job_id: str, purge: bool = False) -> Dict[str, Any]: + """ + Stop a job by ID. 
+ + Args: + job_id: The ID of the job to stop + purge: If true, the job will be purged from Nomad's state entirely + + Returns: + Dict containing job_id, eval_id, and status + """ + try: + logger.info(f"Stopping job {job_id} in namespace {self.namespace} (purge={purge})") + + # Get the Nomad address from the client + nomad_addr = f"http://{self.client.host}:{self.client.port}" + + # Build the URL for the job endpoint + url = f"{nomad_addr}/v1/job/{job_id}" + + # Set up headers + headers = {} + if hasattr(self.client, 'token') and self.client.token: + headers["X-Nomad-Token"] = self.client.token + + # Set up params with the correct namespace and purge option + params = { + "namespace": self.namespace, + "purge": str(purge).lower() + } + + # Make the request directly + import requests + response = requests.delete( + url=url, + headers=headers, + params=params, + verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True + ) + + # Check if the request was successful + if response.status_code == 200: + response_data = response.json() + logger.info(f"Job stop response: {response_data}") + + return { + "job_id": job_id, + "eval_id": response_data.get("EvalID"), + "status": "stopped", + "namespace": self.namespace + } + else: + raise ValueError(f"Failed to stop job: {response.text}") + + except Exception as e: + logger.error(f"Failed to stop job {job_id}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to stop job: {str(e)}") + + def get_allocations(self, job_id: str) -> List[Dict[str, Any]]: + """Get all allocations for a job.""" + try: + # Get the Nomad address from the client + nomad_addr = f"http://{self.client.host}:{self.client.port}" + + # Build the URL for the job allocations endpoint + url = f"{nomad_addr}/v1/job/{job_id}/allocations" + + # Set up headers + headers = {} + if hasattr(self.client, 'token') and self.client.token: + headers["X-Nomad-Token"] = self.client.token + + # Set up params with the correct namespace + params = {"namespace": self.namespace} + + # Make the request directly + import requests + response = requests.get( + url=url, + headers=headers, + params=params, + verify=False if os.getenv("NOMAD_SKIP_VERIFY", "false").lower() == "true" else True + ) + + # Check if the request was successful + if response.status_code == 200: + return response.json() + elif response.status_code == 404: + logger.warning(f"No allocations found for job {job_id}") + return [] + else: + raise ValueError(f"Failed to get allocations: {response.text}") + except Exception as e: + logger.error(f"Failed to get allocations for job {job_id}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to get allocations: {str(e)}") + + def get_allocation_logs(self, alloc_id: str, task: str, log_type: str = "stderr") -> str: + """Get logs for a specific allocation and task.""" + try: + # More detailed debugging to understand what's happening + logger.info(f"Getting logs for allocation {alloc_id}, task {task}, type {log_type}") + + if alloc_id == "repository": + logger.error("Invalid allocation ID 'repository' detected") + return f"Error: Invalid allocation ID 'repository'" + + # Verify the allocation ID is a valid UUID (must be 36 characters) + if not alloc_id or len(alloc_id) != 36: + logger.error(f"Invalid allocation ID format: {alloc_id} (length: {len(alloc_id) if alloc_id else 0})") + return f"Error: Invalid allocation ID format - must be 36 character UUID" + + # Get allocation info to verify it exists + try: + allocation = 
self.client.allocation.get_allocation(alloc_id) + if not allocation: + logger.warning(f"Allocation {alloc_id} not found") + return f"Allocation {alloc_id} not found" + except Exception as e: + logger.error(f"Error checking allocation: {str(e)}") + return f"Error checking allocation: {str(e)}" + + # Try multiple approaches to get logs + log_content = None + error_messages = [] + + # Approach 1: Standard API + try: + logger.info(f"Attempting to get logs using standard API") + logs = self.client.allocation.logs.get_logs( + alloc_id, + task, + log_type, + plain=True + ) + + if logs: + if isinstance(logs, dict) and logs.get("Data"): + log_content = logs.get("Data") + logger.info(f"Successfully retrieved logs using standard API") + elif isinstance(logs, str): + log_content = logs + logger.info(f"Successfully retrieved logs as string") + else: + error_messages.append(f"Unexpected log format: {type(logs)}") + logger.warning(f"Unexpected log format: {type(logs)}") + else: + error_messages.append("No logs returned from standard API") + logger.warning("No logs returned from standard API") + except Exception as e: + error_str = str(e) + error_messages.append(f"Standard API error: {error_str}") + logger.warning(f"Standard API failed: {error_str}") + + # Approach 2: Try raw HTTP if the standard API didn't work + if not log_content: + try: + import requests + + # Get the Nomad address from environment or use default + nomad_addr = os.getenv("NOMAD_ADDR", "http://localhost:4646").rstrip('/') + nomad_token = os.getenv("NOMAD_TOKEN") + + # Construct the URL for logs + logs_url = f"{nomad_addr}/v1/client/fs/logs/{alloc_id}" + + # Setup headers + headers = {} + if nomad_token: + headers["X-Nomad-Token"] = nomad_token + + # Setup query parameters + params = { + "task": task, + "type": log_type, + "plain": "true" + } + + if self.namespace and self.namespace != "*": + params["namespace"] = self.namespace + + logger.info(f"Attempting to get logs using direct HTTP request to: {logs_url}") + response = requests.get(logs_url, headers=headers, params=params, verify=False) + + if response.status_code == 200: + log_content = response.text + logger.info(f"Successfully retrieved logs using direct HTTP request") + else: + error_messages.append(f"HTTP request failed with status {response.status_code}: {response.text}") + logger.warning(f"HTTP request failed: {response.status_code} - {response.text}") + except ImportError: + error_messages.append("Requests library not available for fallback HTTP request") + logger.warning("Requests library not available for fallback HTTP request") + except Exception as e: + error_str = str(e) + error_messages.append(f"HTTP request error: {error_str}") + logger.warning(f"HTTP request failed: {error_str}") + + # Approach 3: Direct system call as a last resort + if not log_content: + try: + import subprocess + + # Get the Nomad command-line client path + nomad_cmd = "nomad" # Default, assumes nomad is in PATH + + # Build the command + cmd_parts = [ + nomad_cmd, + "alloc", "logs", + "-verbose", + ] + + # Add namespace if specified + if self.namespace and self.namespace != "*": + cmd_parts.extend(["-namespace", self.namespace]) + + # Add allocation and task info + cmd_parts.extend(["-job", alloc_id, task]) + + # Use stderr or stdout + if log_type == "stderr": + cmd_parts.append("-stderr") + else: + cmd_parts.append("-stdout") + + logger.info(f"Attempting to get logs using command: {' '.join(cmd_parts)}") + process = subprocess.run(cmd_parts, capture_output=True, text=True) + + if 
process.returncode == 0: + log_content = process.stdout + logger.info(f"Successfully retrieved logs using command-line client") + else: + error_messages.append(f"Command-line client failed: {process.stderr}") + logger.warning(f"Command-line client failed: {process.stderr}") + except Exception as e: + error_str = str(e) + error_messages.append(f"Command-line client error: {error_str}") + logger.warning(f"Command-line client failed: {error_str}") + + # Return the logs if we got them, otherwise return error + if log_content: + return log_content + else: + error_msg = "; ".join(error_messages) + logger.error(f"Failed to get logs after multiple attempts: {error_msg}") + return f"Error retrieving {log_type} logs: {error_msg}" + + except Exception as e: + error_str = str(e) + logger.error(f"Failed to get logs for allocation {alloc_id}, task {task}: {error_str}") + raise HTTPException(status_code=500, detail=f"Failed to get logs: {error_str}") + + def get_deployment_status(self, job_id: str) -> Dict[str, Any]: + """Get the deployment status for a job.""" + try: + return self.client.job.get_deployment(job_id, namespace=self.namespace) + except Exception as e: + logger.error(f"Failed to get deployment status for job {job_id}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to get deployment status: {str(e)}") + + def get_job_evaluations(self, job_id: str) -> List[Dict[str, Any]]: + """Get evaluations for a job.""" + try: + return self.client.job.get_evaluations(job_id, namespace=self.namespace) + except Exception as e: + logger.error(f"Failed to get evaluations for job {job_id}: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to get evaluations: {str(e)}") \ No newline at end of file diff --git a/check_path.py b/check_path.py new file mode 100644 index 0000000..e9267f5 --- /dev/null +++ b/check_path.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +""" +Script to check Python path and help diagnose import issues. 
+""" + +import sys +import os + +def main(): + print("Current working directory:", os.getcwd()) + print("\nPython path:") + for path in sys.path: + print(f" - {path}") + + print("\nChecking for app directory:") + if os.path.exists("app"): + print("✅ 'app' directory exists in current working directory") + print("Contents of app directory:") + for item in os.listdir("app"): + print(f" - {item}") + else: + print("❌ 'app' directory does not exist in current working directory") + + print("\nChecking for app module:") + try: + import app + print("✅ 'app' module can be imported") + print(f"app module location: {app.__file__}") + except ImportError as e: + print(f"❌ Cannot import 'app' module: {e}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/claude_nomad_tool.json b/claude_nomad_tool.json new file mode 100644 index 0000000..b6ac1ee --- /dev/null +++ b/claude_nomad_tool.json @@ -0,0 +1,71 @@ +{ + "tools": [ + { + "name": "nomad_mcp", + "description": "Manage Nomad jobs through the MCP service", + "api_endpoints": [ + { + "name": "list_jobs", + "description": "List all jobs in a namespace", + "method": "GET", + "url": "http://127.0.0.1:8000/api/claude/list-jobs", + "params": [ + { + "name": "namespace", + "type": "string", + "description": "Nomad namespace", + "required": false, + "default": "development" + } + ] + }, + { + "name": "manage_job", + "description": "Manage a job (status, stop, restart)", + "method": "POST", + "url": "http://127.0.0.1:8000/api/claude/jobs", + "body": { + "job_id": "string", + "action": "string", + "namespace": "string", + "purge": "boolean" + } + }, + { + "name": "create_job", + "description": "Create a new job", + "method": "POST", + "url": "http://127.0.0.1:8000/api/claude/create-job", + "body": { + "job_id": "string", + "name": "string", + "type": "string", + "datacenters": "array", + "namespace": "string", + "docker_image": "string", + "count": "integer", + "cpu": "integer", + "memory": "integer", + "ports": "array", + "env_vars": "object" + } + }, + { + "name": "get_job_logs", + "description": "Get logs for a job", + "method": "GET", + "url": "http://127.0.0.1:8000/api/claude/job-logs/{job_id}", + "params": [ + { + "name": "namespace", + "type": "string", + "description": "Nomad namespace", + "required": false, + "default": "development" + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/cleanup_test_jobs.py b/cleanup_test_jobs.py new file mode 100644 index 0000000..61231bc --- /dev/null +++ b/cleanup_test_jobs.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +""" +Script to clean up test jobs from Nomad. 
+""" + +import os +import sys +from dotenv import load_dotenv +from app.services.nomad_client import NomadService + +# Load environment variables from .env file +load_dotenv() + +def main(): + print("Cleaning up test jobs from Nomad...") + + # Check if NOMAD_ADDR is configured + nomad_addr = os.getenv("NOMAD_ADDR") + if not nomad_addr: + print("Error: NOMAD_ADDR is not configured in .env file.") + sys.exit(1) + + print(f"Connecting to Nomad at: {nomad_addr}") + + try: + # Initialize the Nomad service + nomad_service = NomadService() + + # List all jobs + print("\nListing all jobs...") + jobs = nomad_service.list_jobs() + print(f"Found {len(jobs)} jobs") + + # Filter for test jobs (starting with "test-") + test_jobs = [job for job in jobs if job.get('ID', '').startswith('test-')] + print(f"Found {len(test_jobs)} test jobs:") + + # Print each test job's ID and status + for job in test_jobs: + print(f" - {job.get('ID')}: {job.get('Status')}") + + # Confirm before proceeding + if test_jobs: + print("\nDo you want to stop and purge all these test jobs? (y/n)") + response = input().strip().lower() + + if response == 'y': + print("\nStopping and purging test jobs...") + + for job in test_jobs: + job_id = job.get('ID') + try: + print(f"Stopping and purging job: {job_id}...") + stop_response = nomad_service.stop_job(job_id, purge=True) + print(f" - Success: {stop_response}") + except Exception as e: + print(f" - Error stopping job {job_id}: {str(e)}") + + print("\nCleanup completed.") + else: + print("\nCleanup cancelled.") + else: + print("\nNo test jobs found to clean up.") + + except Exception as e: + print(f"Error during cleanup: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/configs/example.yaml b/configs/example.yaml new file mode 100644 index 0000000..6758394 --- /dev/null +++ b/configs/example.yaml @@ -0,0 +1,9 @@ +repository: https://github.com/example/my-service +job_id: my-service +description: Example service managed by MCP +meta: + owner: ai-team + environment: development + tags: + - api + - example \ No newline at end of file diff --git a/configs/ms-qc-db.yaml b/configs/ms-qc-db.yaml new file mode 100644 index 0000000..f0f9d5c --- /dev/null +++ b/configs/ms-qc-db.yaml @@ -0,0 +1,11 @@ +repository: https://gitea.dev.meisheng.group/Mei_Sheng_Textiles/MS_QC_DB +repository_alias: ms-qc-db +job_id: ms-qc-db-dev +namespace: development +description: MS QC Database application for quality control tracking +meta: + owner: ms-team + environment: development + tags: + - database + - qc \ No newline at end of file diff --git a/configs/test-service.yaml b/configs/test-service.yaml new file mode 100644 index 0000000..0a3d9e7 --- /dev/null +++ b/configs/test-service.yaml @@ -0,0 +1,10 @@ +repository: http://gitea.internal/username/test-service +repository_alias: test-service +job_id: test-service +description: Test service managed by MCP for Gitea integration +meta: + owner: ai-team + environment: development + tags: + - test + - api \ No newline at end of file diff --git a/deploy_nomad_mcp.py b/deploy_nomad_mcp.py new file mode 100644 index 0000000..6c0eebe --- /dev/null +++ b/deploy_nomad_mcp.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +""" +Script to deploy the Nomad MCP service using our own Nomad client. 
+""" + +import os +import sys +import json +from dotenv import load_dotenv +from app.services.nomad_client import NomadService + +# Load environment variables from .env file +load_dotenv() + +def read_job_spec(file_path): + """Read the Nomad job specification from a file.""" + try: + with open(file_path, 'r') as f: + content = f.read() + + # Convert HCL to JSON (simplified approach) + # In a real scenario, you might want to use a proper HCL parser + # This is a very basic approach that assumes the job spec is valid + job_id = "nomad-mcp" + + # Create a basic job structure + job_spec = { + "ID": job_id, + "Name": job_id, + "Type": "service", + "Datacenters": ["jm"], + "Namespace": "development", + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Networks": [ + { + "DynamicPorts": [ + { + "Label": "http", + "To": 8000 + } + ] + } + ], + "Tasks": [ + { + "Name": "nomad-mcp", + "Driver": "docker", + "Config": { + "image": "registry.dev.meisheng.group/nomad_mcp:20250226", + "ports": ["http"], + "command": "python", + "args": ["-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] + }, + "Env": { + "NOMAD_ADDR": "http://pjmldk01.ds.meisheng.group:4646", + "NOMAD_NAMESPACE": "development", + "NOMAD_SKIP_VERIFY": "true", + "PORT": "8000", + "HOST": "0.0.0.0", + "LOG_LEVEL": "INFO", + "RELOAD": "true" + }, + "Resources": { + "CPU": 200, + "MemoryMB": 256 + }, + "Services": [ + { + "Name": "nomad-mcp", + "PortLabel": "http", + "Tags": [ + "traefik.enable=true", + "traefik.http.routers.nomad-mcp.entryPoints=https", + "traefik.http.routers.nomad-mcp.rule=Host(`nomad_mcp.dev.meisheng.group`)", + "traefik.http.routers.nomad-mcp.middlewares=proxyheaders@consulcatalog" + ], + "Checks": [ + { + "Type": "http", + "Path": "/api/health", + "Interval": 10000000000, + "Timeout": 2000000000, + "CheckRestart": { + "Limit": 3, + "Grace": 60000000000 + } + } + ] + } + ] + } + ] + } + ], + "Update": { + "MaxParallel": 1, + "MinHealthyTime": 30000000000, + "HealthyDeadline": 300000000000, + "AutoRevert": True + } + } + + return job_spec + except Exception as e: + print(f"Error reading job specification: {str(e)}") + sys.exit(1) + +def main(): + print("Deploying Nomad MCP service using our own Nomad client...") + + # Check if NOMAD_ADDR is configured + nomad_addr = os.getenv("NOMAD_ADDR") + if not nomad_addr: + print("Error: NOMAD_ADDR is not configured in .env file.") + sys.exit(1) + + print(f"Connecting to Nomad at: {nomad_addr}") + + try: + # Initialize the Nomad service + nomad_service = NomadService() + + # Read the job specification + job_spec = read_job_spec("nomad_mcp_job.nomad") + print("Job specification loaded successfully.") + + # Start the job + print("Registering and starting the nomad-mcp job...") + response = nomad_service.start_job(job_spec) + + print("\nJob registration response:") + print(json.dumps(response, indent=2)) + + if response.get("status") == "started": + print("\n✅ Nomad MCP service deployed successfully!") + print(f"Job ID: {response.get('job_id')}") + print(f"Evaluation ID: {response.get('eval_id')}") + print("\nThe service will be available at: https://nomad_mcp.dev.meisheng.group") + else: + print("\n❌ Failed to deploy Nomad MCP service.") + print(f"Status: {response.get('status')}") + print(f"Message: {response.get('message', 'Unknown error')}") + + except Exception as e: + print(f"Error deploying Nomad MCP service: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/deploy_with_claude_api.py 
b/deploy_with_claude_api.py new file mode 100644 index 0000000..1b186d4 --- /dev/null +++ b/deploy_with_claude_api.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +""" +Script to deploy the Nomad MCP service using the Claude API. +""" + +import os +import sys +import json +import requests +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +def main(): + print("Deploying Nomad MCP service using the Claude API...") + + # Define the API endpoint + api_url = "http://localhost:8000/api/claude/create-job" + + # Create the job specification for the Claude API + job_spec = { + "job_id": "nomad-mcp", + "name": "Nomad MCP Service", + "type": "service", + "datacenters": ["jm"], + "namespace": "development", + "docker_image": "registry.dev.meisheng.group/nomad_mcp:20250226", + "count": 1, + "cpu": 200, + "memory": 256, + "ports": [ + { + "Label": "http", + "Value": 0, + "To": 8000 + } + ], + "env_vars": { + "NOMAD_ADDR": "http://pjmldk01.ds.meisheng.group:4646", + "NOMAD_NAMESPACE": "development", + "NOMAD_SKIP_VERIFY": "true", + "PORT": "8000", + "HOST": "0.0.0.0", + "LOG_LEVEL": "INFO", + "RELOAD": "true" + }, + # Note: The Claude API doesn't directly support command and args, + # so we'll need to add a note about this limitation + } + + try: + # Make the API request + print("Sending request to Claude API...") + response = requests.post( + api_url, + json=job_spec, + headers={"Content-Type": "application/json"} + ) + + # Check if the request was successful + if response.status_code == 200: + result = response.json() + print("\nJob registration response:") + print(json.dumps(result, indent=2)) + + if result.get("success"): + print("\n✅ Nomad MCP service deployed successfully!") + print(f"Job ID: {result.get('job_id')}") + print(f"Status: {result.get('status')}") + print("\nThe service will be available at: https://nomad_mcp.dev.meisheng.group") + + # Add Traefik configuration and command information + print("\nImportant Notes:") + print("1. The Claude API doesn't directly support adding Traefik tags.") + print(" You may need to update the job manually to add the following tags:") + print(" - traefik.enable=true") + print(" - traefik.http.routers.nomad-mcp.entryPoints=https") + print(" - traefik.http.routers.nomad-mcp.rule=Host(`nomad_mcp.dev.meisheng.group`)") + print(" - traefik.http.routers.nomad-mcp.middlewares=proxyheaders@consulcatalog") + print("\n2. The Claude API doesn't directly support specifying command and args.") + print(" You need to update the job manually to add the following:") + print(" - command: python") + print(" - args: [\"-m\", \"uvicorn\", \"app.main:app\", \"--host\", \"0.0.0.0\", \"--port\", \"8000\"]") + else: + print("\n❌ Failed to deploy Nomad MCP service.") + print(f"Message: {result.get('message', 'Unknown error')}") + else: + print(f"\n❌ API request failed with status code: {response.status_code}") + print(f"Response: {response.text}") + + except Exception as e: + print(f"Error deploying Nomad MCP service: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d1e4c88 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,14 @@ +version: '3' + +services: + nomad-mcp: + build: . 
+ ports: + - "8000:8000" + volumes: + - ./configs:/app/configs + env_file: + - .env + environment: + - CONFIG_DIR=/app/configs + restart: unless-stopped \ No newline at end of file diff --git a/job_spec.json b/job_spec.json new file mode 100644 index 0000000..477153d --- /dev/null +++ b/job_spec.json @@ -0,0 +1,307 @@ +{ + "Job": { + "Stop": false, + "Region": "global", + "Namespace": "development", + "ID": "ms-qc-db-dev", + "ParentID": "", + "Name": "ms-qc-db-dev", + "Type": "service", + "Priority": 50, + "AllAtOnce": false, + "Datacenters": [ + "jm" + ], + "NodePool": "default", + "Constraints": null, + "Affinities": null, + "Spreads": null, + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Update": { + "Stagger": 30000000000, + "MaxParallel": 1, + "HealthCheck": "checks", + "MinHealthyTime": 10000000000, + "HealthyDeadline": 300000000000, + "ProgressDeadline": 600000000000, + "AutoRevert": false, + "AutoPromote": false, + "Canary": 0 + }, + "Migrate": { + "MaxParallel": 1, + "HealthCheck": "checks", + "MinHealthyTime": 10000000000, + "HealthyDeadline": 300000000000 + }, + "Constraints": [ + { + "LTarget": "${attr.consul.version}", + "RTarget": "\u003e= 1.8.0", + "Operand": "semver" + } + ], + "Scaling": null, + "RestartPolicy": { + "Attempts": 2, + "Interval": 1800000000000, + "Delay": 15000000000, + "Mode": "fail", + "RenderTemplates": false + }, + "Tasks": [ + { + "Name": "ms-qc-db", + "Driver": "docker", + "User": "", + "Config": { + "command": "uvicorn", + "args": [ + "app.main:app", + "--host", + "0.0.0.0", + "--port", + "8000", + "--workers", + "2", + "--proxy-headers", + "--forwarded-allow-ips", + "*" + ], + "image": "registry.dev.meisheng.group/ms_qc_db:20250211", + "force_pull": true, + "ports": [ + "http" + ] + }, + "Env": { + "PYTHONPATH": "/local/MS_QC_DB", + "LOG_LEVEL": "INFO", + "USE_SQLITE": "false" + }, + "Services": null, + "Vault": null, + "Consul": null, + "Templates": [ + { + "SourcePath": "", + "DestPath": "secrets/app.env", + "EmbeddedTmpl": "{{with secret \"infrastructure/nomad/msqc\"}}\nDB_USER=\"{{ .Data.data.DB_USER }}\"\nDB_PASSWORD=\"{{ .Data.data.DB_PASSWORD }}\"\nDB_HOST=\"{{ .Data.data.DB_HOST }}\"\nDB_PORT=\"{{ .Data.data.DB_PORT }}\"\nDB_NAME=\"qc_rolls_dev\"\nWEBHOOK_SECRET=\"{{ .Data.data.WEBHOOK_SECRET }}\"\n{{end}}\n", + "ChangeMode": "restart", + "ChangeSignal": "", + "ChangeScript": null, + "Splay": 5000000000, + "Perms": "0644", + "Uid": null, + "Gid": null, + "LeftDelim": "{{", + "RightDelim": "}}", + "Envvars": true, + "VaultGrace": 0, + "Wait": null, + "ErrMissingKey": false + } + ], + "Constraints": null, + "Affinities": null, + "Resources": { + "CPU": 500, + "Cores": 0, + "MemoryMB": 512, + "MemoryMaxMB": 0, + "DiskMB": 0, + "IOPS": 0, + "Networks": null, + "Devices": null, + "NUMA": null + }, + "RestartPolicy": { + "Attempts": 2, + "Interval": 1800000000000, + "Delay": 15000000000, + "Mode": "fail", + "RenderTemplates": false + }, + "DispatchPayload": null, + "Lifecycle": null, + "Meta": null, + "KillTimeout": 5000000000, + "LogConfig": { + "MaxFiles": 10, + "MaxFileSizeMB": 10, + "Disabled": false + }, + "Artifacts": [ + { + "GetterSource": "git::ssh://git@gitea.service.mesh:2222/Mei_Sheng_Textiles/MS_QC_DB.git", + "GetterOptions": { + "sshkey": 
"LS0tLS1CRUdJTiBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0KYjNCbGJuTnphQzFyWlhrdGRqRUFBQUFBQkc1dmJtVUFBQUFFYm05dVpRQUFBQUFBQUFBQkFBQUFNd0FBQUF0emMyZ3RaVwpReU5UVXhPUUFBQUNENHJwM05hZXA4K2lwVnlOZXNEbEVKckE0Rlg3MXA5VW5BWmxZcEJCNDh6d0FBQUppQ1ZWczhnbFZiClBBQUFBQXR6YzJndFpXUXlOVFV4T1FBQUFDRDRycDNOYWVwOCtpcFZ5TmVzRGxFSnJBNEZYNzFwOVVuQVpsWXBCQjQ4encKQUFBRUNuckxjc1JDeUQyNmRnQ3dqdG5PUnNOK1VzUjdxZ1pqbXZpU2tVNmozalVmaXVuYzFwNm56NktsWEkxNndPVVFtcwpEZ1ZmdlduMVNjQm1WaWtFSGp6UEFBQUFFMjF6WDNGalgyUmlYMlJsY0d4dmVTQnJaWGtCQWc9PQotLS0tLUVORCBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0K", + "ref": "main" + }, + "GetterHeaders": null, + "GetterMode": "any", + "RelativeDest": "local/MS_QC_DB" + } + ], + "Leader": false, + "ShutdownDelay": 0, + "VolumeMounts": null, + "ScalingPolicies": null, + "KillSignal": "", + "Kind": "", + "CSIPluginConfig": null, + "Identity": { + "Name": "default", + "Audience": [ + "nomadproject.io" + ], + "ChangeMode": "", + "ChangeSignal": "", + "Env": false, + "File": false, + "ServiceName": "", + "TTL": 0 + }, + "Identities": null, + "Actions": null + } + ], + "EphemeralDisk": { + "Sticky": false, + "SizeMB": 300, + "Migrate": false + }, + "Meta": null, + "ReschedulePolicy": { + "Attempts": 0, + "Interval": 0, + "Delay": 30000000000, + "DelayFunction": "exponential", + "MaxDelay": 3600000000000, + "Unlimited": true + }, + "Affinities": null, + "Spreads": null, + "Networks": [ + { + "Mode": "", + "Device": "", + "CIDR": "", + "IP": "", + "Hostname": "", + "MBits": 0, + "DNS": null, + "ReservedPorts": null, + "DynamicPorts": [ + { + "Label": "http", + "Value": 0, + "To": 8000, + "HostNetwork": "default" + } + ] + } + ], + "Consul": { + "Namespace": "", + "Cluster": "default", + "Partition": "" + }, + "Services": [ + { + "Name": "${NOMAD_JOB_NAME}", + "TaskName": "", + "PortLabel": "http", + "AddressMode": "auto", + "Address": "", + "EnableTagOverride": false, + "Tags": [ + "traefik.http.routers.${NOMAD_JOB_NAME}.entryPoints=https", + "traefik.http.routers.${NOMAD_JOB_NAME}.rule=Host(`dev_qc.dev.meisheng.group`)", + "traefik.http.routers.${NOMAD_JOB_NAME}.middlewares=proxyheaders@consulcatalog", + "traefik.enable=true" + ], + "CanaryTags": null, + "Checks": [ + { + "Name": "service: \"${NOMAD_JOB_NAME}\" check", + "Type": "http", + "Command": "", + "Args": null, + "Path": "/api/v1/health", + "Protocol": "", + "PortLabel": "http", + "Expose": false, + "AddressMode": "", + "Interval": 10000000000, + "Timeout": 2000000000, + "InitialStatus": "", + "TLSServerName": "", + "TLSSkipVerify": false, + "Method": "", + "Header": null, + "CheckRestart": null, + "GRPCService": "", + "GRPCUseTLS": false, + "TaskName": "", + "SuccessBeforePassing": 0, + "FailuresBeforeCritical": 0, + "FailuresBeforeWarning": 0, + "Body": "", + "OnUpdate": "require_healthy" + } + ], + "Connect": null, + "Meta": null, + "CanaryMeta": null, + "TaggedAddresses": null, + "Namespace": "default", + "OnUpdate": "require_healthy", + "Provider": "consul", + "Cluster": "default", + "Identity": null + } + ], + "Volumes": null, + "ShutdownDelay": null, + "StopAfterClientDisconnect": null, + "MaxClientDisconnect": null, + "PreventRescheduleOnLost": false + } + ], + "Update": { + "Stagger": 30000000000, + "MaxParallel": 1, + "HealthCheck": "", + "MinHealthyTime": 0, + "HealthyDeadline": 0, + "ProgressDeadline": 0, + "AutoRevert": false, + "AutoPromote": false, + "Canary": 0 + }, + "Multiregion": null, + "Periodic": null, + "ParameterizedJob": null, + "Dispatched": false, + "DispatchIdempotencyToken": "", + "Payload": null, + "Meta": null, + "ConsulToken": 
"", + "ConsulNamespace": "", + "VaultToken": "", + "VaultNamespace": "", + "NomadTokenID": "", + "Status": "dead", + "StatusDescription": "", + "Stable": true, + "Version": 4, + "SubmitTime": 1740554361561458507, + "CreateIndex": 3415698, + "ModifyIndex": 3416318, + "JobModifyIndex": 3416317 + } +} diff --git a/nomad_job_api_docs.md b/nomad_job_api_docs.md new file mode 100644 index 0000000..fae7b61 --- /dev/null +++ b/nomad_job_api_docs.md @@ -0,0 +1,182 @@ +# Nomad Job Management API Documentation + +## Overview + +This document outlines the process for managing jobs (starting, stopping, and monitoring) in Hashicorp Nomad via its HTTP API. These operations are essential for deploying, updating, and terminating workloads in a Nomad cluster. + +## Prerequisites + +- A running Nomad cluster +- Network access to the Nomad API endpoint (default port 4646) +- Proper authentication credentials (if ACLs are enabled) + +## API Basics + +- Base URL: `http://:4646` +- API Version: `v1` +- Content Type: `application/json` + +## Job Lifecycle + +A Nomad job goes through multiple states during its lifecycle: + +1. **Pending**: The job has been submitted but not yet scheduled +2. **Running**: The job is active and its tasks are running +3. **Dead**: The job has been stopped or failed + +## Job Management Operations + +### 1. List Jobs + +List all jobs in a namespace to get an overview of the cluster's workloads. + +``` +GET /v1/jobs?namespace= +``` + +Example PowerShell command: +```powershell +Invoke-RestMethod -Uri "http://nomad-server:4646/v1/jobs?namespace=development" -Method GET +``` + +### 2. Starting a Job + +Starting a job in Nomad involves registering a job specification with the API server. + +``` +POST /v1/jobs +``` + +With a job specification in the request body: + +```json +{ + "Job": { + "ID": "example-job", + "Name": "example-job", + "Namespace": "development", + "Type": "service", + "Datacenters": ["dc1"], + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Tasks": [ + { + "Name": "server", + "Driver": "docker", + "Config": { + "image": "nginx:latest" + } + } + ] + } + ] + } +} +``` + +Example PowerShell command: +```powershell +$jobSpec = @{ + Job = @{ + ID = "example-job" + # ... other job properties + } +} | ConvertTo-Json -Depth 20 + +Invoke-RestMethod -Uri "http://nomad-server:4646/v1/jobs" -Method POST -Body $jobSpec -ContentType "application/json" +``` + +To start an existing (stopped) job: +1. Retrieve the job specification with `GET /v1/job/?namespace=` +2. Set `Stop = false` in the job specification +3. Submit the modified spec with `POST /v1/jobs` + +### 3. Stopping a Job + +Stopping a job is simpler and requires a DELETE request: + +``` +DELETE /v1/job/?namespace= +``` + +This marks the job for stopping but preserves its configuration in Nomad. + +Example PowerShell command: +```powershell +Invoke-RestMethod -Uri "http://nomad-server:4646/v1/job/example-job?namespace=development" -Method DELETE +``` + +Optional parameters: +- `purge=true` - Completely removes the job from Nomad's state + +### 4. Reading Job Status + +To check the status of a job: + +``` +GET /v1/job/?namespace= +``` + +This returns detailed information about the job, including: +- Current status (`running`, `pending`, `dead`) +- Task group count and health +- Version information + +Example PowerShell command: +```powershell +Invoke-RestMethod -Uri "http://nomad-server:4646/v1/job/example-job?namespace=development" -Method GET +``` + +### 5. 
Reading Job Allocations + +To see all allocations (instances) of a job: + +``` +GET /v1/job//allocations?namespace= +``` + +This returns information about where the job is running and in what state. + +Example PowerShell command: +```powershell +Invoke-RestMethod -Uri "http://nomad-server:4646/v1/job/example-job/allocations?namespace=development" -Method GET +``` + +## Common Issues and Troubleshooting + +### Namespace Issues + +Nomad requires specifying the correct namespace when managing jobs. If not specified, operations will default to the "default" namespace, which may not contain your jobs. + +### Job Specification Formatting + +When starting a job, ensure the job specification is properly wrapped in a "Job" object: + +```json +{ + "Job": { + // job details go here + } +} +``` + +### Error Codes + +- **400**: Bad request, often due to malformed job specification +- **403**: Permission denied, check ACL tokens +- **404**: Job not found, verify job ID and namespace +- **500**: Server error, check Nomad server logs + +## Best Practices + +1. Always specify the namespace explicitly in API calls +2. Use the job's existing specification when updating, to avoid losing configuration +3. Log API responses to aid in troubleshooting +4. Implement proper error handling for API failures +5. Consider using official client libraries instead of direct API calls when possible + +## Conclusion + +The Nomad HTTP API provides a robust interface for job lifecycle management. Understanding these API workflows is crucial for building reliable automation and integration with Nomad clusters. \ No newline at end of file diff --git a/nomad_mcp_job.nomad b/nomad_mcp_job.nomad new file mode 100644 index 0000000..749b76b --- /dev/null +++ b/nomad_mcp_job.nomad @@ -0,0 +1,79 @@ +job "nomad-mcp" { + datacenters = ["jm"] + type = "service" + namespace = "development" + + group "app" { + count = 1 + + network { + port "http" { + to = 8000 + } + } + + task "nomad-mcp" { + driver = "docker" + + config { + image = "registry.dev.meisheng.group/nomad_mcp:20250226" + ports = ["http"] + command = "python" + args = ["-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] + } + + env { + # Nomad connection settings + NOMAD_ADDR = "http://pjmldk01.ds.meisheng.group:4646" + NOMAD_NAMESPACE = "development" + NOMAD_SKIP_VERIFY = "true" + + # API settings + PORT = "8000" + HOST = "0.0.0.0" + + # Logging level + LOG_LEVEL = "INFO" + + # Enable to make development easier + RELOAD = "true" + } + + resources { + cpu = 200 + memory = 256 + } + + service { + name = "nomad-mcp" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.nomad-mcp.entryPoints=https", + "traefik.http.routers.nomad-mcp.rule=Host(`nomad_mcp.dev.meisheng.group`)", + "traefik.http.routers.nomad-mcp.middlewares=proxyheaders@consulcatalog" + ] + + check { + type = "http" + path = "/api/health" + interval = "10s" + timeout = "2s" + + check_restart { + limit = 3 + grace = "60s" + } + } + } + } + } + + # Define update strategy + update { + max_parallel = 1 + min_healthy_time = "30s" + healthy_deadline = "5m" + auto_revert = true + } +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f2832d7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +fastapi +uvicorn +python-nomad +pydantic +python-dotenv +httpx +python-multipart +pyyaml +requests \ No newline at end of file diff --git a/run.py b/run.py new file mode 100644 index 0000000..63e0503 --- /dev/null +++ b/run.py @@ 
-0,0 +1,23 @@ +#!/usr/bin/env python +import uvicorn +import os +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Configuration from environment +host = os.getenv("HOST", "0.0.0.0") +port = int(os.getenv("PORT", "8000")) +reload = os.getenv("RELOAD", "false").lower() == "true" + +if __name__ == "__main__": + print(f"Starting Nomad MCP service on {host}:{port}") + print(f"API documentation available at http://{host}:{port}/docs") + + uvicorn.run( + "app.main:app", + host=host, + port=port, + reload=reload, + ) \ No newline at end of file diff --git a/static/app.js b/static/app.js new file mode 100644 index 0000000..becd58b --- /dev/null +++ b/static/app.js @@ -0,0 +1,355 @@ +// API endpoints +const API_BASE_URL = '/api/claude'; +const ENDPOINTS = { + listJobs: `${API_BASE_URL}/list-jobs`, + manageJob: `${API_BASE_URL}/jobs`, + jobLogs: `${API_BASE_URL}/job-logs` +}; + +// DOM elements +const elements = { + namespaceSelector: document.getElementById('namespace-selector'), + refreshBtn: document.getElementById('refresh-btn'), + jobList: document.getElementById('job-list'), + jobTable: document.getElementById('job-table'), + jobDetails: document.getElementById('job-details'), + logContent: document.getElementById('log-content'), + logTabs: document.querySelectorAll('.log-tab'), + loading: document.getElementById('loading'), + errorMessage: document.getElementById('error-message') +}; + +// State +let state = { + jobs: [], + selectedJob: null, + selectedNamespace: 'development', + logs: { + stdout: '', + stderr: '', + currentTab: 'stdout' + } +}; + +// Initialize the app +function init() { + // Set up event listeners + elements.namespaceSelector.addEventListener('change', handleNamespaceChange); + elements.refreshBtn.addEventListener('click', loadJobs); + elements.logTabs.forEach(tab => { + tab.addEventListener('click', () => { + const logType = tab.getAttribute('data-log-type'); + switchLogTab(logType); + }); + }); + + // Load initial jobs + loadJobs(); +} + +// Load jobs from the API +async function loadJobs() { + showLoading(true); + hideError(); + + try { + const namespace = elements.namespaceSelector.value; + const response = await fetch(`${ENDPOINTS.listJobs}?namespace=${namespace}`); + + if (!response.ok) { + throw new Error(`Failed to load jobs: ${response.statusText}`); + } + + const jobs = await response.json(); + state.jobs = jobs; + state.selectedNamespace = namespace; + + renderJobList(); + showLoading(false); + } catch (error) { + console.error('Error loading jobs:', error); + showError(`Failed to load jobs: ${error.message}`); + showLoading(false); + } +} + +// Render the job list +function renderJobList() { + elements.jobList.innerHTML = ''; + + if (state.jobs.length === 0) { + const row = document.createElement('tr'); + row.innerHTML = `No jobs found in the ${state.selectedNamespace} namespace`; + elements.jobList.appendChild(row); + return; + } + + state.jobs.forEach(job => { + const row = document.createElement('tr'); + row.setAttribute('data-job-id', job.id); + row.innerHTML = ` + ${job.id} + ${job.type} + ${job.status} + + + + + + `; + + elements.jobList.appendChild(row); + }); + + // Add event listeners to buttons + document.querySelectorAll('.btn-view').forEach(btn => { + btn.addEventListener('click', () => viewJob(btn.getAttribute('data-job-id'))); + }); + + document.querySelectorAll('.btn-restart').forEach(btn => { + btn.addEventListener('click', () => restartJob(btn.getAttribute('data-job-id'))); + }); + + 
document.querySelectorAll('.btn-stop').forEach(btn => { + btn.addEventListener('click', () => stopJob(btn.getAttribute('data-job-id'))); + }); +} + +// View job details +async function viewJob(jobId) { + showLoading(true); + + try { + // Get job status + const statusResponse = await fetch(ENDPOINTS.manageJob, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + job_id: jobId, + action: 'status', + namespace: state.selectedNamespace + }) + }); + + if (!statusResponse.ok) { + throw new Error(`Failed to get job status: ${statusResponse.statusText}`); + } + + const jobStatus = await statusResponse.json(); + state.selectedJob = jobStatus; + + // Get job logs + const logsResponse = await fetch(`${ENDPOINTS.jobLogs}/${jobId}?namespace=${state.selectedNamespace}`); + + if (logsResponse.ok) { + const logsData = await logsResponse.json(); + + if (logsData.success) { + state.logs.stdout = logsData.logs.stdout || 'No stdout logs available'; + state.logs.stderr = logsData.logs.stderr || 'No stderr logs available'; + } else { + state.logs.stdout = 'Logs not available'; + state.logs.stderr = 'Logs not available'; + } + } else { + state.logs.stdout = 'Failed to load logs'; + state.logs.stderr = 'Failed to load logs'; + } + + renderJobDetails(); + renderLogs(); + showLoading(false); + + // Highlight the selected job in the table + document.querySelectorAll('#job-list tr').forEach(row => { + row.classList.remove('selected'); + }); + + const selectedRow = document.querySelector(`#job-list tr[data-job-id="${jobId}"]`); + if (selectedRow) { + selectedRow.classList.add('selected'); + } + } catch (error) { + console.error('Error viewing job:', error); + showError(`Failed to view job: ${error.message}`); + showLoading(false); + } +} + +// Restart a job +async function restartJob(jobId) { + if (!confirm(`Are you sure you want to restart job "${jobId}"?`)) { + return; + } + + showLoading(true); + + try { + const response = await fetch(ENDPOINTS.manageJob, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + job_id: jobId, + action: 'restart', + namespace: state.selectedNamespace + }) + }); + + if (!response.ok) { + throw new Error(`Failed to restart job: ${response.statusText}`); + } + + const result = await response.json(); + + if (result.success) { + alert(`Job "${jobId}" has been restarted successfully.`); + loadJobs(); + } else { + throw new Error(result.message); + } + + showLoading(false); + } catch (error) { + console.error('Error restarting job:', error); + showError(`Failed to restart job: ${error.message}`); + showLoading(false); + } +} + +// Stop a job +async function stopJob(jobId) { + const purge = confirm(`Do you want to purge job "${jobId}" after stopping?`); + + if (!confirm(`Are you sure you want to stop job "${jobId}"?`)) { + return; + } + + showLoading(true); + + try { + const response = await fetch(ENDPOINTS.manageJob, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + job_id: jobId, + action: 'stop', + namespace: state.selectedNamespace, + purge: purge + }) + }); + + if (!response.ok) { + throw new Error(`Failed to stop job: ${response.statusText}`); + } + + const result = await response.json(); + + if (result.success) { + alert(`Job "${jobId}" has been stopped${purge ? 
' and purged' : ''} successfully.`); + loadJobs(); + } else { + throw new Error(result.message); + } + + showLoading(false); + } catch (error) { + console.error('Error stopping job:', error); + showError(`Failed to stop job: ${error.message}`); + showLoading(false); + } +} + +// Render job details +function renderJobDetails() { + if (!state.selectedJob) { + elements.jobDetails.innerHTML = '

<p class="select-job-message">Select a job to view details</p>
'; + return; + } + + const job = state.selectedJob; + const details = job.details?.job || {}; + const allocation = job.details?.latest_allocation || {}; + + let detailsHtml = ` +

<h3>${job.job_id}</h3>
+        <p><span class="label">Status:</span> ${job.status}</p>
+ `; + + if (details.Type) { + detailsHtml += `

<p><span class="label">Type:</span> ${details.Type}</p>
`; + } + + if (details.Namespace) { + detailsHtml += `

<p><span class="label">Namespace:</span> ${details.Namespace}</p>
`; + } + + if (details.Datacenters) { + detailsHtml += `

<p><span class="label">Datacenters:</span> ${details.Datacenters.join(', ')}</p>
`; + } + + if (allocation.ID) { + detailsHtml += ` +

<h3>Latest Allocation</h3>
+            <p><span class="label">ID:</span> ${allocation.ID}</p>
+            <p><span class="label">Status:</span> ${allocation.ClientStatus || 'Unknown'}</p>
+ `; + + if (allocation.ClientDescription) { + detailsHtml += `

<p><span class="label">Description:</span> ${allocation.ClientDescription}</p>
`; + } + } + + elements.jobDetails.innerHTML = detailsHtml; +} + +// Render logs +function renderLogs() { + elements.logContent.textContent = state.logs[state.logs.currentTab]; +} + +// Switch log tab +function switchLogTab(logType) { + state.logs.currentTab = logType; + + // Update active tab + elements.logTabs.forEach(tab => { + if (tab.getAttribute('data-log-type') === logType) { + tab.classList.add('active'); + } else { + tab.classList.remove('active'); + } + }); + + renderLogs(); +} + +// Handle namespace change +function handleNamespaceChange() { + loadJobs(); +} + +// Show/hide loading indicator +function showLoading(show) { + elements.loading.style.display = show ? 'block' : 'none'; + elements.jobTable.style.display = show ? 'none' : 'table'; +} + +// Show error message +function showError(message) { + elements.errorMessage.textContent = message; + elements.errorMessage.style.display = 'block'; +} + +// Hide error message +function hideError() { + elements.errorMessage.style.display = 'none'; +} + +// Initialize the app when the DOM is loaded +document.addEventListener('DOMContentLoaded', init); \ No newline at end of file diff --git a/static/index.html b/static/index.html new file mode 100644 index 0000000..7521c29 --- /dev/null +++ b/static/index.html @@ -0,0 +1,66 @@ + + + + + + Nomad Job Manager + + + +
<header>
+            <h1>Nomad Job Manager</h1>
+            <div class="controls">
+                <select id="namespace-selector">
+                    <option value="development">development</option>
+                    <option value="default">default</option>
+                </select>
+                <button id="refresh-btn" class="btn btn-primary">Refresh</button>
+            </div>
+        </header>
+
+        <div id="error-message" class="error-message"></div>
+
+        <main>
+            <div class="job-list-container">
+                <h2>Jobs</h2>
+                <div id="loading" class="loading">Loading jobs...</div>
+                <table id="job-table" class="job-table">
+                    <thead>
+                        <tr>
+                            <th>Job ID</th>
+                            <th>Type</th>
+                            <th>Status</th>
+                            <th>Actions</th>
+                        </tr>
+                    </thead>
+                    <tbody id="job-list"></tbody>
+                </table>
+            </div>
+
+            <div class="job-details-container">
+                <h2>Job Details</h2>
+                <div id="job-details" class="job-details">
+                    <p class="select-job-message">Select a job to view details</p>
+                </div>
+
+                <div class="job-logs">
+                    <h2>Logs</h2>
+                    <div class="log-tabs">
+                        <button class="log-tab active" data-log-type="stdout">stdout</button>
+                        <button class="log-tab" data-log-type="stderr">stderr</button>
+                    </div>
+                    <pre id="log-content" class="log-content">Select a job to view logs</pre>
+                </div>
+            </div>
+        </main>
+
+        <footer>
+            <p>Nomad MCP Service - Claude Integration</p>
+        </footer>
+ + + + \ No newline at end of file diff --git a/static/styles.css b/static/styles.css new file mode 100644 index 0000000..f852318 --- /dev/null +++ b/static/styles.css @@ -0,0 +1,244 @@ +/* Base styles */ +:root { + --primary-color: #1976d2; + --secondary-color: #424242; + --success-color: #4caf50; + --danger-color: #f44336; + --warning-color: #ff9800; + --light-gray: #f5f5f5; + --border-color: #e0e0e0; + --text-color: #333; + --text-light: #666; +} + +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + line-height: 1.6; + color: var(--text-color); + background-color: #f9f9f9; +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 20px; +} + +/* Header */ +header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 20px; + padding-bottom: 10px; + border-bottom: 1px solid var(--border-color); +} + +.controls { + display: flex; + gap: 10px; +} + +/* Buttons */ +.btn { + padding: 8px 16px; + border: none; + border-radius: 4px; + cursor: pointer; + font-weight: 500; + transition: background-color 0.2s; +} + +.btn-primary { + background-color: var(--primary-color); + color: white; +} + +.btn-success { + background-color: var(--success-color); + color: white; +} + +.btn-danger { + background-color: var(--danger-color); + color: white; +} + +.btn-warning { + background-color: var(--warning-color); + color: white; +} + +.btn:hover { + opacity: 0.9; +} + +/* Form elements */ +select { + padding: 8px; + border: 1px solid var(--border-color); + border-radius: 4px; + background-color: white; +} + +/* Main content */ +main { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 20px; +} + +/* Job list */ +.job-list-container { + background-color: white; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + padding: 20px; +} + +.job-table { + width: 100%; + border-collapse: collapse; + margin-top: 10px; +} + +.job-table th, +.job-table td { + padding: 12px; + text-align: left; + border-bottom: 1px solid var(--border-color); +} + +.job-table th { + background-color: var(--light-gray); + font-weight: 600; +} + +.job-table tr:hover { + background-color: var(--light-gray); +} + +.job-actions { + display: flex; + gap: 5px; +} + +/* Job details */ +.job-details-container { + background-color: white; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + padding: 20px; +} + +.job-details { + margin-bottom: 20px; +} + +.job-details h3 { + margin-top: 15px; + margin-bottom: 5px; + color: var(--secondary-color); +} + +.job-details p { + margin-bottom: 10px; +} + +.job-details .label { + font-weight: 600; + color: var(--text-light); +} + +/* Logs */ +.job-logs { + margin-top: 20px; +} + +.log-tabs { + display: flex; + margin-bottom: 10px; +} + +.log-tab { + padding: 8px 16px; + background-color: var(--light-gray); + border: 1px solid var(--border-color); + border-bottom: none; + cursor: pointer; +} + +.log-tab.active { + background-color: white; + border-bottom: 2px solid var(--primary-color); +} + +.log-content { + background-color: #282c34; + color: #abb2bf; + padding: 15px; + border-radius: 4px; + overflow: auto; + max-height: 300px; + font-family: 'Courier New', Courier, monospace; + white-space: pre-wrap; +} + +/* Status indicators */ +.status { + display: inline-block; + padding: 4px 8px; + border-radius: 4px; + font-size: 0.85em; + font-weight: 500; +} + +.status-running { + background-color: rgba(76, 175, 80, 0.2); + color: #2e7d32; +} + 
+.status-pending { + background-color: rgba(255, 152, 0, 0.2); + color: #ef6c00; +} + +.status-dead { + background-color: rgba(244, 67, 54, 0.2); + color: #c62828; +} + +/* Loading and error states */ +.loading { + padding: 20px; + text-align: center; + color: var(--text-light); +} + +.error-message { + padding: 10px; + background-color: rgba(244, 67, 54, 0.1); + color: var(--danger-color); + border-radius: 4px; + margin: 10px 0; + display: none; +} + +.select-job-message { + color: var(--text-light); + font-style: italic; +} + +/* Footer */ +footer { + margin-top: 40px; + text-align: center; + color: var(--text-light); + font-size: 0.9em; +} \ No newline at end of file diff --git a/test_direct_nomad.py b/test_direct_nomad.py new file mode 100644 index 0000000..d89813c --- /dev/null +++ b/test_direct_nomad.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +""" +Test script to directly use the Nomad client library. +""" + +import os +import sys +import uuid +import nomad +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +def get_test_job_spec(job_id): + """Create a simple test job specification.""" + return { + "Job": { + "ID": job_id, + "Name": job_id, + "Type": "service", + "Datacenters": ["jm"], + "Namespace": "development", + "Priority": 50, + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Tasks": [ + { + "Name": "nginx", + "Driver": "docker", + "Config": { + "image": "nginx:latest", + "ports": ["http"], + }, + "Resources": { + "CPU": 100, + "MemoryMB": 128 + } + } + ], + "Networks": [ + { + "DynamicPorts": [ + { + "Label": "http", + "Value": 0, + "To": 80 + } + ] + } + ] + } + ] + } + } + +def main(): + print("Testing direct Nomad client...") + + # Check if NOMAD_ADDR is configured + nomad_addr = os.getenv("NOMAD_ADDR") + if not nomad_addr: + print("Error: NOMAD_ADDR is not configured in .env file.") + sys.exit(1) + + print(f"Connecting to Nomad at: {nomad_addr}") + + try: + # Extract host and port from the address + host_with_port = nomad_addr.replace("http://", "").replace("https://", "") + host = host_with_port.split(":")[0] + + # Safely extract port + port_part = host_with_port.split(":")[-1] if ":" in host_with_port else "4646" + port = int(port_part.split('/')[0]) # Remove any path components + + # Initialize the Nomad client + client = nomad.Nomad( + host=host, + port=port, + secure=nomad_addr.startswith("https"), + timeout=10, + namespace="development", # Set namespace explicitly + verify=False + ) + + # Create a unique job ID for testing + job_id = f"test-job-{uuid.uuid4().hex[:8]}" + print(f"Created test job ID: {job_id}") + + # Create job specification + job_spec = get_test_job_spec(job_id) + print("Created job specification with explicit namespace: development") + + # Start the job + print(f"Attempting to start job {job_id}...") + + # Print the job spec for debugging + print(f"Job spec structure: {list(job_spec.keys())}") + print(f"Job keys: {list(job_spec['Job'].keys())}") + + # Register the job + response = client.job.register_job(job_id, job_spec) + + print(f"Job registration response: {response}") + print(f"Job {job_id} started successfully!") + + # Clean up - stop the job + print(f"Stopping job {job_id}...") + stop_response = client.job.deregister_job(job_id, purge=True) + print(f"Job stop response: {stop_response}") + print(f"Job {job_id} stopped and purged successfully!") + + print("\nDirect Nomad client test completed successfully.") + + except Exception as e: + print(f"Error during direct Nomad client test: {str(e)}") 
+ sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_gitea_integration.py b/test_gitea_integration.py new file mode 100644 index 0000000..406293e --- /dev/null +++ b/test_gitea_integration.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +""" +Test script to verify Gitea integration with Nomad MCP. +This script tests the basic functionality of the Gitea client. +""" + +import os +import sys +from dotenv import load_dotenv +from app.services.gitea_client import GiteaClient + +# Load environment variables from .env file +load_dotenv() + +def main(): + print("Testing Gitea integration with Nomad MCP...") + + # Check if Gitea API URL is configured + gitea_api_url = os.getenv("GITEA_API_URL") + if not gitea_api_url: + print("Error: GITEA_API_URL is not configured in .env file.") + print("Please configure the Gitea API URL and try again.") + sys.exit(1) + + # Check if authentication is configured + gitea_token = os.getenv("GITEA_API_TOKEN") + gitea_username = os.getenv("GITEA_USERNAME") + gitea_password = os.getenv("GITEA_PASSWORD") + + if not gitea_token and not (gitea_username and gitea_password): + print("Warning: No authentication configured for Gitea API.") + print("You might not be able to access protected repositories.") + + # Initialize the Gitea client + gitea_client = GiteaClient() + + # Test listing repositories + print("\nTesting repository listing...") + repositories = gitea_client.list_repositories(limit=5) + + if not repositories: + print("No repositories found or error occurred.") + else: + print(f"Found {len(repositories)} repositories:") + for repo in repositories: + print(f" - {repo.get('full_name')}: {repo.get('html_url')}") + + # Test parsing repository URLs + print("\nTesting repository URL parsing...") + test_urls = [ + f"{gitea_api_url.replace('/api/v1', '')}/username/repo-name", + "http://gitea.internal.example.com/org/project", + "https://gitea.example.com/user/repository", + ] + + for url in test_urls: + try: + owner, repo = gitea_client.parse_repo_url(url) + print(f" {url} -> Owner: {owner}, Repo: {repo}") + except ValueError as e: + print(f" {url} -> Error: {str(e)}") + + # If we have repositories, test getting repository info for the first one + if repositories: + print("\nTesting repository info retrieval...") + first_repo = repositories[0] + repo_url = first_repo.get("html_url") + + repo_info = gitea_client.get_repository_info(repo_url) + if repo_info: + print(f"Repository info for {repo_url}:") + print(f" Name: {repo_info.get('name')}") + print(f" Description: {repo_info.get('description')}") + print(f" Default branch: {repo_info.get('default_branch')}") + print(f" Stars: {repo_info.get('stars_count')}") + print(f" Forks: {repo_info.get('forks_count')}") + + # Test getting branches + branches = gitea_client.get_repository_branches(repo_url) + if branches: + print(f" Branches: {', '.join([b.get('name') for b in branches])}") + else: + print(" No branches found or error occurred.") + else: + print(f"Error retrieving repository info for {repo_url}") + + print("\nGitea integration test completed.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_gitea_repos.py b/test_gitea_repos.py new file mode 100644 index 0000000..238bfee --- /dev/null +++ b/test_gitea_repos.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +""" +Test script to list all accessible Gitea repositories grouped by owner. +This will show both personal and organization repositories. 
+""" + +import os +import sys +from collections import defaultdict +from dotenv import load_dotenv +from app.services.gitea_client import GiteaClient + +# Load environment variables from .env file +load_dotenv() + +def main(): + print("Testing Gitea Repository Access for Personal and Organization Accounts...") + + # Check if Gitea API URL is configured + gitea_api_url = os.getenv("GITEA_API_URL") + if not gitea_api_url: + print("Error: GITEA_API_URL is not configured in .env file.") + sys.exit(1) + + # Initialize the Gitea client + gitea_client = GiteaClient() + + # Get all repositories (increase limit if you have many) + repositories = gitea_client.list_repositories(limit=100) + + if not repositories: + print("No repositories found or error occurred.") + sys.exit(1) + + # Group repositories by owner + owners = defaultdict(list) + for repo in repositories: + owner_name = repo.get('owner', {}).get('login', 'unknown') + owners[owner_name].append(repo) + + # Display repositories grouped by owner + print(f"\nFound {len(repositories)} repositories across {len(owners)} owners:") + + for owner, repos in owners.items(): + print(f"\n== {owner} ({len(repos)} repositories) ==") + for repo in repos: + print(f" - {repo.get('name')}: {repo.get('html_url')}") + print(f" Description: {repo.get('description') or 'No description'}") + print(f" Default branch: {repo.get('default_branch')}") + + print("\nTest completed successfully.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_job_registration.py b/test_job_registration.py new file mode 100644 index 0000000..0b6ad6e --- /dev/null +++ b/test_job_registration.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +""" +Test script to verify job registration with explicit namespace. +""" + +import os +import sys +import uuid +from dotenv import load_dotenv +from app.services.nomad_client import NomadService + +# Load environment variables from .env file +load_dotenv() + +def get_test_job_spec(job_id): + """Create a simple test job specification.""" + return { + "ID": job_id, + "Name": job_id, + "Type": "service", + "Datacenters": ["jm"], + "Namespace": "development", + "Priority": 50, + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Tasks": [ + { + "Name": "nginx", + "Driver": "docker", + "Config": { + "image": "nginx:latest", + "ports": ["http"], + }, + "Resources": { + "CPU": 100, + "MemoryMB": 128 + } + } + ], + "Networks": [ + { + "DynamicPorts": [ + { + "Label": "http", + "Value": 0, + "To": 80 + } + ] + } + ] + } + ] + } + +def main(): + print("Testing Nomad job registration...") + + # Check if NOMAD_ADDR is configured + nomad_addr = os.getenv("NOMAD_ADDR") + if not nomad_addr: + print("Error: NOMAD_ADDR is not configured in .env file.") + sys.exit(1) + + print(f"Connecting to Nomad at: {nomad_addr}") + + try: + # Initialize the Nomad service + nomad_service = NomadService() + + # Create a unique job ID for testing + job_id = f"test-job-{uuid.uuid4().hex[:8]}" + print(f"Created test job ID: {job_id}") + + # Create job specification + job_spec = get_test_job_spec(job_id) + print("Created job specification with explicit namespace: development") + + # Start the job + print(f"Attempting to start job {job_id}...") + start_response = nomad_service.start_job(job_spec) + + print(f"Job start response: {start_response}") + print(f"Job {job_id} started successfully!") + + # Clean up - stop the job + print(f"Stopping job {job_id}...") + stop_response = nomad_service.stop_job(job_id, purge=True) + print(f"Job stop response: 
{stop_response}") + print(f"Job {job_id} stopped and purged successfully!") + + print("\nNomad job registration test completed successfully.") + + except Exception as e: + print(f"Error during job registration test: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_nomad_connection.py b/test_nomad_connection.py new file mode 100644 index 0000000..a07b116 --- /dev/null +++ b/test_nomad_connection.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +""" +Test script to verify Nomad connection and check for specific jobs. +""" + +import os +import sys +from dotenv import load_dotenv +from pprint import pprint +from app.services.nomad_client import NomadService + +# Load environment variables from .env file +load_dotenv() + +def main(): + print("Testing Nomad connection...") + + # Check if NOMAD_ADDR is configured + nomad_addr = os.getenv("NOMAD_ADDR") + if not nomad_addr: + print("Error: NOMAD_ADDR is not configured in .env file.") + sys.exit(1) + + print(f"Connecting to Nomad at: {nomad_addr}") + + try: + # Initialize the Nomad service + nomad_service = NomadService() + + # List all jobs + print("\nListing all jobs...") + jobs = nomad_service.list_jobs() + print(f"Found {len(jobs)} jobs:") + + # Print each job's ID and status + for job in jobs: + print(f" - {job.get('ID')}: {job.get('Status')}") + + # Look for specific job + job_id = "ms-qc-db-dev" + print(f"\nLooking for job '{job_id}'...") + + job_found = False + for job in jobs: + if job.get('ID') == job_id: + job_found = True + print(f"Found job '{job_id}'!") + print(f" Status: {job.get('Status')}") + print(f" Type: {job.get('Type')}") + print(f" Priority: {job.get('Priority')}") + break + + if not job_found: + print(f"Job '{job_id}' not found in the list of jobs.") + print("Available jobs:") + for job in jobs: + print(f" - {job.get('ID')}") + + print("\nNomad connection test completed successfully.") + + except Exception as e: + print(f"Error connecting to Nomad: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_nomad_namespaces.py b/test_nomad_namespaces.py new file mode 100644 index 0000000..d93988b --- /dev/null +++ b/test_nomad_namespaces.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +""" +Test script to identify the exact namespace of the ms-qc-db-dev job. 
+""" + +import os +import sys +from dotenv import load_dotenv +import nomad +from pprint import pprint + +# Load environment variables from .env file +load_dotenv() + +def get_nomad_client(): + """Create a direct nomad client without going through our service layer.""" + nomad_addr = os.getenv("NOMAD_ADDR", "http://localhost:4646").rstrip('/') + host_with_port = nomad_addr.replace("http://", "").replace("https://", "") + host = host_with_port.split(":")[0] + + # Safely extract port + port_part = host_with_port.split(":")[-1] if ":" in host_with_port else "4646" + port = int(port_part.split('/')[0]) + + return nomad.Nomad( + host=host, + port=port, + timeout=10, + namespace="*", # Try with explicit wildcard + verify=False + ) + +def main(): + print(f"Creating Nomad client...") + client = get_nomad_client() + + print(f"\n=== Testing with namespace='*' ===") + try: + # List all jobs with namespace '*' + jobs = client.jobs.get_jobs(namespace="*") + print(f"Found {len(jobs)} jobs using namespace='*'") + + # Look for our specific job and show its namespace + found = False + for job in jobs: + if job.get('ID') == 'ms-qc-db-dev': + found = True + print(f"\nFound job 'ms-qc-db-dev' in namespace: {job.get('Namespace', 'unknown')}") + print(f"Job status: {job.get('Status')}") + print(f"Job type: {job.get('Type')}") + print(f"Job priority: {job.get('Priority')}") + break + + if not found: + print(f"\nJob 'ms-qc-db-dev' not found with namespace='*'") + except Exception as e: + print(f"Error with namespace='*': {str(e)}") + + # Try listing all available namespaces + print(f"\n=== Listing available namespaces ===") + try: + namespaces = client.namespaces.get_namespaces() + print(f"Found {len(namespaces)} namespaces:") + for ns in namespaces: + print(f" - {ns.get('Name')}") + + # Try finding the job in each namespace specifically + print(f"\n=== Searching for job in each namespace ===") + for ns in namespaces: + ns_name = ns.get('Name') + try: + job = client.job.get_job('ms-qc-db-dev', namespace=ns_name) + print(f"Found job in namespace '{ns_name}'!") + print(f" Status: {job.get('Status')}") + print(f" Type: {job.get('Type')}") + break + except Exception: + print(f"Not found in namespace '{ns_name}'") + + except Exception as e: + print(f"Error listing namespaces: {str(e)}") + + print("\nTest completed.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests/__pycache__/test_nomad_service.cpython-313-pytest-8.3.4.pyc b/tests/__pycache__/test_nomad_service.cpython-313-pytest-8.3.4.pyc new file mode 100644 index 0000000..ca8cd15 Binary files /dev/null and b/tests/__pycache__/test_nomad_service.cpython-313-pytest-8.3.4.pyc differ diff --git a/tests/test_nomad_service.py b/tests/test_nomad_service.py new file mode 100644 index 0000000..0113ea3 --- /dev/null +++ b/tests/test_nomad_service.py @@ -0,0 +1,193 @@ +import os +import pytest +import time +import uuid +from dotenv import load_dotenv +from app.services.nomad_client import NomadService + +# Load environment variables +load_dotenv() + +# Skip tests if Nomad server is not configured +nomad_addr = os.getenv("NOMAD_ADDR") +if not nomad_addr: + pytest.skip("NOMAD_ADDR not configured", allow_module_level=True) + +# Test job ID prefix - each test will append a unique suffix +TEST_JOB_ID_PREFIX = "test-job-" + +# Simple nginx job specification template for testing +def get_test_job_spec(job_id): + return { + "ID": job_id, + "Name": job_id, + "Type": "service", + "Datacenters": ["jm"], # Adjust to match your Nomad cluster + 
"Namespace": "development", + "Priority": 50, + "TaskGroups": [ + { + "Name": "app", + "Count": 1, + "Tasks": [ + { + "Name": "nginx", + "Driver": "docker", + "Config": { + "image": "nginx:latest", + "ports": ["http"], + }, + "Resources": { + "CPU": 100, + "MemoryMB": 128 + } + } + ], + "Networks": [ + { + "DynamicPorts": [ + { + "Label": "http", + "Value": 0, + "To": 80 + } + ] + } + ] + } + ] + } + +@pytest.fixture +def nomad_service(): + """Fixture to provide a NomadService instance.""" + return NomadService() + +@pytest.fixture +def test_job_id(): + """Fixture to provide a unique job ID for each test.""" + job_id = f"{TEST_JOB_ID_PREFIX}{uuid.uuid4().hex[:8]}" + yield job_id + + # Cleanup: ensure job is stopped after the test + try: + service = NomadService() + service.stop_job(job_id, purge=True) + print(f"Cleaned up job {job_id}") + except Exception as e: + print(f"Error cleaning up job {job_id}: {str(e)}") + +def test_job_start_and_stop(nomad_service, test_job_id): + """Test starting and stopping a job.""" + # Create job specification + job_spec = get_test_job_spec(test_job_id) + + # Start the job + start_response = nomad_service.start_job(job_spec) + assert start_response["job_id"] == test_job_id + assert start_response["status"] == "started" + assert "eval_id" in start_response + + # Wait longer for job to be registered (increased from 2 to 10 seconds) + time.sleep(10) + + # Verify job exists + job = nomad_service.get_job(test_job_id) + assert job["ID"] == test_job_id + + # Stop the job + stop_response = nomad_service.stop_job(test_job_id) + assert stop_response["job_id"] == test_job_id + assert stop_response["status"] == "stopped" + + # Wait for job to be stopped + time.sleep(5) + + # Verify job is stopped + job = nomad_service.get_job(test_job_id) + assert job["Stop"] is True + +def test_job_with_namespace(nomad_service, test_job_id): + """Test job with explicit namespace.""" + # Create job specification with explicit namespace + job_spec = get_test_job_spec(test_job_id) + job_spec["Namespace"] = "development" + + # Start the job + start_response = nomad_service.start_job(job_spec) + assert start_response["job_id"] == test_job_id + assert start_response["namespace"] == "development" + + # Wait longer for job to be registered (increased from 2 to 10 seconds) + time.sleep(10) + + # Verify job exists in the correct namespace + job = nomad_service.get_job(test_job_id) + assert job["Namespace"] == "development" + + # Clean up + nomad_service.stop_job(test_job_id) + +def test_job_with_job_wrapper(nomad_service, test_job_id): + """Test job specification already wrapped in 'Job' key.""" + # Create job specification with Job wrapper + job_spec = { + "Job": get_test_job_spec(test_job_id) + } + + # Start the job + start_response = nomad_service.start_job(job_spec) + assert start_response["job_id"] == test_job_id + + # Wait longer for job to be registered (increased from 2 to 10 seconds) + time.sleep(10) + + # Verify job exists + job = nomad_service.get_job(test_job_id) + assert job["ID"] == test_job_id + + # Clean up + nomad_service.stop_job(test_job_id) + +def test_list_jobs(nomad_service): + """Test listing jobs.""" + jobs = nomad_service.list_jobs() + assert isinstance(jobs, list) + + # List should contain job details + if jobs: + assert "ID" in jobs[0] + assert "Status" in jobs[0] + +def test_job_lifecycle(nomad_service, test_job_id): + """Test the full job lifecycle - start, check status, get allocations, stop.""" + # Start the job + job_spec = get_test_job_spec(test_job_id) + 
start_response = nomad_service.start_job(job_spec) + assert start_response["status"] == "started" + + # Wait longer for job to be scheduled (increased from 5 to 15 seconds) + time.sleep(15) + + # Check job status + job = nomad_service.get_job(test_job_id) + assert job["ID"] == test_job_id + + # Get allocations + try: + allocations = nomad_service.get_allocations(test_job_id) + assert isinstance(allocations, list) + except Exception: + # It's possible allocations aren't available yet, which is okay for the test + pass + + # Stop the job + stop_response = nomad_service.stop_job(test_job_id) + assert stop_response["status"] == "stopped" + + # Wait longer for job to be stopped (increased from 2 to 5 seconds) + time.sleep(5) + + # Verify job is stopped + job = nomad_service.get_job(test_job_id) + assert job["Stop"] is True \ No newline at end of file