Enhance static directory handling and job deployment configuration
This commit is contained in:
700
NOMAD_JOB_MANAGEMENT_GUIDE.md
Normal file
700
NOMAD_JOB_MANAGEMENT_GUIDE.md
Normal file
@ -0,0 +1,700 @@
|
|||||||
|
# Nomad Job Management Guide
|
||||||
|
|
||||||
|
This guide explains the complete process of creating, deploying, monitoring, and troubleshooting Nomad jobs using the Nomad MCP service. It's designed to be used by both humans and AI assistants to effectively manage containerized applications in a Nomad cluster.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Access to a Nomad cluster
|
||||||
|
- Nomad MCP service installed and running
|
||||||
|
- Proper environment configuration (NOMAD_ADDR, NOMAD_NAMESPACE, etc.)
|
||||||
|
- Python with required packages installed
|
||||||
|
|
||||||
|
## 1. Creating a Nomad Job Specification
|
||||||
|
|
||||||
|
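A Nomad job specification defines how your application should run. You can write it in either of two formats: an HCL `.nomad` file (Option A) or a Python script that builds the equivalent job dictionary and submits it (Option B).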
|
||||||
|
|
||||||
|
### Option A: Using a .nomad HCL File
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
job "your-job-name" {
|
||||||
|
datacenters = ["dc1"]
|
||||||
|
type = "service"
|
||||||
|
namespace = "development"
|
||||||
|
|
||||||
|
group "app" {
|
||||||
|
count = 1
|
||||||
|
|
||||||
|
network {
|
||||||
|
port "http" {
|
||||||
|
to = 8000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task "app-task" {
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "your-registry/your-image:tag"
|
||||||
|
ports = ["http"]
|
||||||
|
command = "python"
|
||||||
|
args = ["-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
||||||
|
# Mount volumes if needed
|
||||||
|
mount {
|
||||||
|
type = "bind"
|
||||||
|
source = "local/app-code"
|
||||||
|
target = "/app"
|
||||||
|
readonly = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Pull code from Git repository if needed
|
||||||
|
artifact {
|
||||||
|
source = "git::ssh://git@your-git-server:port/org/repo.git"
|
||||||
|
destination = "local/app-code"
|
||||||
|
options {
|
||||||
|
ref = "main"
|
||||||
|
sshkey = "your-base64-encoded-ssh-key"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
env {
|
||||||
|
# Environment variables
|
||||||
|
PORT = "8000"
|
||||||
|
HOST = "0.0.0.0"
|
||||||
|
LOG_LEVEL = "INFO"
|
||||||
|
PYTHONPATH = "/app"
|
||||||
|
|
||||||
|
# Add any application-specific environment variables
|
||||||
|
STATIC_DIR = "/local/app-code/static"
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 200
|
||||||
|
memory = 256
|
||||||
|
}
|
||||||
|
|
||||||
|
service {
|
||||||
|
name = "your-service-name"
|
||||||
|
port = "http"
|
||||||
|
tags = [
|
||||||
|
"traefik.enable=true",
|
||||||
|
"traefik.http.routers.your-service.entryPoints=https",
|
||||||
|
"traefik.http.routers.your-service.rule=Host(`your-service.domain.com`)"
|
||||||
|
]
|
||||||
|
|
||||||
|
check {
|
||||||
|
type = "http"
|
||||||
|
path = "/api/health"
|
||||||
|
interval = "10s"
|
||||||
|
timeout = "2s"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Using a Python Deployment Script
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Initialize the Nomad service
|
||||||
|
nomad_service = NomadService()
|
||||||
|
|
||||||
|
# Create job specification
|
||||||
|
job_spec = {
|
||||||
|
"Job": {
|
||||||
|
"ID": "your-job-name",
|
||||||
|
"Name": "your-job-name",
|
||||||
|
"Type": "service",
|
||||||
|
"Datacenters": ["dc1"],
|
||||||
|
"Namespace": "development",
|
||||||
|
"TaskGroups": [
|
||||||
|
{
|
||||||
|
"Name": "app",
|
||||||
|
"Count": 1,
|
||||||
|
"Networks": [
|
||||||
|
{
|
||||||
|
"DynamicPorts": [
|
||||||
|
{
|
||||||
|
"Label": "http",
|
||||||
|
"To": 8000
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"Tasks": [
|
||||||
|
{
|
||||||
|
"Name": "app-task",
|
||||||
|
"Driver": "docker",
|
||||||
|
"Config": {
|
||||||
|
"image": "your-registry/your-image:tag",
|
||||||
|
"ports": ["http"],
|
||||||
|
"command": "python",
|
||||||
|
"args": ["-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"],
|
||||||
|
"mount": [
|
||||||
|
{
|
||||||
|
"type": "bind",
|
||||||
|
"source": "local/app-code",
|
||||||
|
"target": "/app",
|
||||||
|
"readonly": False
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Artifacts": [
|
||||||
|
{
|
||||||
|
"GetterSource": "git::ssh://git@your-git-server:port/org/repo.git",
|
||||||
|
"RelativeDest": "local/app-code",
|
||||||
|
"GetterOptions": {
|
||||||
|
"ref": "main",
|
||||||
|
"sshkey": "your-base64-encoded-ssh-key"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"Env": {
|
||||||
|
"PORT": "8000",
|
||||||
|
"HOST": "0.0.0.0",
|
||||||
|
"LOG_LEVEL": "INFO",
|
||||||
|
"PYTHONPATH": "/app",
|
||||||
|
"STATIC_DIR": "/local/app-code/static"
|
||||||
|
},
|
||||||
|
"Resources": {
|
||||||
|
"CPU": 200,
|
||||||
|
"MemoryMB": 256
|
||||||
|
},
|
||||||
|
"Services": [
|
||||||
|
{
|
||||||
|
"Name": "your-service-name",
|
||||||
|
"PortLabel": "http",
|
||||||
|
"Tags": [
|
||||||
|
"traefik.enable=true",
|
||||||
|
"traefik.http.routers.your-service.entryPoints=https",
|
||||||
|
"traefik.http.routers.your-service.rule=Host(`your-service.domain.com`)"
|
||||||
|
],
|
||||||
|
"Checks": [
|
||||||
|
{
|
||||||
|
"Type": "http",
|
||||||
|
"Path": "/api/health",
|
||||||
|
"Interval": 10000000000, # 10 seconds in nanoseconds
|
||||||
|
"Timeout": 2000000000 # 2 seconds in nanoseconds
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Start the job
|
||||||
|
response = nomad_service.start_job(job_spec)
|
||||||
|
|
||||||
|
print(f"Job deployment response: {response}")
|
||||||
|
|
||||||
|
if response.get("status") == "started":
|
||||||
|
print(f"✅ Job deployed successfully!")
|
||||||
|
print(f"Job ID: {response.get('job_id')}")
|
||||||
|
print(f"Evaluation ID: {response.get('eval_id')}")
|
||||||
|
else:
|
||||||
|
print(f"❌ Failed to deploy job.")
|
||||||
|
print(f"Status: {response.get('status')}")
|
||||||
|
print(f"Message: {response.get('message', 'Unknown error')}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 2. Deploying the Nomad Job
|
||||||
|
|
||||||
|
### Option A: Using the Nomad CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy using a .nomad file
|
||||||
|
nomad job run your-job-file.nomad
|
||||||
|
|
||||||
|
# Verify the job was submitted
|
||||||
|
nomad job status your-job-name
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Using the Python Deployment Script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run the deployment script
|
||||||
|
python deploy_your_job.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option C: Using the Nomad MCP API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Using curl
|
||||||
|
curl -X POST http://localhost:8000/api/claude/create-job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"job_id": "your-job-name",
|
||||||
|
"name": "Your Job Name",
|
||||||
|
"type": "service",
|
||||||
|
"datacenters": ["dc1"],
|
||||||
|
"namespace": "development",
|
||||||
|
"docker_image": "your-registry/your-image:tag",
|
||||||
|
"count": 1,
|
||||||
|
"cpu": 200,
|
||||||
|
"memory": 256,
|
||||||
|
"ports": [
|
||||||
|
{
|
||||||
|
"Label": "http",
|
||||||
|
"Value": 0,
|
||||||
|
"To": 8000
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"env_vars": {
|
||||||
|
"PORT": "8000",
|
||||||
|
"HOST": "0.0.0.0",
|
||||||
|
"LOG_LEVEL": "INFO",
|
||||||
|
"PYTHONPATH": "/app",
|
||||||
|
"STATIC_DIR": "/local/app-code/static"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Using PowerShell
|
||||||
|
Invoke-RestMethod -Uri "http://localhost:8000/api/claude/create-job" -Method POST -Headers @{"Content-Type"="application/json"} -Body '{
|
||||||
|
"job_id": "your-job-name",
|
||||||
|
"name": "Your Job Name",
|
||||||
|
"type": "service",
|
||||||
|
"datacenters": ["dc1"],
|
||||||
|
"namespace": "development",
|
||||||
|
"docker_image": "your-registry/your-image:tag",
|
||||||
|
"count": 1,
|
||||||
|
"cpu": 200,
|
||||||
|
"memory": 256,
|
||||||
|
"ports": [
|
||||||
|
{
|
||||||
|
"Label": "http",
|
||||||
|
"Value": 0,
|
||||||
|
"To": 8000
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"env_vars": {
|
||||||
|
"PORT": "8000",
|
||||||
|
"HOST": "0.0.0.0",
|
||||||
|
"LOG_LEVEL": "INFO",
|
||||||
|
"PYTHONPATH": "/app",
|
||||||
|
"STATIC_DIR": "/local/app-code/static"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Checking Job Status
|
||||||
|
|
||||||
|
After deploying a job, you should check its status to ensure it's running correctly.
|
||||||
|
|
||||||
|
### Option A: Using the Nomad CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check job status
|
||||||
|
nomad job status your-job-name
|
||||||
|
|
||||||
|
# Check allocations for the job
|
||||||
|
nomad job allocs your-job-name
|
||||||
|
|
||||||
|
# Check the most recent allocation
|
||||||
|
nomad alloc status <allocation-id>   # take the ID from "nomad job allocs" above
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Using the Nomad MCP API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Using curl
|
||||||
|
curl -X POST http://localhost:8000/api/claude/jobs \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"job_id": "your-job-name",
|
||||||
|
"action": "status",
|
||||||
|
"namespace": "development"
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Using PowerShell
|
||||||
|
Invoke-RestMethod -Uri "http://localhost:8000/api/claude/jobs" -Method POST -Headers @{"Content-Type"="application/json"} -Body '{
|
||||||
|
"job_id": "your-job-name",
|
||||||
|
"action": "status",
|
||||||
|
"namespace": "development"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option C: Using a Python Script
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Initialize the Nomad service
|
||||||
|
service = NomadService()
|
||||||
|
|
||||||
|
# Get job information
|
||||||
|
job = service.get_job('your-job-name')
|
||||||
|
print(f"Job Status: {job.get('Status', 'Unknown')}")
|
||||||
|
print(f"Job Type: {job.get('Type', 'Unknown')}")
|
||||||
|
print(f"Job Datacenters: {job.get('Datacenters', [])}")
|
||||||
|
|
||||||
|
# Get allocations
|
||||||
|
allocations = service.get_allocations('your-job-name')
|
||||||
|
print(f"\nFound {len(allocations)} allocations")
|
||||||
|
|
||||||
|
if allocations:
|
||||||
|
latest_alloc = allocations[0]
|
||||||
|
print(f"Latest allocation ID: {latest_alloc.get('ID', 'Unknown')}")
|
||||||
|
print(f"Allocation Status: {latest_alloc.get('ClientStatus', 'Unknown')}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. Checking Job Logs
|
||||||
|
|
||||||
|
Logs are crucial for diagnosing issues with your job. Here's how to access them:
|
||||||
|
|
||||||
|
### Option A: Using the Nomad CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# First, get the allocation ID
|
||||||
|
nomad job allocs your-job-name
|
||||||
|
|
||||||
|
# Then view the logs for a specific allocation
|
||||||
|
nomad alloc logs <allocation-id>
|
||||||
|
|
||||||
|
# View stderr logs
|
||||||
|
nomad alloc logs -stderr <allocation-id>
|
||||||
|
|
||||||
|
# Follow logs in real-time
|
||||||
|
nomad alloc logs -f <allocation-id>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Using the Nomad MCP API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Using curl
|
||||||
|
curl -X GET http://localhost:8000/api/claude/job-logs/your-job-name
|
||||||
|
|
||||||
|
# Using PowerShell
|
||||||
|
Invoke-RestMethod -Uri "http://localhost:8000/api/claude/job-logs/your-job-name" -Method GET
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option C: Using a Python Script
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Initialize the Nomad service
|
||||||
|
service = NomadService()
|
||||||
|
|
||||||
|
# Get allocations for the job
|
||||||
|
allocations = service.get_allocations('your-job-name')
|
||||||
|
|
||||||
|
if allocations:
|
||||||
|
latest_alloc = allocations[0]
|
||||||
|
alloc_id = latest_alloc["ID"]
|
||||||
|
print(f"Latest allocation ID: {alloc_id}")
|
||||||
|
|
||||||
|
# Get logs for the allocation
|
||||||
|
try:
|
||||||
|
# Get stdout logs
|
||||||
|
stdout_logs = service.get_allocation_logs(alloc_id, task="your-task-name", log_type="stdout")
|
||||||
|
print("\nStandard Output Logs:")
|
||||||
|
print(stdout_logs)
|
||||||
|
|
||||||
|
# Get stderr logs
|
||||||
|
stderr_logs = service.get_allocation_logs(alloc_id, task="your-task-name", log_type="stderr")
|
||||||
|
print("\nStandard Error Logs:")
|
||||||
|
print(stderr_logs)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error getting logs: {str(e)}")
|
||||||
|
else:
|
||||||
|
print("No allocations found for your-job-name job")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 5. Troubleshooting Common Issues
|
||||||
|
|
||||||
|
### Issue: Job Fails to Start
|
||||||
|
|
||||||
|
1. **Check the job status**:
|
||||||
|
```bash
|
||||||
|
nomad job status your-job-name
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Examine the allocation status**:
|
||||||
|
```bash
|
||||||
|
nomad alloc status <allocation-id>   # allocation IDs are listed by "nomad job status" above
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Check the logs for errors**:
|
||||||
|
```bash
|
||||||
|
# Get the allocation ID first
|
||||||
|
nomad job allocs your-job-name
|
||||||
|
# Then check the logs
|
||||||
|
nomad alloc logs -stderr <allocation-id>
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Common errors and solutions**:
|
||||||
|
|
||||||
|
a. **Missing static directory**:
|
||||||
|
```
|
||||||
|
RuntimeError: Directory 'static' does not exist
|
||||||
|
```
|
||||||
|
Solution: Add an environment variable to specify the static directory path:
|
||||||
|
```hcl
|
||||||
|
env {
|
||||||
|
STATIC_DIR = "/local/app-code/static"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
b. **Invalid mount configuration**:
|
||||||
|
```
|
||||||
|
invalid mount config for type 'bind': bind source path does not exist
|
||||||
|
```
|
||||||
|
Solution: Ensure the source path exists or is created by an artifact:
|
||||||
|
```hcl
|
||||||
|
artifact {
|
||||||
|
source = "git::ssh://git@your-git-server:port/org/repo.git"
|
||||||
|
destination = "local/app-code"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
c. **Port already allocated**:
|
||||||
|
```
|
||||||
|
Allocation failed: Failed to place allocation: failed to place alloc: port is already allocated
|
||||||
|
```
|
||||||
|
Solution: Use dynamic ports or choose a different port:
|
||||||
|
```hcl
|
||||||
|
network {
|
||||||
|
port "http" {
|
||||||
|
to = 8000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Issue: Application Errors After Deployment
|
||||||
|
|
||||||
|
1. **Check application logs**:
|
||||||
|
```bash
|
||||||
|
nomad alloc logs <allocation-id>
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Verify environment variables**:
|
||||||
|
```bash
|
||||||
|
nomad alloc status <allocation-id>
|
||||||
|
```
|
||||||
|
Look for the "Environment Variables" section.
|
||||||
|
|
||||||
|
3. **Check resource constraints**:
|
||||||
|
Ensure the job has enough CPU and memory allocated:
|
||||||
|
```hcl
|
||||||
|
resources {
|
||||||
|
cpu = 200 # Increase if needed
|
||||||
|
memory = 256 # Increase if needed
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 6. Updating a Job
|
||||||
|
|
||||||
|
After fixing issues, you'll need to update the job:
|
||||||
|
|
||||||
|
### Option A: Using the Nomad CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Update the job with the modified specification
|
||||||
|
nomad job run your-updated-job-file.nomad
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Using the Nomad MCP API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Using PowerShell to restart a job
|
||||||
|
Invoke-RestMethod -Uri "http://localhost:8000/api/claude/jobs" -Method POST -Headers @{"Content-Type"="application/json"} -Body '{
|
||||||
|
"job_id": "your-job-name",
|
||||||
|
"action": "restart",
|
||||||
|
"namespace": "development"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option C: Using a Python Script
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Initialize the Nomad service
|
||||||
|
service = NomadService()
|
||||||
|
|
||||||
|
# Get the current job specification
|
||||||
|
job = service.get_job('your-job-name')
|
||||||
|
|
||||||
|
# Modify the job specification as needed
|
||||||
|
# For example, update environment variables:
|
||||||
|
job["TaskGroups"][0]["Tasks"][0]["Env"]["STATIC_DIR"] = "/local/app-code/static"
|
||||||
|
|
||||||
|
# Update the job
|
||||||
|
response = service.start_job({"Job": job})
|
||||||
|
|
||||||
|
print(f"Job update response: {response}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 7. Stopping a Job
|
||||||
|
|
||||||
|
When you're done with a job, you can stop it:
|
||||||
|
|
||||||
|
### Option A: Using the Nomad CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop a job
|
||||||
|
nomad job stop your-job-name
|
||||||
|
|
||||||
|
# Stop and purge a job
|
||||||
|
nomad job stop -purge your-job-name
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Using the Nomad MCP API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Using PowerShell
|
||||||
|
Invoke-RestMethod -Uri "http://localhost:8000/api/claude/jobs" -Method POST -Headers @{"Content-Type"="application/json"} -Body '{
|
||||||
|
"job_id": "your-job-name",
|
||||||
|
"action": "stop",
|
||||||
|
"namespace": "development",
|
||||||
|
"purge": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option C: Using a Python Script
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Initialize the Nomad service
|
||||||
|
service = NomadService()
|
||||||
|
|
||||||
|
# Stop the job
|
||||||
|
response = service.stop_job('your-job-name', purge=True)
|
||||||
|
|
||||||
|
print(f"Job stop response: {response}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 8. Complete Workflow Example
|
||||||
|
|
||||||
|
Here's a complete workflow for deploying, monitoring, troubleshooting, and updating a job:
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import time
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Initialize the Nomad service
|
||||||
|
service = NomadService()
|
||||||
|
|
||||||
|
# 1. Create and deploy the job
|
||||||
|
job_spec = {
|
||||||
|
"Job": {
|
||||||
|
"ID": "example-app",
|
||||||
|
"Name": "Example Application",
|
||||||
|
"Type": "service",
|
||||||
|
"Datacenters": ["dc1"],
|
||||||
|
"Namespace": "development",
|
||||||
|
# ... rest of job specification ...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deploy_response = service.start_job(job_spec)
|
||||||
|
print(f"Deployment response: {deploy_response}")
|
||||||
|
|
||||||
|
# 2. Wait for the job to be scheduled
|
||||||
|
print("Waiting for job to be scheduled...")
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
# 3. Check job status
|
||||||
|
job = service.get_job('example-app')
|
||||||
|
print(f"Job Status: {job.get('Status', 'Unknown')}")
|
||||||
|
|
||||||
|
# 4. Get allocations
|
||||||
|
allocations = service.get_allocations('example-app')
|
||||||
|
|
||||||
|
if allocations:
|
||||||
|
latest_alloc = allocations[0]
|
||||||
|
alloc_id = latest_alloc["ID"]
|
||||||
|
print(f"Latest allocation ID: {alloc_id}")
|
||||||
|
print(f"Allocation Status: {latest_alloc.get('ClientStatus', 'Unknown')}")
|
||||||
|
|
||||||
|
# 5. Check logs for errors
|
||||||
|
stderr_logs = service.get_allocation_logs(alloc_id, log_type="stderr")
|
||||||
|
|
||||||
|
# 6. Look for common errors
|
||||||
|
if "Directory 'static' does not exist" in stderr_logs:
|
||||||
|
print("Error detected: Missing static directory")
|
||||||
|
|
||||||
|
# 7. Update the job to fix the issue
|
||||||
|
job["TaskGroups"][0]["Tasks"][0]["Env"]["STATIC_DIR"] = "/local/app-code/static"
|
||||||
|
update_response = service.start_job({"Job": job})
|
||||||
|
print(f"Job update response: {update_response}")
|
||||||
|
|
||||||
|
# 8. Wait for the updated job to be scheduled
|
||||||
|
print("Waiting for updated job to be scheduled...")
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
# 9. Check the updated job status
|
||||||
|
updated_job = service.get_job('example-app')
|
||||||
|
print(f"Updated Job Status: {updated_job.get('Status', 'Unknown')}")
|
||||||
|
else:
|
||||||
|
print("No allocations found for the job")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 9. Best Practices
|
||||||
|
|
||||||
|
1. **Always check logs after deployment**: Logs are your primary tool for diagnosing issues.
|
||||||
|
|
||||||
|
2. **Use environment variables for configuration**: This makes your jobs more flexible and easier to update.
|
||||||
|
|
||||||
|
3. **Implement health checks**: Health checks help Nomad determine if your application is running correctly.
|
||||||
|
|
||||||
|
4. **Set appropriate resource limits**: Allocate enough CPU and memory for your application to run efficiently.
|
||||||
|
|
||||||
|
5. **Use artifacts for code deployment**: Pull code from a Git repository to ensure consistency.
|
||||||
|
|
||||||
|
6. **Implement proper error handling**: Your application should handle errors gracefully and provide meaningful error messages.
|
||||||
|
|
||||||
|
7. **Use namespaces**: Organize your jobs into namespaces based on environment or team.
|
||||||
|
|
||||||
|
8. **Document your job specifications**: Include comments in your job files to explain configuration choices.
|
||||||
|
|
||||||
|
9. **Implement a CI/CD pipeline**: Automate the deployment process to reduce errors and improve efficiency.
|
||||||
|
|
||||||
|
10. **Monitor job performance**: Use Nomad's monitoring capabilities to track resource usage and performance.
|
||||||
|
|
||||||
|
## 10. Conclusion
|
||||||
|
|
||||||
|
Managing Nomad jobs effectively requires understanding the job lifecycle, from creation to deployment, monitoring, troubleshooting, and updating. By following this guide, you can create robust deployment processes that ensure your applications run reliably in a Nomad cluster.
|
||||||
|
|
||||||
|
Remember that the key to successful job management is thorough testing, careful monitoring, and quick response to issues. With the right tools and processes in place, you can efficiently manage even complex applications in a Nomad environment.
|
BIN
__pycache__/run.cpython-313.pyc
Normal file
BIN
__pycache__/run.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
74
app/main.py
74
app/main.py
@ -92,8 +92,78 @@ async def health_check():
|
|||||||
|
|
||||||
return health_status
|
return health_status
|
||||||
|
|
||||||
# Mount static files
|
# Find the static directory
|
||||||
app.mount("/", StaticFiles(directory="static", html=True), name="static")
|
def find_static_directory():
    """Locate the directory holding the static UI assets.

    The STATIC_DIR environment variable is consulted first. If it is unset or
    does not point at a directory, a fixed list of conventional locations is
    probed in order. When none of those exist and STATIC_DIR is set, an attempt
    is made to create that directory.

    Returns:
        The first usable directory path, or None when nothing was found and
        nothing could be created.
    """
    logger.info("Starting static directory search...")

    # 1. An explicit STATIC_DIR setting wins when it points at a real directory.
    configured = os.getenv("STATIC_DIR")
    if not configured:
        logger.info("STATIC_DIR environment variable not set")
    else:
        logger.info(f"STATIC_DIR environment variable found: '{configured}'")
        if os.path.isdir(configured):
            logger.info(f"✅ Confirmed '{configured}' exists and is a directory")
            return configured

        logger.warning(f"❌ STATIC_DIR '{configured}' does not exist or is not a directory")
        # Diagnostic aid: show what the parent directory actually contains.
        parent = os.path.dirname(configured)
        if os.path.exists(parent):
            logger.info(f"Contents of parent directory '{parent}':")
            try:
                for entry in os.listdir(parent):
                    if os.path.isdir(os.path.join(parent, entry)):
                        entry_kind = "directory"
                    else:
                        entry_kind = "file"
                    logger.info(f" - {entry} ({entry_kind})")
            except Exception as exc:
                logger.error(f"Error listing parent directory: {exc}")

    # 2. Fall back to the conventional locations, in priority order.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    candidates = [
        "static",                               # Local development
        "/app/static",                          # Docker container
        "/local/nomad_mcp/static",              # Nomad with artifact
        os.path.join(project_root, "static"),   # Relative to this file
    ]

    logger.info(f"Checking {len(candidates)} possible static directory locations:")

    for candidate in candidates:
        logger.info(f"Checking path: '{candidate}'")
        if not os.path.isdir(candidate):
            logger.info(f"❌ Path '{candidate}' does not exist or is not a directory")
            continue
        logger.info(f"✅ Found valid static directory at: '{candidate}'")
        return candidate

    # Nothing found: warn instead of failing so the API can run without the UI.
    logger.warning("No static directory found in any of the checked locations. UI will not be available.")

    # 3. Last resort: create the directory STATIC_DIR names, if it was set.
    if not configured:
        return None

    try:
        logger.info(f"Attempting to create static directory at '{configured}'")
        os.makedirs(configured, exist_ok=True)
        if os.path.isdir(configured):
            logger.info(f"✅ Successfully created static directory at '{configured}'")
            return configured
        logger.error(f"Failed to create static directory at '{configured}'")
    except Exception as exc:
        logger.error(f"Error creating static directory: {exc}")

    return None
|
||||||
|
|
||||||
|
# Serve the UI from "/" only when a static directory could be located;
# otherwise just log that the mount was skipped.
static_dir = find_static_directory()
if not static_dir:
    logger.warning("Static files not mounted. API endpoints will still function.")
else:
    app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
33
check_allocations.py
Normal file
33
check_allocations.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""
|
||||||
|
Script to check allocations for the nomad-mcp job.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def main():
    """Print the first allocation of the nomad-mcp job together with its logs.

    Reports how many allocations the service returned. When there is at least
    one, prints the ID and client status of the first entry and then the
    "stdout" and "stderr" entries of its logs. Any error raised while fetching
    or printing the logs is reported on stdout instead of propagating.
    """
    nomad = NomadService()

    # Get allocations for the job
    allocs = nomad.get_allocations('nomad-mcp')
    print(f'Found {len(allocs)} allocations')

    if not allocs:
        print("No allocations found for nomad-mcp job")
        return

    # The first entry returned is treated as the latest allocation.
    newest = allocs[0]
    print(f'Latest allocation ID: {newest["ID"]}')
    print(f'Status: {newest.get("ClientStatus", "Unknown")}')

    # Get logs for the allocation
    try:
        log_data = nomad.get_allocation_logs(newest["ID"])
        print("\nAllocation Logs:")
        print(log_data.get("stdout", "No stdout logs available"))
        print("\nError Logs:")
        print(log_data.get("stderr", "No stderr logs available"))
    except Exception as exc:
        print(f"Error getting logs: {exc}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
48
check_job_status.py
Normal file
48
check_job_status.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""
|
||||||
|
Script to check the status of the Nomad MCP job and its allocations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
|
def _print_task_states(task_states):
    """Print every task's state and, when present, its last three events."""
    print("\nTask States:")
    for task_name, state in task_states.items():
        print(f" - {task_name}: {state.get('State', 'Unknown')}")

        # Check for events
        recent_events = state.get('Events', [])
        if not recent_events:
            continue
        print(" Events:")
        for event in recent_events[-3:]:  # Show last 3 events
            when = event.get('Time', 'Unknown')
            kind = event.get('Type', 'Unknown')
            message = event.get('DisplayMessage', 'No message')
            print(f" - {when}: {kind} - {message}")


def main():
    """Print a status report for the nomad-mcp job and its first allocation.

    Shows the job's status, type and datacenters, the number of allocations,
    and for the first allocation its ID, client status and task states.
    Finishes by printing the URLs where the service and its health endpoint
    are expected to be reachable.
    """
    print("Checking Nomad MCP job status...")

    # Initialize the Nomad service
    nomad = NomadService()

    # Get job information
    job_info = nomad.get_job('nomad-mcp')
    print(f"Job Status: {job_info.get('Status', 'Unknown')}")
    print(f"Job Type: {job_info.get('Type', 'Unknown')}")
    print(f"Job Datacenters: {job_info.get('Datacenters', [])}")

    # Get allocations
    allocs = nomad.get_allocations('nomad-mcp')
    print(f"\nFound {len(allocs)} allocations")

    if allocs:
        # The first entry returned is treated as the latest allocation.
        newest = allocs[0]
        print(f"Latest allocation ID: {newest.get('ID', 'Unknown')}")
        print(f"Allocation Status: {newest.get('ClientStatus', 'Unknown')}")

        # Check for task states
        states = newest.get('TaskStates', {})
        if states:
            _print_task_states(states)

    # Check health endpoint
    print("\nService should be available at: https://nomad_mcp.dev.meisheng.group")
    print("You can check the health endpoint at: https://nomad_mcp.dev.meisheng.group/api/health")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -6,27 +6,36 @@ Script to deploy the Nomad MCP service using our own Nomad client.
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
|
import subprocess
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from app.services.nomad_client import NomadService
|
from app.services.nomad_client import NomadService
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
def read_job_spec(file_path):
|
def main():
|
||||||
"""Read the Nomad job specification from a file."""
|
print("Deploying Nomad MCP service using our own Nomad client...")
|
||||||
|
|
||||||
|
# Check if NOMAD_ADDR is configured
|
||||||
|
nomad_addr = os.getenv("NOMAD_ADDR")
|
||||||
|
if not nomad_addr:
|
||||||
|
print("Error: NOMAD_ADDR is not configured in .env file.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
print(f"Connecting to Nomad at: {nomad_addr}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(file_path, 'r') as f:
|
# Initialize the Nomad service
|
||||||
content = f.read()
|
nomad_service = NomadService()
|
||||||
|
|
||||||
# Convert HCL to JSON (simplified approach)
|
# Use the HCL file directly with the Nomad CLI
|
||||||
# In a real scenario, you might want to use a proper HCL parser
|
print("Registering and starting the nomad-mcp job...")
|
||||||
# This is a very basic approach that assumes the job spec is valid
|
|
||||||
job_id = "nomad-mcp"
|
|
||||||
|
|
||||||
# Create a basic job structure
|
# Create a job specification directly
|
||||||
job_spec = {
|
job_spec = {
|
||||||
"ID": job_id,
|
"Job": {
|
||||||
"Name": job_id,
|
"ID": "nomad-mcp",
|
||||||
|
"Name": "nomad-mcp",
|
||||||
"Type": "service",
|
"Type": "service",
|
||||||
"Datacenters": ["jm"],
|
"Datacenters": ["jm"],
|
||||||
"Namespace": "development",
|
"Namespace": "development",
|
||||||
@ -64,11 +73,11 @@ def read_job_spec(file_path):
|
|||||||
},
|
},
|
||||||
"Artifacts": [
|
"Artifacts": [
|
||||||
{
|
{
|
||||||
"source": "git::ssh://git@gitea.service.mesh:2222/Mei_Sheng_Textiles/nomad_mcp.git",
|
"GetterSource": "git::ssh://git@gitea.service.mesh:2222/Mei_Sheng_Textiles/nomad_mcp.git",
|
||||||
"destination": "local/nomad_mcp",
|
"RelativeDest": "local/nomad_mcp",
|
||||||
"options": {
|
"GetterOptions": {
|
||||||
"ref": "main",
|
"ref": "main",
|
||||||
"sshkey": "LS0tLS1CRUdJTiBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0KYjNCbGJuTnphQzFyWlhrdGRqRUFBQUFBQkc1dmJtVUFBQUFFYm05dVpRQUFBQUFBQUFBQkFBQUFNd0FBQUF0emMyZ3RaVwpReU5UVXhPUUFBQUNBY01oYkNPVXhFOHBYQ3d5UEh0ZFR4aThHU0pzNEdTNXZ6ZTR6Tm1ueUYvUUFBQUtCQm5RZi9RWjBICi93QUFBQXR6YzJndFpXUXlOVFV4T1FBQUFDQWNNaGJDT1V4RThwWEN3eVBIdGRUeGk4R1NKczRHUzV2emU0ek5tbnlGL1EKQUFBRURreWwzQlZlek9YUWZUNzZac0NkYTZPNTFnMExsb25EMEd6L2Y4SHh3dzRCd3lGc0k1VEVUeWxjTEREOGUxMVBHTAp3WkltemdaTG0vTjdqTTJhZklYOUFBQUFHR1JsY0d4dmVTQnJaWGtnWm05eUlHNXZiV0ZrWDIxamNBRUNBd1FGCi0tLS0tRU5EIE9QRU5TU0ggUFJJVkFURSBLRVktLS0tLQo="
|
"sshkey": "LS0tLS1CRUdJTiBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0KYjNCbGJuTnphQzFyWlhrdGRqRUFBQUFBQkc1dmJtVUFBQUFFYm05dVpRQUFBQUFBQUFBQkFBQUFNd0FBQUF0emMyZ3RaVwpReU5UVXhPUUFBQUNBY01oYkNPVXhFOHBYQ3d5UEh0ZFR4aThHU0pzNEdTNXZ6ZTR6Tm1ueUYvUUFBQUtCQm5RZi9RWjBICi93QUFBQXR6YzJndFpXUXlOVFV4T1FBQUFDQWNNaGJDT1V4RThwWEN3eVBIdGRUeGk4R1NKczRHUzV2emU0ek5tbnlGL1EKQUFBRURreWwzQlZlek9YUWZUNzZac0NkYTZPNTFnMExsb25EMEd6L2Y4SHh3dzRCd3lGc0k1VEVUeWxjTERJOGUxMVBHTAp3WkltemdaTG0vTjdqTTJhZklYOUFBQUFHR1JsY0d4dmVTQnJaWGtnWm05eUlHNXZiV0ZrWDIxamNBRUNBd1FGCi0tLS0tRU5EIE9QRU5TU0ggUFJJVkFURSBLRVktLS0tLQo="
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -80,7 +89,8 @@ def read_job_spec(file_path):
|
|||||||
"HOST": "0.0.0.0",
|
"HOST": "0.0.0.0",
|
||||||
"LOG_LEVEL": "INFO",
|
"LOG_LEVEL": "INFO",
|
||||||
"RELOAD": "true",
|
"RELOAD": "true",
|
||||||
"PYTHONPATH": "/app"
|
"PYTHONPATH": "/app",
|
||||||
|
"STATIC_DIR": "/local/nomad_mcp/static"
|
||||||
},
|
},
|
||||||
"Resources": {
|
"Resources": {
|
||||||
"CPU": 200,
|
"CPU": 200,
|
||||||
@ -121,33 +131,9 @@ def read_job_spec(file_path):
|
|||||||
"AutoRevert": True
|
"AutoRevert": True
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return job_spec
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error reading job specification: {str(e)}")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
def main():
|
|
||||||
print("Deploying Nomad MCP service using our own Nomad client...")
|
|
||||||
|
|
||||||
# Check if NOMAD_ADDR is configured
|
|
||||||
nomad_addr = os.getenv("NOMAD_ADDR")
|
|
||||||
if not nomad_addr:
|
|
||||||
print("Error: NOMAD_ADDR is not configured in .env file.")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
print(f"Connecting to Nomad at: {nomad_addr}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Initialize the Nomad service
|
|
||||||
nomad_service = NomadService()
|
|
||||||
|
|
||||||
# Read the job specification
|
|
||||||
job_spec = read_job_spec("nomad_mcp_job.nomad")
|
|
||||||
print("Job specification loaded successfully.")
|
|
||||||
|
|
||||||
# Start the job
|
# Start the job
|
||||||
print("Registering and starting the nomad-mcp job...")
|
|
||||||
response = nomad_service.start_job(job_spec)
|
response = nomad_service.start_job(job_spec)
|
||||||
|
|
||||||
print("\nJob registration response:")
|
print("\nJob registration response:")
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
job "nomad-mcp" {
|
job "nomad-mcp" {
|
||||||
datacenters = ["jm"]
|
datacenters = ["dc1"]
|
||||||
type = "service"
|
type = "service"
|
||||||
namespace = "development"
|
namespace = "development"
|
||||||
|
|
||||||
@ -16,12 +16,11 @@ job "nomad-mcp" {
|
|||||||
driver = "docker"
|
driver = "docker"
|
||||||
|
|
||||||
config {
|
config {
|
||||||
image = "registry.dev.meisheng.group/nomad_mcp:20250226"
|
image = "python:3.11-slim"
|
||||||
ports = ["http"]
|
ports = ["http"]
|
||||||
command = "python"
|
command = "python"
|
||||||
args = ["-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
args = ["-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
||||||
# Mount the local directory containing the code
|
|
||||||
mount {
|
mount {
|
||||||
type = "bind"
|
type = "bind"
|
||||||
source = "local/nomad_mcp"
|
source = "local/nomad_mcp"
|
||||||
@ -30,34 +29,37 @@ job "nomad-mcp" {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Pull code from Gitea
|
|
||||||
artifact {
|
artifact {
|
||||||
source = "git::ssh://git@gitea.service.mesh:2222/Mei_Sheng_Textiles/nomad_mcp.git"
|
source = "git::https://gitea.dev.meisheng.group/nkohl/nomad_mcp.git"
|
||||||
destination = "local/nomad_mcp"
|
destination = "local/nomad_mcp"
|
||||||
options {
|
|
||||||
ref = "main" # or whichever branch/tag you want to use
|
|
||||||
sshkey = "LS0tLS1CRUdJTiBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0KYjNCbGJuTnphQzFyWlhrdGRqRUFBQUFBQkc1dmJtVUFBQUFFYm05dVpRQUFBQUFBQUFBQkFBQUFNd0FBQUF0emMyZ3RaVwpReU5UVXhPUUFBQUNBY01oYkNPVXhFOHBYQ3d5UEh0ZFR4aThHU0pzNEdTNXZ6ZTR6Tm1ueUYvUUFBQUtCQm5RZi9RWjBICi93QUFBQXR6YzJndFpXUXlOVFV4T1FBQUFDQWNNaGJDT1V4RThwWEN3eVBIdGRUeGk4R1NKczRHUzV2emU0ek5tbnlGL1EKQUFBRURreWwzQlZlek9YUWZUNzZac0NkYTZPNTFnMExsb25EMEd6L2Y4SHh3dzRCd3lGc0k1VEVUeWxjTEREOGUxMVBHTAp3WkltemdaTG0vTjdqTTJhZklYOUFBQUFHR1JsY0d4dmVTQnJaWGtnWm05eUlHNXZiV0ZrWDIxamNBRUNBd1FGCi0tLS0tRU5EIE9QRU5TU0ggUFJJVkFURSBLRVktLS0tLQo="
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
env {
|
env {
|
||||||
# Nomad connection settings
|
|
||||||
NOMAD_ADDR = "http://pjmldk01.ds.meisheng.group:4646"
|
NOMAD_ADDR = "http://pjmldk01.ds.meisheng.group:4646"
|
||||||
NOMAD_NAMESPACE = "development"
|
NOMAD_NAMESPACE = "development"
|
||||||
NOMAD_SKIP_VERIFY = "true"
|
LOG_LEVEL = "DEBUG"
|
||||||
|
|
||||||
# API settings
|
|
||||||
PORT = "8000"
|
|
||||||
HOST = "0.0.0.0"
|
|
||||||
|
|
||||||
# Logging level
|
|
||||||
LOG_LEVEL = "INFO"
|
|
||||||
|
|
||||||
# Enable to make development easier
|
|
||||||
RELOAD = "true"
|
|
||||||
|
|
||||||
# Set PYTHONPATH to include the app directory
|
|
||||||
PYTHONPATH = "/app"
|
PYTHONPATH = "/app"
|
||||||
|
STATIC_DIR = "/local/nomad_mcp/static"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add a template to create the static directory if it doesn't exist
|
||||||
|
template {
|
||||||
|
data = <<EOH
|
||||||
|
#!/bin/bash
|
||||||
|
echo "Ensuring static directory exists at ${STATIC_DIR}"
|
||||||
|
mkdir -p ${STATIC_DIR}
|
||||||
|
chmod -R 755 ${STATIC_DIR}
|
||||||
|
echo "Static directory setup complete"
|
||||||
|
EOH
|
||||||
|
destination = "local/setup_static.sh"
|
||||||
|
perms = "755"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Run the setup script before starting the application
|
||||||
|
lifecycle {
|
||||||
|
hook = "prestart"
|
||||||
|
command = "/bin/bash"
|
||||||
|
args = ["local/setup_static.sh"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resources {
|
resources {
|
||||||
@ -71,8 +73,7 @@ job "nomad-mcp" {
|
|||||||
tags = [
|
tags = [
|
||||||
"traefik.enable=true",
|
"traefik.enable=true",
|
||||||
"traefik.http.routers.nomad-mcp.entryPoints=https",
|
"traefik.http.routers.nomad-mcp.entryPoints=https",
|
||||||
"traefik.http.routers.nomad-mcp.rule=Host(`nomad_mcp.dev.meisheng.group`)",
|
"traefik.http.routers.nomad-mcp.rule=Host(`nomad_mcp.dev.meisheng.group`)"
|
||||||
"traefik.http.routers.nomad-mcp.middlewares=proxyheaders@consulcatalog"
|
|
||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
@ -80,11 +81,6 @@ job "nomad-mcp" {
|
|||||||
path = "/api/health"
|
path = "/api/health"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
|
|
||||||
check_restart {
|
|
||||||
limit = 3
|
|
||||||
grace = "60s"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user