Compare commits

..

4 Commits

Author SHA1 Message Date
Lucas Armand 186b388f2f Merge branch 'main' into add-hacky-deployments-script 2026-03-23 14:20:45 -07:00
Lucas Armand d1c521f973 retry S3 download 2026-03-23 14:18:52 -07:00
Lucas Armand e1a5cf2b43 Retry until it loads 2026-03-23 14:16:41 -07:00
Lucas Armand 87f968f961 Add hacky deployments script 2026-03-23 12:39:15 -07:00
5 changed files with 105 additions and 184 deletions
-37
View File
@@ -1,37 +0,0 @@
// .devcontainer/devcontainer.json
// Dev container for the Vast.ai serverless Ollama template.
// Includes Docker-in-Docker so you can build and test images from inside the container.
{
"name": "vast.ai-serverless-ollama",
"image": "mcr.microsoft.com/devcontainers/base:trixie",
"features": {
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
"version": "3.12"
},
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
"moby": false,
"version": "latest",
"installDockerBuildx": true,
"dockerDashComposeVersion": "v2"
}
},
"runArgs": ["--privileged"],
"containerEnv": {
"DOCKER_BUILDKIT": "1"
},
"postCreateCommand": "python3 -m pip install --user --upgrade pip && python3 -m pip install --user -r requirements.txt pyyaml",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-azuretools.vscode-docker"
],
"settings": {
"python.defaultInterpreterPath": "/usr/bin/python3",
"terminal.integrated.defaultProfile.linux": "bash",
"docker.showStartPage": false
}
}
}
}
+1 -1
View File
@@ -1 +1 @@
vastai-sdk vastai-sdk>=0.3.0
+77 -117
View File
@@ -2,17 +2,10 @@
set -e -o pipefail set -e -o pipefail
# Check for force update flag
FORCE_UPDATE=false
if [ -f "/.force_update" ]; then
echo "Force update flag detected at /.force_update"
FORCE_UPDATE=true
fi
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}" WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
SERVER_DIR="$WORKSPACE_DIR/vast-pyworker" SERVER_DIR="$WORKSPACE_DIR/vast-pyworker"
ENV_PATH="${ENV_PATH:-$WORKSPACE_DIR/worker-env}" ENV_PATH="$WORKSPACE_DIR/worker-env"
DEBUG_LOG="$WORKSPACE_DIR/debug.log" DEBUG_LOG="$WORKSPACE_DIR/debug.log"
PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log" PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"
@@ -54,38 +47,29 @@ JSON
} }
function install_vastai_sdk() { function install_vastai_sdk() {
local uv_flags=() # If SDK_BRANCH is set, install vastai-sdk from the vast-sdk repo at that branch/tag/commit.
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
uv_flags+=(--system --break-system-packages)
fi
if [ "$FORCE_UPDATE" = true ]; then
uv_flags+=(--force-reinstall)
echo "Force reinstalling vastai"
fi
# If SDK_BRANCH is set, install vastai from the vast-cli repo at that branch/tag/commit.
if [ -n "${SDK_BRANCH:-}" ]; then if [ -n "${SDK_BRANCH:-}" ]; then
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}" echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}"
fi fi
echo "Installing vastai from https://github.com/vast-ai/vast-cli/ @ ${SDK_BRANCH}" echo "Installing vastai-sdk from https://github.com/vast-ai/vast-sdk/ @ ${SDK_BRANCH}"
if ! uv pip install "${uv_flags[@]}" "vastai @ git+https://github.com/vast-ai/vast-cli.git@${SDK_BRANCH}"; then if ! uv pip install "vastai-sdk @ git+https://github.com/vast-ai/vast-sdk.git@${SDK_BRANCH}"; then
report_error_and_exit "Failed to install vastai from vast-ai/vast-cli@${SDK_BRANCH}" report_error_and_exit "Failed to install vastai-sdk from vast-ai/vast-sdk@${SDK_BRANCH}"
fi fi
return 0 return 0
fi fi
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "Installing vastai version ${SDK_VERSION}" echo "Installing vastai-sdk version ${SDK_VERSION}"
if ! uv pip install "${uv_flags[@]}" "vastai==${SDK_VERSION}"; then if ! uv pip install "vastai-sdk==${SDK_VERSION}"; then
report_error_and_exit "Failed to install vastai==${SDK_VERSION}" report_error_and_exit "Failed to install vastai-sdk==${SDK_VERSION}"
fi fi
return 0 return 0
fi fi
echo "Installing default vastai" echo "Installing default vastai-sdk"
if ! uv pip install "${uv_flags[@]}" vastai; then if ! uv pip install vastai-sdk; then
report_error_and_exit "Failed to install vastai" report_error_and_exit "Failed to install vastai-sdk"
fi fi
} }
@@ -128,21 +112,8 @@ if ! grep -q "VAST" /etc/environment; then
fi fi
fi fi
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then if [ ! -d "$ENV_PATH" ]
echo "Using system Python: $(which python3)" then
if ! which uv > /dev/null 2>&1; then
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
report_error_and_exit "Failed to install uv package manager"
fi
if [[ -f ~/.local/bin/env ]]; then
if ! source ~/.local/bin/env; then
report_error_and_exit "Failed to source uv environment"
fi
fi
fi
install_vastai_sdk
touch ~/.no_auto_tmux
elif [ ! -d "$ENV_PATH" ]; then
echo "setting up venv" echo "setting up venv"
if ! which uv; then if ! which uv; then
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
@@ -161,27 +132,10 @@ elif [ ! -d "$ENV_PATH" ]; then
if ! git clone "${PYWORKER_REPO:-https://github.com/vast-ai/pyworker}" "$SERVER_DIR"; then if ! git clone "${PYWORKER_REPO:-https://github.com/vast-ai/pyworker}" "$SERVER_DIR"; then
report_error_and_exit "Failed to clone pyworker repository" report_error_and_exit "Failed to clone pyworker repository"
fi fi
elif [ "$FORCE_UPDATE" = true ]; then
echo "Force updating pyworker repository"
if ! (cd "$SERVER_DIR" && git fetch --all); then
report_error_and_exit "Failed to fetch pyworker repository updates"
fi
fi fi
if [[ -n ${PYWORKER_REF:-} ]]; then if [[ -n ${PYWORKER_REF:-} ]]; then
if [ "$FORCE_UPDATE" = true ]; then if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF"); then
echo "Force updating to pyworker reference: $PYWORKER_REF" report_error_and_exit "Failed to checkout pyworker reference: $PYWORKER_REF"
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
fi
else
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF"); then
report_error_and_exit "Failed to checkout pyworker reference: $PYWORKER_REF"
fi
fi
elif [ "$FORCE_UPDATE" = true ]; then
echo "Force updating pyworker to latest"
if ! (cd "$SERVER_DIR" && git pull); then
report_error_and_exit "Failed to pull latest pyworker changes"
fi fi
fi fi
@@ -208,44 +162,11 @@ else
report_error_and_exit "Failed to source uv environment" report_error_and_exit "Failed to source uv environment"
fi fi
fi fi
if ! source "$ENV_PATH/bin/activate"; then if ! source "$WORKSPACE_DIR/worker-env/bin/activate"; then
report_error_and_exit "Failed to activate existing virtual environment" report_error_and_exit "Failed to activate existing virtual environment"
fi fi
echo "environment activated" echo "environment activated"
echo "venv: $VIRTUAL_ENV" echo "venv: $VIRTUAL_ENV"
# Handle force update for existing environment
if [ "$FORCE_UPDATE" = true ]; then
echo "Performing force update on existing environment"
if [[ -d $SERVER_DIR ]]; then
echo "Force updating pyworker repository"
if ! (cd "$SERVER_DIR" && git fetch --all); then
report_error_and_exit "Failed to fetch pyworker repository updates"
fi
if [[ -n ${PYWORKER_REF:-} ]]; then
echo "Force updating to pyworker reference: $PYWORKER_REF"
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
fi
else
echo "Force updating pyworker to latest"
if ! (cd "$SERVER_DIR" && git pull); then
report_error_and_exit "Failed to pull latest pyworker changes"
fi
fi
fi
install_vastai_sdk
fi
fi
# Remove force update flag after successful update
if [ "$FORCE_UPDATE" = true ]; then
echo "Removing force update flag"
rm -f "/.force_update"
echo "Force update completed successfully"
fi fi
if [ "$USE_SSL" = true ]; then if [ "$USE_SSL" = true ]; then
@@ -283,23 +204,12 @@ EOF
report_error_and_exit "Failed to generate SSL certificate request" report_error_and_exit "Failed to generate SSL certificate request"
fi fi
max_retries=5 if ! curl --header 'Content-Type: application/octet-stream' \
retry_delay=2 --data-binary @/etc/instance.csr \
for attempt in $(seq 1 "$max_retries"); do -X \
http_code=$(curl -sS -o /etc/instance.crt -w '%{http_code}' \ POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID" > /etc/instance.crt; then
--header 'Content-Type: application/octet-stream' \ report_error_and_exit "Failed to sign SSL certificate"
--data-binary @/etc/instance.csr \ fi
-X POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID")
if [ "$http_code" -ge 200 ] && [ "$http_code" -lt 300 ]; then
break
fi
echo "SSL cert signing attempt $attempt/$max_retries failed (HTTP $http_code)"
if [ "$attempt" -eq "$max_retries" ]; then
report_error_and_exit "Failed to sign SSL certificate after $max_retries attempts (HTTP $http_code)"
fi
sleep "$retry_delay"
retry_delay=$((retry_delay * 2))
done
fi fi
export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED
@@ -317,13 +227,63 @@ if [ "$IS_DEPLOYMENT" = "true" ]; then
# Download deployment code, retrying until the blob is available on S3. # Download deployment code, retrying until the blob is available on S3.
# The s3_key exists in the DB as soon as the deployment is created, but the # The s3_key exists in the DB as soon as the deployment is created, but the
# actual upload may still be in flight from the client side. # actual upload may still be in flight from the client side.
# Fetch the deployment tarball, polling every 10 seconds until it is available.
# Two things can lag behind deployment creation:
#   1. the API may not return a download URL yet, and
#   2. the URL may answer with a non-200 status until the client-side upload
#      of the blob has finished.
# The loop has no upper bound on attempts; it ends only on an HTTP 200 download.
#
# Reads:  CONTAINER_API_KEY, VAST_API_BASE, DEPLOYMENT_ID, DEPLOY_DIR
# Writes: $DEPLOY_DIR/deployment.tar.gz
# Sets:   RETRY, DOWNLOAD_RESPONSE, DOWNLOAD_URL, HTTP_CODE
echo "Downloading deployment code..."
RETRY=0
while true; do
  # The script enables `set -e` near its top, and a failing command
  # substitution in a plain assignment counts as a failed command. Without the
  # `|| true` guards a transient network error would abort the whole script
  # instead of falling through to the retry path. The variable still receives
  # whatever the command printed before failing.
  DOWNLOAD_RESPONSE=$(curl -sS \
    -H "Authorization: Bearer $CONTAINER_API_KEY" \
    "${VAST_API_BASE}/api/v0/deployment/${DEPLOYMENT_ID}/download_url/") || true

  # Pull `download_url` out of the JSON reply. Anything that is not a JSON
  # object with a non-empty `download_url` yields an empty string.
  DOWNLOAD_URL=$(python3 -c "
import sys, json
try:
    d = json.load(sys.stdin)
    print(d.get('download_url') or '')
except Exception:
    print('')
" <<< "$DOWNLOAD_RESPONSE") || DOWNLOAD_URL=""

  if [ -z "$DOWNLOAD_URL" ] || [ "$DOWNLOAD_URL" = "None" ]; then
    RETRY=$((RETRY + 1))
    echo "No download URL yet (attempt $RETRY), retrying in 10s... response: $DOWNLOAD_RESPONSE"
    sleep 10
    continue
  fi

  # Got a URL — try the actual S3 download.
  # On a connection-level failure curl exits non-zero and reports code 000;
  # keep that value and let the status check below schedule the retry.
  HTTP_CODE=$(curl -sS -L -o "$DEPLOY_DIR/deployment.tar.gz" -w "%{http_code}" "$DOWNLOAD_URL") || true
  if [ "$HTTP_CODE" = "200" ]; then
    break
  fi

  RETRY=$((RETRY + 1))
  echo "S3 download returned HTTP $HTTP_CODE (attempt $RETRY), blob not yet uploaded. Retrying in 10s..."
  # Discard the error body curl wrote to the output path so a later step
  # never mistakes it for the tarball.
  rm -f "$DEPLOY_DIR/deployment.tar.gz"
  sleep 10
done
# Unpack the downloaded tarball inside DEPLOY_DIR. The `cd` is performed in the
# current shell (not a subshell), so DEPLOY_DIR remains the working directory
# for everything that follows.
#
# `cd` and `tar` are checked separately: in `cd … && tar …` a failing `cd` is
# not the last command of the AND-list, so `set -e` ignores it and the script
# would go on to announce a successful extraction with nothing unpacked.
# NOTE(review): report_error_and_exit is the error helper used throughout this
# script; presumably it reports the message and terminates — confirm.
if ! cd "$DEPLOY_DIR"; then
  report_error_and_exit "Failed to enter deployment directory: $DEPLOY_DIR"
fi
if ! tar xzf deployment.tar.gz; then
  report_error_and_exit "Failed to extract deployment code"
fi
echo "Deployment code extracted."

# Source secrets if present. The file is executed in this shell, so any
# variables it sets are visible to the steps below.
if [ -f "$DEPLOY_DIR/.secrets" ]; then
  echo "Sourcing secrets..."
  source "$DEPLOY_DIR/.secrets"
fi

# Run on_start.sh to completion if present. A non-zero exit is reported
# through the script's error helper rather than ending the script silently
# via `set -e`.
if [ -f "$DEPLOY_DIR/on_start.sh" ]; then
  echo "Running on_start.sh..."
  chmod +x "$DEPLOY_DIR/on_start.sh"
  if ! bash "$DEPLOY_DIR/on_start.sh"; then
    report_error_and_exit "on_start.sh failed"
  fi
  echo "on_start.sh completed."
fi
# Install SDK (uses the install_vastai_sdk function which supports SDK_BRANCH/SDK_VERSION) # Install SDK (uses the install_vastai_sdk function which supports SDK_BRANCH/SDK_VERSION)
install_vastai_sdk install_vastai_sdk
# Run deployment in serve mode # Run deployment in serve mode
export VAST_DEPLOYMENT_MODE=serve export VAST_DEPLOYMENT_MODE=serve
echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py" echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py"
serve-vast-deployment python3 "$DEPLOY_DIR/deployment.py"
exit $? exit $?
fi fi
# ─── End SDK Deployment Mode ─────────────────────────────────────────── # ─── End SDK Deployment Mode ───────────────────────────────────────────
@@ -339,19 +299,19 @@ set +e
PY_STATUS=1 PY_STATUS=1
if [ -f "$SERVER_DIR/worker.py" ]; then if [ -f "$SERVER_DIR/worker.py" ]; then
echo "Running worker.py" echo "trying worker.py"
python3 -m "worker" |& tee -a "$PYWORKER_LOG" python3 -m "worker" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then
echo "Running workers.${BACKEND}.worker" echo "trying workers.${BACKEND}.worker"
python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG" python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then
echo "Running workers.${BACKEND}.server" echo "trying workers.${BACKEND}.server"
python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG" python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
@@ -365,4 +325,4 @@ if [ "${PY_STATUS}" -ne 0 ]; then
report_error_and_exit "PyWorker exited with status ${PY_STATUS}" report_error_and_exit "PyWorker exited with status ${PY_STATUS}"
fi fi
echo "PyWorker bootstrap complete" echo "launching PyWorker server done"
+26 -28
View File
@@ -1,19 +1,33 @@
import nltk
import random import random
import os import os
import re
import logging
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors # vLLM model configuration
# Ollama model configuration
MODEL_SERVER_URL = 'http://127.0.0.1' MODEL_SERVER_URL = 'http://127.0.0.1'
MODEL_SERVER_PORT = 11434 MODEL_SERVER_PORT = 18000
MODEL_LOG_FILE = '/var/log/onstart.log' MODEL_LOG_FILE = '/var/log/portal/vllm.log'
MODEL_HEALTHCHECK_ENDPOINT = "/" MODEL_HEALTHCHECK_ENDPOINT = "/health"
# vLLM-specific log messages
MODEL_LOAD_LOG_MSG = [
"Application startup complete.",
]
MODEL_ERROR_LOG_MSGS = [
"INFO exited: vllm",
"RuntimeError: Engine",
"Traceback (most recent call last):"
]
MODEL_INFO_LOG_MSGS = [
'"message":"Download'
]
nltk.download("words")
WORD_LIST = nltk.corpus.words.words()
# Ollama-specific log messages
def request_parser(request): def request_parser(request):
data = request data = request
if request.get("input") is not None: if request.get("input") is not None:
@@ -22,24 +36,8 @@ def request_parser(request):
def completions_benchmark_generator() -> dict: def completions_benchmark_generator() -> dict:
# extract words from the python source code of the worker to create a list of words for generating prompts
WORD_LIST = []
# Try to load from perl copyright file first
try:
with open("/usr/share/doc/perl/copyright", 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
except (FileNotFoundError, IOError):
# Fallback to loading from python file
with open(__file__, 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
prompt = " ".join(random.choices(WORD_LIST, k=int(250))) prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
model = os.environ.get("MODEL_NAME") model = os.environ.get("MODEL_NAME")
if not model: if not model:
raise ValueError("MODEL_NAME environment variable not set") raise ValueError("MODEL_NAME environment variable not set")
@@ -79,9 +77,9 @@ worker_config = WorkerConfig(
) )
], ],
log_action_config=LogActionConfig( log_action_config=LogActionConfig(
on_load=["llama_server: model loaded"], on_load=MODEL_LOAD_LOG_MSG,
on_error=["Traceback (most recent call last):","Error:"], on_error=MODEL_ERROR_LOG_MSGS,
#on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"] on_info=MODEL_INFO_LOG_MSGS
) )
) )
+1 -1
View File
@@ -35,7 +35,7 @@ def benchmark_generator() -> dict:
benchmark_data = { benchmark_data = {
"inputs": prompt, "inputs": prompt,
"parameters": { "parameters": {
"max_new_tokens": 500, "max_new_tokens": 128,
"temperature": 0.7, "temperature": 0.7,
"return_full_text": False "return_full_text": False
} }