Compare commits

..

4 Commits

Author SHA1 Message Date
Lucas Armand 4e951f4912 test vastai_sdk test package 2026-04-08 13:38:22 -07:00
Lucas Armand f636012685 add test index 2026-04-08 13:18:46 -07:00
Lucas Armand ddb986d561 use test package 2026-04-08 13:12:27 -07:00
Lucas Armand 99a3319e66 Point to vast-cli 2026-04-08 12:30:20 -07:00
7 changed files with 45 additions and 84 deletions
-37
View File
@@ -1,37 +0,0 @@
// .devcontainer/devcontainer.json
// Dev container for the Vast.ai serverless Ollama template.
// Includes Docker-in-Docker so you can build and test images from inside the container.
{
"name": "vast.ai-serverless-ollama",
// Base image: the devcontainers "base" image at the "trixie" tag.
"image": "mcr.microsoft.com/devcontainers/base:trixie",
// Dev Container features layered on top of the base image.
"features": {
// Python feature: requests version 3.12 with the feature's optional tooling (installTools) enabled.
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
"version": "3.12"
},
// Docker-in-Docker feature, pinned to feature release 3.0.1. Note that the Docker
// "version" option itself is "latest", so the engine version is not pinned.
// NOTE(review): "moby": false presumably selects Docker CE packages instead of Moby —
// confirm against the feature's documentation.
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
"moby": false,
"version": "latest",
"installDockerBuildx": true,
"dockerDashComposeVersion": "v2"
}
},
// Start the container with --privileged.
// NOTE(review): presumably required so the nested Docker daemon can run — confirm it is still needed.
"runArgs": ["--privileged"],
// Environment variables set inside the container: DOCKER_BUILDKIT=1.
"containerEnv": {
"DOCKER_BUILDKIT": "1"
},
// Runs once after the container is created: upgrade pip, then install the repo's
// requirements.txt plus pyyaml into the user site (--user).
"postCreateCommand": "python3 -m pip install --user --upgrade pip && python3 -m pip install --user -r requirements.txt pyyaml",
// Editor-specific configuration (VS Code only).
"customizations": {
"vscode": {
// Extensions requested for the container: Python and Docker.
"extensions": [
"ms-python.python",
"ms-azuretools.vscode-docker"
],
// NOTE(review): defaultInterpreterPath points at /usr/bin/python3, while postCreateCommand
// uses whichever `python3` is first on PATH. If the python feature installs its interpreter
// elsewhere, the editor and the installed packages may refer to different interpreters — verify.
"settings": {
"python.defaultInterpreterPath": "/usr/bin/python3",
"terminal.integrated.defaultProfile.linux": "bash",
"docker.showStartPage": false
}
}
}
}
+1 -1
View File
@@ -9,7 +9,7 @@ This repository contains **example PyWorkers** used by Vast.ai's default Serve
- Optionally supports FIFO queueing when the backend cannot process concurrent requests - Optionally supports FIFO queueing when the backend cannot process concurrent requests
- Detects readiness/failure from model logs and runs a benchmark to estimate throughput - Detects readiness/failure from model logs and runs a benchmark to estimate throughput
> Important: The **core PyWorker framework** (Worker, WorkerConfig, HandlerConfig, BenchmarkConfig, LogActionConfig) is provided by the **`vastai` / `vastai-sdk`** Python package (https://github.com/vast-ai/vast-sdk). This repo focuses on *worker implementations and examples*, not the framework internals. > Important: The **core PyWorker framework** (Worker, WorkerConfig, HandlerConfig, BenchmarkConfig, LogActionConfig) is provided by the **`vastai`** Python package (https://github.com/vast-ai/vast-cli). This repo focuses on *worker implementations and examples*, not the framework internals.
## Repository Purpose ## Repository Purpose
+4 -4
View File
@@ -1,16 +1,16 @@
# Where did the PyWorker code go? # Where did the PyWorker code go?
We have moved the PyWorker source code into the `vastai-sdk` Python SDK. We have moved the PyWorker source code into the `vastai` Python package.
You can install it with You can install it with
``` ```
pip install vastai-sdk pip install vastai
``` ```
All of the source code can be found here: All of the source code can be found here:
https://github.com/vast-ai/vast-sdk https://github.com/vast-ai/vast-cli
And can be imported from vastai.serverless.server.lib And can be imported from vastai.serverless.server.lib
Serverless instances automatically run the start_server.sh script, which installs the vastai-sdk. Serverless instances automatically run the start_server.sh script, which installs the vastai package.
This is how the PyWorker source code makes it onto your serverless instances. This is how the PyWorker source code makes it onto your serverless instances.
You provide a worker.py file in your PYWORKER_REPO, and the start_server.sh will You provide a worker.py file in your PYWORKER_REPO, and the start_server.sh will
create and run a PyWorker according to your configuration defined in the file. create and run a PyWorker according to your configuration defined in the file.
+1 -1
View File
@@ -1 +1 @@
vastai-sdk vastai>=0.3.0
+12 -12
View File
@@ -53,7 +53,7 @@ JSON
exit 1 exit 1
} }
function install_vastai_sdk() { function install_vastai() {
local uv_flags=() local uv_flags=()
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
uv_flags+=(--system --break-system-packages) uv_flags+=(--system --break-system-packages)
@@ -77,8 +77,8 @@ function install_vastai_sdk() {
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "Installing vastai version ${SDK_VERSION}" echo "Installing vastai version ${SDK_VERSION}"
if ! uv pip install "${uv_flags[@]}" "vastai==${SDK_VERSION}"; then if ! uv pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ "${uv_flags[@]}" "vastai-sdk-vast==${SDK_VERSION}"; then
report_error_and_exit "Failed to install vastai==${SDK_VERSION}" report_error_and_exit "Failed to install vastai-sdk-vast==${SDK_VERSION}"
fi fi
return 0 return 0
fi fi
@@ -140,7 +140,7 @@ if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
fi fi
fi fi
fi fi
install_vastai_sdk install_vastai
touch ~/.no_auto_tmux touch ~/.no_auto_tmux
elif [ ! -d "$ENV_PATH" ]; then elif [ ! -d "$ENV_PATH" ]; then
echo "setting up venv" echo "setting up venv"
@@ -197,7 +197,7 @@ elif [ ! -d "$ENV_PATH" ]; then
report_error_and_exit "Failed to install Python requirements" report_error_and_exit "Failed to install Python requirements"
fi fi
install_vastai_sdk install_vastai
if ! touch ~/.no_auto_tmux; then if ! touch ~/.no_auto_tmux; then
report_error_and_exit "Failed to create ~/.no_auto_tmux" report_error_and_exit "Failed to create ~/.no_auto_tmux"
@@ -237,7 +237,7 @@ else
fi fi
fi fi
install_vastai_sdk install_vastai
fi fi
fi fi
@@ -318,8 +318,8 @@ if [ "$IS_DEPLOYMENT" = "true" ]; then
# The s3_key exists in the DB as soon as the deployment is created, but the # The s3_key exists in the DB as soon as the deployment is created, but the
# actual upload may still be in flight from the client side. # actual upload may still be in flight from the client side.
# Install SDK (uses the install_vastai_sdk function which supports SDK_BRANCH/SDK_VERSION) # Install SDK (uses the install_vastai function which supports SDK_BRANCH/SDK_VERSION)
install_vastai_sdk install_vastai
# Run deployment in serve mode # Run deployment in serve mode
export VAST_DEPLOYMENT_MODE=serve export VAST_DEPLOYMENT_MODE=serve
echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py" echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py"
@@ -339,19 +339,19 @@ set +e
PY_STATUS=1 PY_STATUS=1
if [ -f "$SERVER_DIR/worker.py" ]; then if [ -f "$SERVER_DIR/worker.py" ]; then
echo "Running worker.py" echo "trying worker.py"
python3 -m "worker" |& tee -a "$PYWORKER_LOG" python3 -m "worker" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then
echo "Running workers.${BACKEND}.worker" echo "trying workers.${BACKEND}.worker"
python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG" python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then
echo "Running workers.${BACKEND}.server" echo "trying workers.${BACKEND}.server"
python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG" python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
@@ -365,4 +365,4 @@ if [ "${PY_STATUS}" -ne 0 ]; then
report_error_and_exit "PyWorker exited with status ${PY_STATUS}" report_error_and_exit "PyWorker exited with status ${PY_STATUS}"
fi fi
echo "PyWorker bootstrap complete" echo "launching PyWorker server done"
+26 -28
View File
@@ -1,19 +1,33 @@
import nltk
import random import random
import os import os
import re
import logging
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors # vLLM model configuration
# Ollama model configuration
MODEL_SERVER_URL = 'http://127.0.0.1' MODEL_SERVER_URL = 'http://127.0.0.1'
MODEL_SERVER_PORT = 11434 MODEL_SERVER_PORT = 18000
MODEL_LOG_FILE = '/var/log/onstart.log' MODEL_LOG_FILE = '/var/log/portal/vllm.log'
MODEL_HEALTHCHECK_ENDPOINT = "/" MODEL_HEALTHCHECK_ENDPOINT = "/health"
# vLLM-specific log messages
MODEL_LOAD_LOG_MSG = [
"Application startup complete.",
]
MODEL_ERROR_LOG_MSGS = [
"INFO exited: vllm",
"RuntimeError: Engine",
"Traceback (most recent call last):"
]
MODEL_INFO_LOG_MSGS = [
'"message":"Download'
]
nltk.download("words")
WORD_LIST = nltk.corpus.words.words()
# Ollama-specific log messages
def request_parser(request): def request_parser(request):
data = request data = request
if request.get("input") is not None: if request.get("input") is not None:
@@ -22,24 +36,8 @@ def request_parser(request):
def completions_benchmark_generator() -> dict: def completions_benchmark_generator() -> dict:
# extract words from the python source code of the worker to create a list of words for generating prompts
WORD_LIST = []
# Try to load from perl copyright file first
try:
with open("/usr/share/doc/perl/copyright", 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
except (FileNotFoundError, IOError):
# Fallback to loading from python file
with open(__file__, 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
prompt = " ".join(random.choices(WORD_LIST, k=int(250))) prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
model = os.environ.get("MODEL_NAME") model = os.environ.get("MODEL_NAME")
if not model: if not model:
raise ValueError("MODEL_NAME environment variable not set") raise ValueError("MODEL_NAME environment variable not set")
@@ -79,9 +77,9 @@ worker_config = WorkerConfig(
) )
], ],
log_action_config=LogActionConfig( log_action_config=LogActionConfig(
on_load=["llama_server: model loaded"], on_load=MODEL_LOAD_LOG_MSG,
on_error=["Traceback (most recent call last):","Error:"], on_error=MODEL_ERROR_LOG_MSGS,
#on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"] on_info=MODEL_INFO_LOG_MSGS
) )
) )
+1 -1
View File
@@ -35,7 +35,7 @@ def benchmark_generator() -> dict:
benchmark_data = { benchmark_data = {
"inputs": prompt, "inputs": prompt,
"parameters": { "parameters": {
"max_new_tokens": 500, "max_new_tokens": 128,
"temperature": 0.7, "temperature": 0.7,
"return_full_text": False "return_full_text": False
} }