Compare commits

...

12 Commits

Author SHA1 Message Date
Lucas Armand 4e951f4912 test vastai_sdk test package 2026-04-08 13:38:22 -07:00
Lucas Armand f636012685 add test index 2026-04-08 13:18:46 -07:00
Lucas Armand ddb986d561 use test package 2026-04-08 13:12:27 -07:00
Lucas Armand 99a3319e66 Point to vast-cli 2026-04-08 12:30:20 -07:00
Lucas Armand 83c31e25a9 Add force update detection 2026-03-31 13:46:22 -07:00
Lucas Armand fbe1dca6fa more env_path fixes 2026-03-30 16:28:51 -07:00
Lucas Armand 4c3120dbc5 allow override env_path 2026-03-30 16:25:01 -07:00
Lucas Armand d7d9b915f6 allow break system packages 2026-03-30 16:09:17 -07:00
Lucas Armand 4660b337fb Check for USE_SYSTEM_PYTHON 2026-03-30 14:46:38 -07:00
edgaratvast 7506ecb6b5 directly invoke one stop shop setup executable exported by vastai pip package for deployments (#82) 2026-03-26 10:59:49 -07:00
LucasArmandVast 50633c5003 Update deployments script with retries. (#81) 2026-03-23 14:58:32 -07:00
LucasArmandVast 2e8f18276f Add beta deployments script (#80) 2026-03-23 14:14:06 -07:00
4 changed files with 144 additions and 30 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ This repository contains **example PyWorkers** used by Vast.ais default Serve
- Optionally supports FIFO queueing when the backend cannot process concurrent requests - Optionally supports FIFO queueing when the backend cannot process concurrent requests
- Detects readiness/failure from model logs and runs a benchmark to estimate throughput - Detects readiness/failure from model logs and runs a benchmark to estimate throughput
> Important: The **core PyWorker framework** (Worker, WorkerConfig, HandlerConfig, BenchmarkConfig, LogActionConfig) is provided by the **`vastai` / `vastai-sdk`** Python package (https://github.com/vast-ai/vast-sdk). This repo focuses on *worker implementations and examples*, not the framework internals. > Important: The **core PyWorker framework** (Worker, WorkerConfig, HandlerConfig, BenchmarkConfig, LogActionConfig) is provided by the **`vastai`** Python package (https://github.com/vast-ai/vast-cli). This repo focuses on *worker implementations and examples*, not the framework internals.
## Repository Purpose ## Repository Purpose
+4 -4
View File
@@ -1,16 +1,16 @@
# Where did the PyWorker code go? # Where did the PyWorker code go?
We have moved the PyWorker source code into the `vastai-sdk` Python SDK. We have moved the PyWorker source code into the `vastai` Python package.
You can install it with You can install it with
``` ```
pip install vastai-sdk pip install vastai
``` ```
All of the source code can be found here: All of the source code can be found here:
https://github.com/vast-ai/vast-sdk https://github.com/vast-ai/vast-cli
And can be imported from vastai.serverless.server.lib And can be imported from vastai.serverless.server.lib
Serverless instances automatically run the start_server.sh script, which installs the vastai-sdk. Serverless instances automatically run the start_server.sh script, which installs the vastai package.
This is how the PyWorker source code makes it onto your serverless instances. This is how the PyWorker source code makes it onto your serverless instances.
You provide a worker.py file in your PYWORKER_REPO, and the start_server.sh will You provide a worker.py file in your PYWORKER_REPO, and the start_server.sh will
create and run a PyWorker according to your configuration defined in the file. create and run a PyWorker according to your configuration defined in the file.
+1 -1
View File
@@ -1 +1 @@
vastai-sdk>=0.3.0 vastai>=0.3.0
+138 -24
View File
@@ -2,10 +2,17 @@
set -e -o pipefail set -e -o pipefail
# Check for force update flag
FORCE_UPDATE=false
if [ -f "/.force_update" ]; then
echo "Force update flag detected at /.force_update"
FORCE_UPDATE=true
fi
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}" WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
SERVER_DIR="$WORKSPACE_DIR/vast-pyworker" SERVER_DIR="$WORKSPACE_DIR/vast-pyworker"
ENV_PATH="$WORKSPACE_DIR/worker-env" ENV_PATH="${ENV_PATH:-$WORKSPACE_DIR/worker-env}"
DEBUG_LOG="$WORKSPACE_DIR/debug.log" DEBUG_LOG="$WORKSPACE_DIR/debug.log"
PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log" PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"
@@ -46,30 +53,39 @@ JSON
exit 1 exit 1
} }
function install_vastai_sdk() { function install_vastai() {
# If SDK_BRANCH is set, install vastai-sdk from the vast-sdk repo at that branch/tag/commit. local uv_flags=()
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
uv_flags+=(--system --break-system-packages)
fi
if [ "$FORCE_UPDATE" = true ]; then
uv_flags+=(--force-reinstall)
echo "Force reinstalling vastai"
fi
# If SDK_BRANCH is set, install vastai from the vast-cli repo at that branch/tag/commit.
if [ -n "${SDK_BRANCH:-}" ]; then if [ -n "${SDK_BRANCH:-}" ]; then
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}" echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}"
fi fi
echo "Installing vastai-sdk from https://github.com/vast-ai/vast-sdk/ @ ${SDK_BRANCH}" echo "Installing vastai from https://github.com/vast-ai/vast-cli/ @ ${SDK_BRANCH}"
if ! uv pip install "vastai-sdk @ git+https://github.com/vast-ai/vast-sdk.git@${SDK_BRANCH}"; then if ! uv pip install "${uv_flags[@]}" "vastai @ git+https://github.com/vast-ai/vast-cli.git@${SDK_BRANCH}"; then
report_error_and_exit "Failed to install vastai-sdk from vast-ai/vast-sdk@${SDK_BRANCH}" report_error_and_exit "Failed to install vastai from vast-ai/vast-cli@${SDK_BRANCH}"
fi fi
return 0 return 0
fi fi
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "Installing vastai-sdk version ${SDK_VERSION}" echo "Installing vastai version ${SDK_VERSION}"
if ! uv pip install "vastai-sdk==${SDK_VERSION}"; then if ! uv pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ "${uv_flags[@]}" "vastai-sdk-vast==${SDK_VERSION}"; then
report_error_and_exit "Failed to install vastai-sdk==${SDK_VERSION}" report_error_and_exit "Failed to install vastai-vast==${SDK_VERSION}"
fi fi
return 0 return 0
fi fi
echo "Installing default vastai-sdk" echo "Installing default vastai"
if ! uv pip install vastai-sdk; then if ! uv pip install "${uv_flags[@]}" vastai; then
report_error_and_exit "Failed to install vastai-sdk" report_error_and_exit "Failed to install vastai"
fi fi
} }
@@ -112,8 +128,21 @@ if ! grep -q "VAST" /etc/environment; then
fi fi
fi fi
if [ ! -d "$ENV_PATH" ] if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
then echo "Using system Python: $(which python3)"
if ! which uv > /dev/null 2>&1; then
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
report_error_and_exit "Failed to install uv package manager"
fi
if [[ -f ~/.local/bin/env ]]; then
if ! source ~/.local/bin/env; then
report_error_and_exit "Failed to source uv environment"
fi
fi
fi
install_vastai
touch ~/.no_auto_tmux
elif [ ! -d "$ENV_PATH" ]; then
echo "setting up venv" echo "setting up venv"
if ! which uv; then if ! which uv; then
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
@@ -132,10 +161,27 @@ then
if ! git clone "${PYWORKER_REPO:-https://github.com/vast-ai/pyworker}" "$SERVER_DIR"; then if ! git clone "${PYWORKER_REPO:-https://github.com/vast-ai/pyworker}" "$SERVER_DIR"; then
report_error_and_exit "Failed to clone pyworker repository" report_error_and_exit "Failed to clone pyworker repository"
fi fi
elif [ "$FORCE_UPDATE" = true ]; then
echo "Force updating pyworker repository"
if ! (cd "$SERVER_DIR" && git fetch --all); then
report_error_and_exit "Failed to fetch pyworker repository updates"
fi
fi fi
if [[ -n ${PYWORKER_REF:-} ]]; then if [[ -n ${PYWORKER_REF:-} ]]; then
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF"); then if [ "$FORCE_UPDATE" = true ]; then
report_error_and_exit "Failed to checkout pyworker reference: $PYWORKER_REF" echo "Force updating to pyworker reference: $PYWORKER_REF"
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
fi
else
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF"); then
report_error_and_exit "Failed to checkout pyworker reference: $PYWORKER_REF"
fi
fi
elif [ "$FORCE_UPDATE" = true ]; then
echo "Force updating pyworker to latest"
if ! (cd "$SERVER_DIR" && git pull); then
report_error_and_exit "Failed to pull latest pyworker changes"
fi fi
fi fi
@@ -151,7 +197,7 @@ then
report_error_and_exit "Failed to install Python requirements" report_error_and_exit "Failed to install Python requirements"
fi fi
install_vastai_sdk install_vastai
if ! touch ~/.no_auto_tmux; then if ! touch ~/.no_auto_tmux; then
report_error_and_exit "Failed to create ~/.no_auto_tmux" report_error_and_exit "Failed to create ~/.no_auto_tmux"
@@ -162,11 +208,44 @@ else
report_error_and_exit "Failed to source uv environment" report_error_and_exit "Failed to source uv environment"
fi fi
fi fi
if ! source "$WORKSPACE_DIR/worker-env/bin/activate"; then if ! source "$ENV_PATH/bin/activate"; then
report_error_and_exit "Failed to activate existing virtual environment" report_error_and_exit "Failed to activate existing virtual environment"
fi fi
echo "environment activated" echo "environment activated"
echo "venv: $VIRTUAL_ENV" echo "venv: $VIRTUAL_ENV"
# Handle force update for existing environment
if [ "$FORCE_UPDATE" = true ]; then
echo "Performing force update on existing environment"
if [[ -d $SERVER_DIR ]]; then
echo "Force updating pyworker repository"
if ! (cd "$SERVER_DIR" && git fetch --all); then
report_error_and_exit "Failed to fetch pyworker repository updates"
fi
if [[ -n ${PYWORKER_REF:-} ]]; then
echo "Force updating to pyworker reference: $PYWORKER_REF"
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
fi
else
echo "Force updating pyworker to latest"
if ! (cd "$SERVER_DIR" && git pull); then
report_error_and_exit "Failed to pull latest pyworker changes"
fi
fi
fi
install_vastai
fi
fi
# Remove force update flag after successful update
if [ "$FORCE_UPDATE" = true ]; then
echo "Removing force update flag"
rm -f "/.force_update"
echo "Force update completed successfully"
fi fi
if [ "$USE_SSL" = true ]; then if [ "$USE_SSL" = true ]; then
@@ -204,16 +283,51 @@ EOF
report_error_and_exit "Failed to generate SSL certificate request" report_error_and_exit "Failed to generate SSL certificate request"
fi fi
if ! curl --header 'Content-Type: application/octet-stream' \ max_retries=5
--data-binary @/etc/instance.csr \ retry_delay=2
-X \ for attempt in $(seq 1 "$max_retries"); do
POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID" > /etc/instance.crt; then http_code=$(curl -sS -o /etc/instance.crt -w '%{http_code}' \
report_error_and_exit "Failed to sign SSL certificate" --header 'Content-Type: application/octet-stream' \
fi --data-binary @/etc/instance.csr \
-X POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID")
if [ "$http_code" -ge 200 ] && [ "$http_code" -lt 300 ]; then
break
fi
echo "SSL cert signing attempt $attempt/$max_retries failed (HTTP $http_code)"
if [ "$attempt" -eq "$max_retries" ]; then
report_error_and_exit "Failed to sign SSL certificate after $max_retries attempts (HTTP $http_code)"
fi
sleep "$retry_delay"
retry_delay=$((retry_delay * 2))
done
fi fi
export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED
# ─── SDK Deployment Mode ───────────────────────────────────────────────
if [ "$IS_DEPLOYMENT" = "true" ]; then
echo "=== SDK Deployment Mode ==="
echo "DEPLOYMENT_ID: $DEPLOYMENT_ID"
DEPLOY_DIR="/workspace/deployment"
mkdir -p "$DEPLOY_DIR"
VAST_API_BASE="${VAST_API_BASE:-https://console.vast.ai}"
# Download deployment code, retrying until the blob is available on S3.
# The s3_key exists in the DB as soon as the deployment is created, but the
# actual upload may still be in flight from the client side.
# Install SDK (uses the install_vastai function which supports SDK_BRANCH/SDK_VERSION)
install_vastai
# Run deployment in serve mode
export VAST_DEPLOYMENT_MODE=serve
echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py"
serve-vast-deployment
exit $?
fi
# ─── End SDK Deployment Mode ───────────────────────────────────────────
if ! cd "$SERVER_DIR"; then if ! cd "$SERVER_DIR"; then
report_error_and_exit "Failed to cd into SERVER_DIR: $SERVER_DIR" report_error_and_exit "Failed to cd into SERVER_DIR: $SERVER_DIR"
fi fi