Compare commits

..

12 Commits

Author SHA1 Message Date
Lucas Armand 4e951f4912 test vastai_sdk test package 2026-04-08 13:38:22 -07:00
Lucas Armand f636012685 add test index 2026-04-08 13:18:46 -07:00
Lucas Armand ddb986d561 use test package 2026-04-08 13:12:27 -07:00
Lucas Armand 99a3319e66 Point to vast-cli 2026-04-08 12:30:20 -07:00
Lucas Armand 83c31e25a9 Add force update detection 2026-03-31 13:46:22 -07:00
Lucas Armand fbe1dca6fa more env_path fixes 2026-03-30 16:28:51 -07:00
Lucas Armand 4c3120dbc5 allow override env_path 2026-03-30 16:25:01 -07:00
Lucas Armand d7d9b915f6 allow break system packages 2026-03-30 16:09:17 -07:00
Lucas Armand 4660b337fb Check for USE_SYSTEM_PYTHON 2026-03-30 14:46:38 -07:00
edgaratvast 7506ecb6b5 directly invoke one stop shop setup executable exported by vastai pip package for deployments (#82) 2026-03-26 10:59:49 -07:00
LucasArmandVast 50633c5003 Update deployments script with retries. (#81) 2026-03-23 14:58:32 -07:00
LucasArmandVast 2e8f18276f Add beta deployments script (#80) 2026-03-23 14:14:06 -07:00
4 changed files with 86 additions and 40 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ This repository contains **example PyWorkers** used by Vast.ai's default Serve
- Optionally supports FIFO queueing when the backend cannot process concurrent requests - Optionally supports FIFO queueing when the backend cannot process concurrent requests
- Detects readiness/failure from model logs and runs a benchmark to estimate throughput - Detects readiness/failure from model logs and runs a benchmark to estimate throughput
> Important: The **core PyWorker framework** (Worker, WorkerConfig, HandlerConfig, BenchmarkConfig, LogActionConfig) is provided by the **`vastai` / `vastai-sdk`** Python package (https://github.com/vast-ai/vast-sdk). This repo focuses on *worker implementations and examples*, not the framework internals. > Important: The **core PyWorker framework** (Worker, WorkerConfig, HandlerConfig, BenchmarkConfig, LogActionConfig) is provided by the **`vastai`** Python package (https://github.com/vast-ai/vast-cli). This repo focuses on *worker implementations and examples*, not the framework internals.
## Repository Purpose ## Repository Purpose
+4 -4
View File
@@ -1,16 +1,16 @@
# Where did the PyWorker code go? # Where did the PyWorker code go?
We have moved the PyWorker source code into the `vastai-sdk` Python SDK. We have moved the PyWorker source code into the `vastai` Python package.
You can install it with You can install it with
``` ```
pip install vastai-sdk pip install vastai
``` ```
All of the source code can be found here: All of the source code can be found here:
https://github.com/vast-ai/vast-sdk https://github.com/vast-ai/vast-cli
And can be imported from vastai.serverless.server.lib And can be imported from vastai.serverless.server.lib
Serverless instances automatically run the start_server.sh script, which installs the vastai-sdk. Serverless instances automatically run the start_server.sh script, which installs the vastai package.
This is how the PyWorker source code makes it onto your serverless instances. This is how the PyWorker source code makes it onto your serverless instances.
You provide a worker.py file in your PYWORKER_REPO, and the start_server.sh will You provide a worker.py file in your PYWORKER_REPO, and the start_server.sh will
create and run a PyWorker according to your configuration defined in the file. create and run a PyWorker according to your configuration defined in the file.
+1 -1
View File
@@ -1 +1 @@
vastai-sdk>=0.3.0 vastai>=0.3.0
+77 -31
View File
@@ -12,7 +12,7 @@ fi
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}" WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
SERVER_DIR="$WORKSPACE_DIR/vast-pyworker" SERVER_DIR="$WORKSPACE_DIR/vast-pyworker"
ENV_PATH="$WORKSPACE_DIR/worker-env" ENV_PATH="${ENV_PATH:-$WORKSPACE_DIR/worker-env}"
DEBUG_LOG="$WORKSPACE_DIR/debug.log" DEBUG_LOG="$WORKSPACE_DIR/debug.log"
PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log" PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"
@@ -53,36 +53,39 @@ JSON
exit 1 exit 1
} }
function install_vastai_sdk() { function install_vastai() {
local force_flag="" local uv_flags=()
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
uv_flags+=(--system --break-system-packages)
fi
if [ "$FORCE_UPDATE" = true ]; then if [ "$FORCE_UPDATE" = true ]; then
force_flag="--force-reinstall" uv_flags+=(--force-reinstall)
echo "Force reinstalling vastai-sdk" echo "Force reinstalling vastai"
fi fi
# If SDK_BRANCH is set, install vastai-sdk from the vast-sdk repo at that branch/tag/commit. # If SDK_BRANCH is set, install vastai from the vast-cli repo at that branch/tag/commit.
if [ -n "${SDK_BRANCH:-}" ]; then if [ -n "${SDK_BRANCH:-}" ]; then
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}" echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}"
fi fi
echo "Installing vastai-sdk from https://github.com/vast-ai/vast-sdk/ @ ${SDK_BRANCH}" echo "Installing vastai from https://github.com/vast-ai/vast-cli/ @ ${SDK_BRANCH}"
if ! uv pip install $force_flag "vastai-sdk @ git+https://github.com/vast-ai/vast-sdk.git@${SDK_BRANCH}"; then if ! uv pip install "${uv_flags[@]}" "vastai @ git+https://github.com/vast-ai/vast-cli.git@${SDK_BRANCH}"; then
report_error_and_exit "Failed to install vastai-sdk from vast-ai/vast-sdk@${SDK_BRANCH}" report_error_and_exit "Failed to install vastai from vast-ai/vast-cli@${SDK_BRANCH}"
fi fi
return 0 return 0
fi fi
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "Installing vastai-sdk version ${SDK_VERSION}" echo "Installing vastai version ${SDK_VERSION}"
if ! uv pip install $force_flag "vastai-sdk==${SDK_VERSION}"; then if ! uv pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ "${uv_flags[@]}" "vastai-sdk-vast==${SDK_VERSION}"; then
report_error_and_exit "Failed to install vastai-sdk==${SDK_VERSION}" report_error_and_exit "Failed to install vastai-vast==${SDK_VERSION}"
fi fi
return 0 return 0
fi fi
echo "Installing default vastai-sdk" echo "Installing default vastai"
if ! uv pip install $force_flag vastai-sdk; then if ! uv pip install "${uv_flags[@]}" vastai; then
report_error_and_exit "Failed to install vastai-sdk" report_error_and_exit "Failed to install vastai"
fi fi
} }
@@ -125,8 +128,21 @@ if ! grep -q "VAST" /etc/environment; then
fi fi
fi fi
if [ ! -d "$ENV_PATH" ] if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
then echo "Using system Python: $(which python3)"
if ! which uv > /dev/null 2>&1; then
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
report_error_and_exit "Failed to install uv package manager"
fi
if [[ -f ~/.local/bin/env ]]; then
if ! source ~/.local/bin/env; then
report_error_and_exit "Failed to source uv environment"
fi
fi
fi
install_vastai
touch ~/.no_auto_tmux
elif [ ! -d "$ENV_PATH" ]; then
echo "setting up venv" echo "setting up venv"
if ! which uv; then if ! which uv; then
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
@@ -154,7 +170,7 @@ then
if [[ -n ${PYWORKER_REF:-} ]]; then if [[ -n ${PYWORKER_REF:-} ]]; then
if [ "$FORCE_UPDATE" = true ]; then if [ "$FORCE_UPDATE" = true ]; then
echo "Force updating to pyworker reference: $PYWORKER_REF" echo "Force updating to pyworker reference: $PYWORKER_REF"
if ! (cd "$SERVER_DIR" && git fetch --all && git checkout "$PYWORKER_REF" && git pull); then if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF" report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
fi fi
else else
@@ -181,7 +197,7 @@ then
report_error_and_exit "Failed to install Python requirements" report_error_and_exit "Failed to install Python requirements"
fi fi
install_vastai_sdk install_vastai
if ! touch ~/.no_auto_tmux; then if ! touch ~/.no_auto_tmux; then
report_error_and_exit "Failed to create ~/.no_auto_tmux" report_error_and_exit "Failed to create ~/.no_auto_tmux"
@@ -192,7 +208,7 @@ else
report_error_and_exit "Failed to source uv environment" report_error_and_exit "Failed to source uv environment"
fi fi
fi fi
if ! source "$WORKSPACE_DIR/worker-env/bin/activate"; then if ! source "$ENV_PATH/bin/activate"; then
report_error_and_exit "Failed to activate existing virtual environment" report_error_and_exit "Failed to activate existing virtual environment"
fi fi
echo "environment activated" echo "environment activated"
@@ -202,7 +218,6 @@ else
if [ "$FORCE_UPDATE" = true ]; then if [ "$FORCE_UPDATE" = true ]; then
echo "Performing force update on existing environment" echo "Performing force update on existing environment"
# Update pyworker repository
if [[ -d $SERVER_DIR ]]; then if [[ -d $SERVER_DIR ]]; then
echo "Force updating pyworker repository" echo "Force updating pyworker repository"
if ! (cd "$SERVER_DIR" && git fetch --all); then if ! (cd "$SERVER_DIR" && git fetch --all); then
@@ -222,19 +237,15 @@ else
fi fi
fi fi
# Force reinstall SDK install_vastai
install_vastai_sdk
fi fi
fi fi
# Remove force update flag after successful update # Remove force update flag after successful update
if [ "$FORCE_UPDATE" = true ]; then if [ "$FORCE_UPDATE" = true ]; then
echo "Removing force update flag" echo "Removing force update flag"
if ! rm -f "/.force_update"; then rm -f "/.force_update"
echo "WARNING: Failed to remove /.force_update, continuing anyway"
else
echo "Force update completed successfully" echo "Force update completed successfully"
fi
fi fi
if [ "$USE_SSL" = true ]; then if [ "$USE_SSL" = true ]; then
@@ -272,16 +283,51 @@ EOF
report_error_and_exit "Failed to generate SSL certificate request" report_error_and_exit "Failed to generate SSL certificate request"
fi fi
if ! curl --header 'Content-Type: application/octet-stream' \ max_retries=5
retry_delay=2
for attempt in $(seq 1 "$max_retries"); do
http_code=$(curl -sS -o /etc/instance.crt -w '%{http_code}' \
--header 'Content-Type: application/octet-stream' \
--data-binary @/etc/instance.csr \ --data-binary @/etc/instance.csr \
-X \ -X POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID")
POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID" > /etc/instance.crt; then if [ "$http_code" -ge 200 ] && [ "$http_code" -lt 300 ]; then
report_error_and_exit "Failed to sign SSL certificate" break
fi fi
echo "SSL cert signing attempt $attempt/$max_retries failed (HTTP $http_code)"
if [ "$attempt" -eq "$max_retries" ]; then
report_error_and_exit "Failed to sign SSL certificate after $max_retries attempts (HTTP $http_code)"
fi
sleep "$retry_delay"
retry_delay=$((retry_delay * 2))
done
fi fi
export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED
# ─── SDK Deployment Mode ───────────────────────────────────────────────
if [ "$IS_DEPLOYMENT" = "true" ]; then
echo "=== SDK Deployment Mode ==="
echo "DEPLOYMENT_ID: $DEPLOYMENT_ID"
DEPLOY_DIR="/workspace/deployment"
mkdir -p "$DEPLOY_DIR"
VAST_API_BASE="${VAST_API_BASE:-https://console.vast.ai}"
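# Download deployment code, retrying until the blob is available on S3.
# The s3_key exists in the DB as soon as the deployment is created, but the
# actual upload may still be in flight from the client side.
# NOTE(review): no download or retry is performed in this script itself;
# presumably serve-vast-deployment (invoked below) handles it. TODO confirm.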
# Install SDK (uses the install_vastai function which supports SDK_BRANCH/SDK_VERSION)
install_vastai
# Run deployment in serve mode
export VAST_DEPLOYMENT_MODE=serve
echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py"
serve-vast-deployment
exit $?
fi
# ─── End SDK Deployment Mode ───────────────────────────────────────────
if ! cd "$SERVER_DIR"; then if ! cd "$SERVER_DIR"; then
report_error_and_exit "Failed to cd into SERVER_DIR: $SERVER_DIR" report_error_and_exit "Failed to cd into SERVER_DIR: $SERVER_DIR"
fi fi