Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b02ade1df5 | |||
| 0b6f381dd7 | |||
| 74f8b6a1ef | |||
| fa2bf082c2 | |||
| 6a57ff8e0a | |||
| 375633cb18 | |||
| ccd29ed8b6 | |||
| 2b30c69933 | |||
| 4d99c12820 | |||
| 6060f8ce0c | |||
| 067fa936fb | |||
| 405a8f1c0d | |||
| 12f4f23d39 | |||
| e2a771bb5a | |||
| 0cd64adfc4 | |||
| 6f795b8fb8 | |||
| 4bcc508473 | |||
| 74d7330800 | |||
| 2ce0450809 |
+11
-2
@@ -1,2 +1,11 @@
|
|||||||
vastai-sdk>=0.3.0
|
aiohttp[speedups]==3.10.1
|
||||||
nltk==3.9.4
|
anyio~=4.4
|
||||||
|
lib~=4.0
|
||||||
|
nltk~=3.9
|
||||||
|
psutil~=6.0
|
||||||
|
pycryptodome~=3.20
|
||||||
|
Requests~=2.32
|
||||||
|
transformers~=4.52
|
||||||
|
utils==1.0.*
|
||||||
|
hf_transfer>=0.1.9
|
||||||
|
git+https://github.com/vast-ai/vast-sdk.git@session
|
||||||
+18
-162
@@ -2,17 +2,10 @@
|
|||||||
|
|
||||||
set -e -o pipefail
|
set -e -o pipefail
|
||||||
|
|
||||||
# Check for force update flag
|
|
||||||
FORCE_UPDATE=false
|
|
||||||
if [ -f "/.force_update" ]; then
|
|
||||||
echo "Force update flag detected at /.force_update"
|
|
||||||
FORCE_UPDATE=true
|
|
||||||
fi
|
|
||||||
|
|
||||||
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
|
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
|
||||||
|
|
||||||
SERVER_DIR="$WORKSPACE_DIR/vast-pyworker"
|
SERVER_DIR="$WORKSPACE_DIR/vast-pyworker"
|
||||||
ENV_PATH="${ENV_PATH:-$WORKSPACE_DIR/worker-env}"
|
ENV_PATH="$WORKSPACE_DIR/worker-env"
|
||||||
DEBUG_LOG="$WORKSPACE_DIR/debug.log"
|
DEBUG_LOG="$WORKSPACE_DIR/debug.log"
|
||||||
PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"
|
PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"
|
||||||
|
|
||||||
@@ -53,42 +46,6 @@ JSON
|
|||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
function install_vastai_sdk() {
|
|
||||||
local uv_flags=()
|
|
||||||
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
|
|
||||||
uv_flags+=(--system --break-system-packages)
|
|
||||||
fi
|
|
||||||
if [ "$FORCE_UPDATE" = true ]; then
|
|
||||||
uv_flags+=(--force-reinstall)
|
|
||||||
echo "Force reinstalling vastai"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If SDK_BRANCH is set, install vastai from the vast-cli repo at that branch/tag/commit.
|
|
||||||
if [ -n "${SDK_BRANCH:-}" ]; then
|
|
||||||
if [ -n "${SDK_VERSION:-}" ]; then
|
|
||||||
echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}"
|
|
||||||
fi
|
|
||||||
echo "Installing vastai from https://github.com/vast-ai/vast-cli/ @ ${SDK_BRANCH}"
|
|
||||||
if ! uv pip install "${uv_flags[@]}" "vastai @ git+https://github.com/vast-ai/vast-cli.git@${SDK_BRANCH}"; then
|
|
||||||
report_error_and_exit "Failed to install vastai from vast-ai/vast-cli@${SDK_BRANCH}"
|
|
||||||
fi
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "${SDK_VERSION:-}" ]; then
|
|
||||||
echo "Installing vastai version ${SDK_VERSION}"
|
|
||||||
if ! uv pip install "${uv_flags[@]}" "vastai==${SDK_VERSION}"; then
|
|
||||||
report_error_and_exit "Failed to install vastai==${SDK_VERSION}"
|
|
||||||
fi
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Installing default vastai"
|
|
||||||
if ! uv pip install "${uv_flags[@]}" vastai; then
|
|
||||||
report_error_and_exit "Failed to install vastai"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
[ -n "$BACKEND" ] && [ -z "$HF_TOKEN" ] && report_error_and_exit "HF_TOKEN must be set when BACKEND is set!"
|
[ -n "$BACKEND" ] && [ -z "$HF_TOKEN" ] && report_error_and_exit "HF_TOKEN must be set when BACKEND is set!"
|
||||||
[ -z "$CONTAINER_ID" ] && report_error_and_exit "CONTAINER_ID must be set!"
|
[ -z "$CONTAINER_ID" ] && report_error_and_exit "CONTAINER_ID must be set!"
|
||||||
[ "$BACKEND" = "comfyui" ] && [ -z "$COMFY_MODEL" ] && report_error_and_exit "For comfyui backends, COMFY_MODEL must be set!"
|
[ "$BACKEND" = "comfyui" ] && [ -z "$COMFY_MODEL" ] && report_error_and_exit "For comfyui backends, COMFY_MODEL must be set!"
|
||||||
@@ -106,8 +63,7 @@ echo_var DEBUG_LOG
|
|||||||
echo_var PYWORKER_LOG
|
echo_var PYWORKER_LOG
|
||||||
echo_var MODEL_LOG
|
echo_var MODEL_LOG
|
||||||
|
|
||||||
ROTATE_MODEL_LOG="${ROTATE_MODEL_LOG:-false}"
|
if [ -e "$MODEL_LOG" ]; then
|
||||||
if [ "$ROTATE_MODEL_LOG" = "true" ] && [ -e "$MODEL_LOG" ]; then
|
|
||||||
echo "Rotating model log at $MODEL_LOG to $MODEL_LOG.old"
|
echo "Rotating model log at $MODEL_LOG to $MODEL_LOG.old"
|
||||||
if ! cat "$MODEL_LOG" >> "$MODEL_LOG.old"; then
|
if ! cat "$MODEL_LOG" >> "$MODEL_LOG.old"; then
|
||||||
report_error_and_exit "Failed to rotate model log"
|
report_error_and_exit "Failed to rotate model log"
|
||||||
@@ -128,21 +84,8 @@ if ! grep -q "VAST" /etc/environment; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${USE_SYSTEM_PYTHON:-}" = "true" ]; then
|
if [ ! -d "$ENV_PATH" ]
|
||||||
echo "Using system Python: $(which python3)"
|
then
|
||||||
if ! which uv > /dev/null 2>&1; then
|
|
||||||
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
|
|
||||||
report_error_and_exit "Failed to install uv package manager"
|
|
||||||
fi
|
|
||||||
if [[ -f ~/.local/bin/env ]]; then
|
|
||||||
if ! source ~/.local/bin/env; then
|
|
||||||
report_error_and_exit "Failed to source uv environment"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
install_vastai_sdk
|
|
||||||
touch ~/.no_auto_tmux
|
|
||||||
elif [ ! -d "$ENV_PATH" ]; then
|
|
||||||
echo "setting up venv"
|
echo "setting up venv"
|
||||||
if ! which uv; then
|
if ! which uv; then
|
||||||
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
|
if ! curl -LsSf https://astral.sh/uv/install.sh | sh; then
|
||||||
@@ -161,34 +104,17 @@ elif [ ! -d "$ENV_PATH" ]; then
|
|||||||
if ! git clone "${PYWORKER_REPO:-https://github.com/vast-ai/pyworker}" "$SERVER_DIR"; then
|
if ! git clone "${PYWORKER_REPO:-https://github.com/vast-ai/pyworker}" "$SERVER_DIR"; then
|
||||||
report_error_and_exit "Failed to clone pyworker repository"
|
report_error_and_exit "Failed to clone pyworker repository"
|
||||||
fi
|
fi
|
||||||
elif [ "$FORCE_UPDATE" = true ]; then
|
|
||||||
echo "Force updating pyworker repository"
|
|
||||||
if ! (cd "$SERVER_DIR" && git fetch --all); then
|
|
||||||
report_error_and_exit "Failed to fetch pyworker repository updates"
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
if [[ -n ${PYWORKER_REF:-} ]]; then
|
if [[ -n ${PYWORKER_REF:-} ]]; then
|
||||||
if [ "$FORCE_UPDATE" = true ]; then
|
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF"); then
|
||||||
echo "Force updating to pyworker reference: $PYWORKER_REF"
|
report_error_and_exit "Failed to checkout pyworker reference: $PYWORKER_REF"
|
||||||
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
|
|
||||||
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF"); then
|
|
||||||
report_error_and_exit "Failed to checkout pyworker reference: $PYWORKER_REF"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
elif [ "$FORCE_UPDATE" = true ]; then
|
|
||||||
echo "Force updating pyworker to latest"
|
|
||||||
if ! (cd "$SERVER_DIR" && git pull); then
|
|
||||||
report_error_and_exit "Failed to pull latest pyworker changes"
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! uv venv --python-preference only-managed "$ENV_PATH" -p 3.10; then
|
if ! uv venv --python-preference only-managed "$ENV_PATH" -p 3.10; then
|
||||||
report_error_and_exit "Failed to create virtual environment"
|
report_error_and_exit "Failed to create virtual environment"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! source "$ENV_PATH/bin/activate"; then
|
if ! source "$ENV_PATH/bin/activate"; then
|
||||||
report_error_and_exit "Failed to activate virtual environment"
|
report_error_and_exit "Failed to activate virtual environment"
|
||||||
fi
|
fi
|
||||||
@@ -197,8 +123,6 @@ elif [ ! -d "$ENV_PATH" ]; then
|
|||||||
report_error_and_exit "Failed to install Python requirements"
|
report_error_and_exit "Failed to install Python requirements"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
install_vastai_sdk
|
|
||||||
|
|
||||||
if ! touch ~/.no_auto_tmux; then
|
if ! touch ~/.no_auto_tmux; then
|
||||||
report_error_and_exit "Failed to create ~/.no_auto_tmux"
|
report_error_and_exit "Failed to create ~/.no_auto_tmux"
|
||||||
fi
|
fi
|
||||||
@@ -208,44 +132,11 @@ else
|
|||||||
report_error_and_exit "Failed to source uv environment"
|
report_error_and_exit "Failed to source uv environment"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
if ! source "$ENV_PATH/bin/activate"; then
|
if ! source "$WORKSPACE_DIR/worker-env/bin/activate"; then
|
||||||
report_error_and_exit "Failed to activate existing virtual environment"
|
report_error_and_exit "Failed to activate existing virtual environment"
|
||||||
fi
|
fi
|
||||||
echo "environment activated"
|
echo "environment activated"
|
||||||
echo "venv: $VIRTUAL_ENV"
|
echo "venv: $VIRTUAL_ENV"
|
||||||
|
|
||||||
# Handle force update for existing environment
|
|
||||||
if [ "$FORCE_UPDATE" = true ]; then
|
|
||||||
echo "Performing force update on existing environment"
|
|
||||||
|
|
||||||
if [[ -d $SERVER_DIR ]]; then
|
|
||||||
echo "Force updating pyworker repository"
|
|
||||||
if ! (cd "$SERVER_DIR" && git fetch --all); then
|
|
||||||
report_error_and_exit "Failed to fetch pyworker repository updates"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -n ${PYWORKER_REF:-} ]]; then
|
|
||||||
echo "Force updating to pyworker reference: $PYWORKER_REF"
|
|
||||||
if ! (cd "$SERVER_DIR" && git checkout "$PYWORKER_REF" && git pull); then
|
|
||||||
report_error_and_exit "Failed to force update pyworker reference: $PYWORKER_REF"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "Force updating pyworker to latest"
|
|
||||||
if ! (cd "$SERVER_DIR" && git pull); then
|
|
||||||
report_error_and_exit "Failed to pull latest pyworker changes"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
install_vastai_sdk
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Remove force update flag after successful update
|
|
||||||
if [ "$FORCE_UPDATE" = true ]; then
|
|
||||||
echo "Removing force update flag"
|
|
||||||
rm -f "/.force_update"
|
|
||||||
echo "Force update completed successfully"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$USE_SSL" = true ]; then
|
if [ "$USE_SSL" = true ]; then
|
||||||
@@ -283,51 +174,16 @@ EOF
|
|||||||
report_error_and_exit "Failed to generate SSL certificate request"
|
report_error_and_exit "Failed to generate SSL certificate request"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
max_retries=5
|
if ! curl --header 'Content-Type: application/octet-stream' \
|
||||||
retry_delay=2
|
--data-binary @/etc/instance.csr \
|
||||||
for attempt in $(seq 1 "$max_retries"); do
|
-X \
|
||||||
http_code=$(curl -sS -o /etc/instance.crt -w '%{http_code}' \
|
POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID" > /etc/instance.crt; then
|
||||||
--header 'Content-Type: application/octet-stream' \
|
report_error_and_exit "Failed to sign SSL certificate"
|
||||||
--data-binary @/etc/instance.csr \
|
fi
|
||||||
-X POST "https://console.vast.ai/api/v0/sign_cert/?instance_id=$CONTAINER_ID")
|
|
||||||
if [ "$http_code" -ge 200 ] && [ "$http_code" -lt 300 ]; then
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
echo "SSL cert signing attempt $attempt/$max_retries failed (HTTP $http_code)"
|
|
||||||
if [ "$attempt" -eq "$max_retries" ]; then
|
|
||||||
report_error_and_exit "Failed to sign SSL certificate after $max_retries attempts (HTTP $http_code)"
|
|
||||||
fi
|
|
||||||
sleep "$retry_delay"
|
|
||||||
retry_delay=$((retry_delay * 2))
|
|
||||||
done
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED
|
export REPORT_ADDR WORKER_PORT USE_SSL UNSECURED
|
||||||
|
|
||||||
# ─── SDK Deployment Mode ───────────────────────────────────────────────
|
|
||||||
if [ "$IS_DEPLOYMENT" = "true" ]; then
|
|
||||||
echo "=== SDK Deployment Mode ==="
|
|
||||||
echo "DEPLOYMENT_ID: $DEPLOYMENT_ID"
|
|
||||||
|
|
||||||
DEPLOY_DIR="/workspace/deployment"
|
|
||||||
mkdir -p "$DEPLOY_DIR"
|
|
||||||
|
|
||||||
VAST_API_BASE="${VAST_API_BASE:-https://console.vast.ai}"
|
|
||||||
|
|
||||||
# Download deployment code, retrying until the blob is available on S3.
|
|
||||||
# The s3_key exists in the DB as soon as the deployment is created, but the
|
|
||||||
# actual upload may still be in flight from the client side.
|
|
||||||
|
|
||||||
# Install SDK (uses the install_vastai_sdk function which supports SDK_BRANCH/SDK_VERSION)
|
|
||||||
install_vastai_sdk
|
|
||||||
# Run deployment in serve mode
|
|
||||||
export VAST_DEPLOYMENT_MODE=serve
|
|
||||||
echo "Starting deployment: python3 $DEPLOY_DIR/deployment.py"
|
|
||||||
serve-vast-deployment
|
|
||||||
exit $?
|
|
||||||
fi
|
|
||||||
# ─── End SDK Deployment Mode ───────────────────────────────────────────
|
|
||||||
|
|
||||||
if ! cd "$SERVER_DIR"; then
|
if ! cd "$SERVER_DIR"; then
|
||||||
report_error_and_exit "Failed to cd into SERVER_DIR: $SERVER_DIR"
|
report_error_and_exit "Failed to cd into SERVER_DIR: $SERVER_DIR"
|
||||||
fi
|
fi
|
||||||
@@ -339,19 +195,19 @@ set +e
|
|||||||
PY_STATUS=1
|
PY_STATUS=1
|
||||||
|
|
||||||
if [ -f "$SERVER_DIR/worker.py" ]; then
|
if [ -f "$SERVER_DIR/worker.py" ]; then
|
||||||
echo "Running worker.py"
|
echo "trying worker.py"
|
||||||
python3 -m "worker" |& tee -a "$PYWORKER_LOG"
|
python3 -m "worker" |& tee -a "$PYWORKER_LOG"
|
||||||
PY_STATUS=${PIPESTATUS[0]}
|
PY_STATUS=${PIPESTATUS[0]}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then
|
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then
|
||||||
echo "Running workers.${BACKEND}.worker"
|
echo "trying workers.${BACKEND}.worker"
|
||||||
python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG"
|
python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG"
|
||||||
PY_STATUS=${PIPESTATUS[0]}
|
PY_STATUS=${PIPESTATUS[0]}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then
|
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then
|
||||||
echo "Running workers.${BACKEND}.server"
|
echo "trying workers.${BACKEND}.server"
|
||||||
python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG"
|
python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG"
|
||||||
PY_STATUS=${PIPESTATUS[0]}
|
PY_STATUS=${PIPESTATUS[0]}
|
||||||
fi
|
fi
|
||||||
@@ -365,4 +221,4 @@ if [ "${PY_STATUS}" -ne 0 ]; then
|
|||||||
report_error_and_exit "PyWorker exited with status ${PY_STATUS}"
|
report_error_and_exit "PyWorker exited with status ${PY_STATUS}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "PyWorker bootstrap complete"
|
echo "launching PyWorker server done"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
This is the PyWorker implementation for running **ACE Step v1 3.5B** text-to-music workflows in ComfyUI. It provides a unified interface for executing complete ComfyUI audio-generation workflows through a proxy-based architecture and returning generated audio assets.
|
This is the PyWorker implementation for running **ACE Step v1 3.5B** text-to-music workflows in ComfyUI. It provides a unified interface for executing complete ComfyUI audio-generation workflows through a proxy-based architecture and returning generated audio assets.
|
||||||
|
|
||||||
Each request has a static cost of `1000`. ComfyUI does not support concurrent workloads, and there is no provision to run multiple ComfyUI instances per worker node.
|
Each request has a static cost of `100`. ComfyUI does not support concurrent workloads, and there is no provision to run multiple ComfyUI instances per worker node.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,8 @@
|
|||||||
# ComfyUI PyWorker
|
# ComfyUI PyWorker
|
||||||
|
|
||||||
This is the base PyWorker for ComfyUI. It provides a unified interface for running any ComfyUI workflow through a proxy-based architecture. See the [Serverless documentation](https://docs.vast.ai/serverless) for guides and how-to's.
|
This is the base PyWorker for ComfyUI. It provides a unified interface for running any ComfyUI workflow through a proxy-based architecture.
|
||||||
|
|
||||||
The cost for each request has a static value of `100`. ComfyUI does not handle concurrent workloads and there is no current provision to load multiple instances of ComfyUI per worker node.
|
The cost for each request has a static value of `100`. ComfyUI does not handle concurrent workloads and there is no current provision to load multiple instances of ComfyUI per worker node.
|
||||||
|
|
||||||
## Instance Setup
|
|
||||||
|
|
||||||
1. Pick a template
|
|
||||||
|
|
||||||
- [ComfyUI (Serverless)](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=ComfyUI%20(Serverless))
|
|
||||||
|
|
||||||
2. Follow the [getting started guide](https://docs.vast.ai/documentation/serverless/quickstart) for help with configuring your serverless setup. For testing, we recommend that you use the default options presented by the web interface.
|
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
@@ -18,142 +10,6 @@ This worker requires both [ComfyUI](https://github.com/comfyanonymous/ComfyUI) a
|
|||||||
|
|
||||||
A docker image is provided but you may use any if the above requirements are met.
|
A docker image is provided but you may use any if the above requirements are met.
|
||||||
|
|
||||||
## Client
|
|
||||||
|
|
||||||
The client demonstrates how to use the Vast Serverless SDK to generate images, save them locally, and optionally upload to S3-compatible storage.
|
|
||||||
|
|
||||||
### Setup
|
|
||||||
|
|
||||||
1. Clone the PyWorker repository to your local machine and install the necessary requirements for running the test client.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/vast-ai/pyworker
|
|
||||||
cd pyworker
|
|
||||||
pip install uv
|
|
||||||
uv venv -p 3.12
|
|
||||||
source .venv/bin/activate
|
|
||||||
uv pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Set your API key:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export VAST_API_KEY=<your_api_key>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Usage
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Default prompt
|
|
||||||
python -m workers.comfyui-json.client
|
|
||||||
|
|
||||||
# Custom prompt
|
|
||||||
python -m workers.comfyui-json.client --prompt "a cat sitting on a rainbow"
|
|
||||||
|
|
||||||
# With options
|
|
||||||
python -m workers.comfyui-json.client --prompt "sunset" --width 1024 --height 1024 --steps 30
|
|
||||||
|
|
||||||
# Using a custom workflow file
|
|
||||||
python -m workers.comfyui-json.client --workflow my_workflow.json
|
|
||||||
|
|
||||||
# With S3 upload
|
|
||||||
python -m workers.comfyui-json.client --s3
|
|
||||||
```
|
|
||||||
|
|
||||||
### CLI Flags
|
|
||||||
|
|
||||||
| Flag | Default | Description |
|
|
||||||
|------|---------|-------------|
|
|
||||||
| `--endpoint` | `my-comfyui-endpoint` | Vast endpoint name |
|
|
||||||
| `--prompt` | (default) | Text prompt for image generation |
|
|
||||||
| `--workflow` | (none) | Path to custom workflow JSON file |
|
|
||||||
| `--width` | 512 | Image width in pixels |
|
|
||||||
| `--height` | 512 | Image height in pixels |
|
|
||||||
| `--steps` | 20 | Number of denoising steps |
|
|
||||||
| `--seed` | (random) | Random seed for reproducibility |
|
|
||||||
| `--s3` | (disabled) | Upload generated images to S3 |
|
|
||||||
|
|
||||||
### Output
|
|
||||||
|
|
||||||
Images are saved to `./generated_images/comfy_{seed}.png`.
|
|
||||||
|
|
||||||
### S3 Upload (Optional)
|
|
||||||
|
|
||||||
You can optionally upload generated images to an S3-compatible storage service (AWS S3, Cloudflare R2, Backblaze B2, etc.) by using the `--s3` flag.
|
|
||||||
|
|
||||||
**1. Set environment variables:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export S3_ENDPOINT_URL="https://your-account.r2.cloudflarestorage.com"
|
|
||||||
export S3_BUCKET_NAME="my-bucket"
|
|
||||||
export S3_ACCESS_KEY_ID="your-access-key-id"
|
|
||||||
export S3_SECRET_ACCESS_KEY="your-secret-access-key"
|
|
||||||
```
|
|
||||||
|
|
||||||
**2. Run with S3 upload enabled:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m workers.comfyui-json.client --prompt "a beautiful landscape" --s3
|
|
||||||
```
|
|
||||||
|
|
||||||
Images will be saved locally AND uploaded to `s3://{bucket}/comfyui/{filename}`.
|
|
||||||
|
|
||||||
**Note:** Requires `boto3` (`pip install boto3`).
|
|
||||||
|
|
||||||
## Benchmarking
|
|
||||||
|
|
||||||
### Custom Benchmark Workflows
|
|
||||||
|
|
||||||
You can provide a custom ComfyUI workflow for benchmarking. This allows you to test performance using your preferred models and workflow complexity.
|
|
||||||
|
|
||||||
**Ways to provide the benchmark file** (in resolution order — first match wins):
|
|
||||||
|
|
||||||
1. **Fork this repository** and commit your workflow to `workers/comfyui-json/misc/benchmark.json`.
|
|
||||||
2. **Write the file during provisioning** to a path *outside* the pyworker tree (e.g. `/workspace/benchmark.json`) and export `BENCHMARK_JSON_PATH` so the worker can find it. The pyworker repo is cloned by `start_server.sh` *after* provisioning runs, so provisioning cannot write into `misc/` directly — the destination would be clobbered, or the clone would fail.
|
|
||||||
3. **Run on the vast.ai ComfyUI base image.** Its `convert-workflows.sh` maintains `/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json` as a symlink to the first provisioned workflow; the worker reads this automatically when neither of the above is set. No env var required.
|
|
||||||
|
|
||||||
If `BENCHMARK_JSON_PATH` is set but points at a missing or unreadable file, the worker logs a warning and falls through to the next tier rather than going straight to the SD1.5 fallback.
|
|
||||||
|
|
||||||
An example workflow is provided at `workers/comfyui-json/misc/benchmark.json.example`. To ensure varied generations, use the placeholder `__RANDOM_INT__` in place of static seed values — it will be replaced with a random integer for each benchmark run.
|
|
||||||
|
|
||||||
### Default Benchmark (Fallback)
|
|
||||||
|
|
||||||
If `benchmark.json` is not available, a simple image generation benchmark runs when each worker initializes. This validates GPU performance and helps identify underperforming machines.
|
|
||||||
|
|
||||||
The default benchmark uses Stable Diffusion v1.5 with ComfyUI's standard text-to-image workflow. Configure it using these environment variables:
|
|
||||||
|
|
||||||
| Environment Variable | Default Value | Description |
|
|
||||||
| -------------------- | ------------- | ----------- |
|
|
||||||
| BENCHMARK_JSON_PATH | (unset) | Path to a custom workflow file outside the pyworker tree. Used if `misc/benchmark.json` is absent. Falls through to `/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json` if set but missing. |
|
|
||||||
| BENCHMARK_TEST_WIDTH | 512 | Fallback benchmark: image width (pixels) |
|
|
||||||
| BENCHMARK_TEST_HEIGHT | 512 | Fallback benchmark: image height (pixels) |
|
|
||||||
| BENCHMARK_TEST_STEPS | 20 | Fallback benchmark: number of denoising steps |
|
|
||||||
|
|
||||||
Each benchmark run uses a random prompt from `misc/test_prompts.txt` and a random seed to ensure consistent GPU load patterns.
|
|
||||||
|
|
||||||
#### Calibrating Fallback Benchmark Duration
|
|
||||||
|
|
||||||
To screen for underperforming hardware, set `BENCHMARK_TEST_STEPS` to match your expected production workflow duration. This allows you to identify machines that won't meet performance requirements.
|
|
||||||
|
|
||||||
**Example:** If your typical workflow should complete in 90 seconds on acceptable hardware:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 1. Measure it/sec on your reference machine
|
|
||||||
# RTX 4090 typically achieves ~43 it/sec with SD1.5
|
|
||||||
|
|
||||||
# 2. Calculate required steps
|
|
||||||
# 90 seconds × 43 it/sec = 3870 steps
|
|
||||||
|
|
||||||
# 3. Configure benchmark
|
|
||||||
export BENCHMARK_TEST_STEPS=3870
|
|
||||||
|
|
||||||
# 4. Machines completing significantly slower than 90s indicate hardware issues
|
|
||||||
```
|
|
||||||
|
|
||||||
**Performance expectations:**
|
|
||||||
- Benchmark duration should remain consistent across identical GPU models
|
|
||||||
- Significant variation (>20%) may indicate thermal, power, or configuration issues
|
|
||||||
|
|
||||||
## Endpoint
|
## Endpoint
|
||||||
|
|
||||||
The worker provides a single endpoint:
|
The worker provides a single endpoint:
|
||||||
@@ -314,4 +170,4 @@ See the client example for implementation details on how to integrate with the C
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
See Vast's serverless documentation for more details on how to use ComfyUI with autoscaler.
|
See Vast's serverless documentation for more details on how to use ComfyUI with autoscaler.
|
||||||
+22
-300
@@ -1,312 +1,34 @@
|
|||||||
import os
|
|
||||||
import sys
|
|
||||||
import json
|
|
||||||
import uuid
|
import uuid
|
||||||
import random
|
import random
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import random
|
||||||
import argparse
|
|
||||||
import aiohttp
|
|
||||||
|
|
||||||
from vastai import Serverless
|
from vastai import Serverless
|
||||||
|
|
||||||
# ---------------------- Config ----------------------
|
async def main():
|
||||||
DEFAULT_PROMPT = "a beautiful sunset over mountains, digital art, highly detailed"
|
async with Serverless() as client:
|
||||||
ENDPOINT_NAME = "my-comfyui-endpoint"
|
endpoint = await client.get_endpoint(name="my-comfy-endpoint") # Change this to your endpoint name
|
||||||
DEFAULT_WIDTH = 512
|
|
||||||
DEFAULT_HEIGHT = 512
|
|
||||||
DEFAULT_STEPS = 20
|
|
||||||
COST = 100 # Fixed cost for ComfyUI requests
|
|
||||||
|
|
||||||
# Optional S3 Configuration (from environment variables)
|
payload = {
|
||||||
S3_ENDPOINT_URL = os.getenv("S3_ENDPOINT_URL")
|
"input": {
|
||||||
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
|
"request_id": str(uuid.uuid4()),
|
||||||
S3_ACCESS_KEY_ID = os.getenv("S3_ACCESS_KEY_ID")
|
"modifier": "Text2Image",
|
||||||
S3_SECRET_ACCESS_KEY = os.getenv("S3_SECRET_ACCESS_KEY")
|
"modifications": {
|
||||||
|
"prompt": "a beautiful landscape with mountains and lakes",
|
||||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
"width": 1024,
|
||||||
log = logging.getLogger(__name__)
|
"height": 1024,
|
||||||
|
"steps": 20,
|
||||||
|
"seed": random.randint(0, 2**32 - 1)
|
||||||
def get_s3_client():
|
},
|
||||||
"""Create and return an S3 client configured for the S3-compatible endpoint"""
|
"workflow_json": {} # Empty since using modifier approach
|
||||||
try:
|
}
|
||||||
import boto3
|
|
||||||
from botocore.config import Config
|
|
||||||
except ImportError:
|
|
||||||
log.error("boto3 is required for S3 uploads. Install with: pip install boto3")
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not all([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY]):
|
|
||||||
log.error("S3 environment variables not fully configured. Required:")
|
|
||||||
log.error(" S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY")
|
|
||||||
return None
|
|
||||||
|
|
||||||
return boto3.client(
|
|
||||||
"s3",
|
|
||||||
endpoint_url=S3_ENDPOINT_URL,
|
|
||||||
aws_access_key_id=S3_ACCESS_KEY_ID,
|
|
||||||
aws_secret_access_key=S3_SECRET_ACCESS_KEY,
|
|
||||||
config=Config(signature_version="s3v4"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------- API Functions ----------------------
|
|
||||||
async def call_generate(
|
|
||||||
client: Serverless,
|
|
||||||
*,
|
|
||||||
endpoint_name: str,
|
|
||||||
prompt: str,
|
|
||||||
width: int,
|
|
||||||
height: int,
|
|
||||||
steps: int,
|
|
||||||
seed: int,
|
|
||||||
) -> dict:
|
|
||||||
"""Generate image using Text2Image modifier"""
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
|
||||||
payload = {
|
|
||||||
"input": {
|
|
||||||
"request_id": str(uuid.uuid4()),
|
|
||||||
"modifier": "Text2Image",
|
|
||||||
"modifications": {
|
|
||||||
"prompt": prompt,
|
|
||||||
"width": width,
|
|
||||||
"height": height,
|
|
||||||
"steps": steps,
|
|
||||||
"seed": seed,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return await endpoint.request("/generate/sync", payload, cost=COST)
|
|
||||||
|
|
||||||
|
|
||||||
async def call_generate_workflow(
|
|
||||||
client: Serverless,
|
|
||||||
*,
|
|
||||||
endpoint_name: str,
|
|
||||||
workflow_json: dict,
|
|
||||||
) -> dict:
|
|
||||||
"""Generate using custom workflow JSON"""
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
|
||||||
payload = {
|
|
||||||
"input": {
|
|
||||||
"request_id": str(uuid.uuid4()),
|
|
||||||
"workflow_json": workflow_json,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return await endpoint.request("/generate/sync", payload, cost=COST)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------- Demo Class ----------------------
|
|
||||||
class APIDemo:
|
|
||||||
def __init__(self, client: Serverless, endpoint_name: str, upload_s3: bool = False):
|
|
||||||
self.client = client
|
|
||||||
self.endpoint_name = endpoint_name
|
|
||||||
self.upload_s3 = upload_s3
|
|
||||||
self.s3_client = get_s3_client() if upload_s3 else None
|
|
||||||
|
|
||||||
if upload_s3 and not self.s3_client:
|
response = await endpoint.request("/generate/sync", payload, cost=100)
|
||||||
log.warning("S3 upload requested but client creation failed. Images will only be saved locally.")
|
|
||||||
|
|
||||||
def extract_filename(self, response: dict) -> str | None:
|
|
||||||
"""Extract the generated image filename from ComfyUI response"""
|
|
||||||
if "comfyui_response" in response:
|
|
||||||
for data in response["comfyui_response"].values():
|
|
||||||
if isinstance(data, dict) and "outputs" in data:
|
|
||||||
for node_output in data["outputs"].values():
|
|
||||||
if "images" in node_output and node_output["images"]:
|
|
||||||
return node_output["images"][0].get("filename")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def save_image(self, worker_url: str, filename: str, local_name: str) -> str | None:
|
|
||||||
"""Fetch and save image locally from the worker, optionally upload to S3"""
|
|
||||||
os.makedirs("generated_images", exist_ok=True)
|
|
||||||
return await self._fetch_image(worker_url, filename, local_name)
|
|
||||||
|
|
||||||
def _upload_to_s3(self, local_path: str, s3_key: str) -> str | None:
|
|
||||||
"""Upload a local file to S3 and return the S3 URL"""
|
|
||||||
if not self.s3_client:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.s3_client.upload_file(
|
|
||||||
local_path,
|
|
||||||
S3_BUCKET_NAME,
|
|
||||||
s3_key,
|
|
||||||
ExtraArgs={"ContentType": "image/png"}
|
|
||||||
)
|
|
||||||
s3_url = f"{S3_ENDPOINT_URL}/{S3_BUCKET_NAME}/{s3_key}"
|
|
||||||
print(f" ☁️ Uploaded to S3: {s3_key}")
|
|
||||||
return s3_url
|
|
||||||
except Exception as e:
|
|
||||||
log.error(f"Failed to upload to S3: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def _fetch_image(self, worker_url: str, filename: str, local_name: str) -> str | None:
|
|
||||||
"""Fetch image from worker's /view endpoint and save locally"""
|
|
||||||
if not worker_url:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
url = f"{worker_url}/view"
|
|
||||||
params = {"filename": filename, "type": "output"}
|
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.get(url, params=params, ssl=False) as resp:
|
|
||||||
if resp.status == 200:
|
|
||||||
path = f"generated_images/{local_name}"
|
|
||||||
image_data = await resp.read()
|
|
||||||
with open(path, "wb") as f:
|
|
||||||
f.write(image_data)
|
|
||||||
print(f" 💾 Saved: {path}")
|
|
||||||
|
|
||||||
# Upload to S3 if enabled
|
|
||||||
if self.upload_s3 and self.s3_client:
|
|
||||||
s3_key = f"comfyui/{local_name}"
|
|
||||||
self._upload_to_s3(path, s3_key)
|
|
||||||
|
|
||||||
return path
|
|
||||||
return None
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def demo_prompt(
|
|
||||||
self,
|
|
||||||
prompt: str,
|
|
||||||
width: int,
|
|
||||||
height: int,
|
|
||||||
steps: int,
|
|
||||||
seed: int | None,
|
|
||||||
):
|
|
||||||
"""Demo: Generate image from text prompt"""
|
|
||||||
print("=" * 60)
|
|
||||||
print("COMFYUI TEXT-TO-IMAGE DEMO")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
if seed is None:
|
|
||||||
seed = random.randint(0, 2**32 - 1)
|
|
||||||
|
|
||||||
print(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
|
|
||||||
print(f"Size: {width}x{height}, Steps: {steps}, Seed: {seed}")
|
|
||||||
print("\n🎨 Generating image...")
|
|
||||||
|
|
||||||
response = await call_generate(
|
|
||||||
self.client,
|
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
prompt=prompt,
|
|
||||||
width=width,
|
|
||||||
height=height,
|
|
||||||
steps=steps,
|
|
||||||
seed=seed,
|
|
||||||
)
|
|
||||||
|
|
||||||
print("\n✅ Generation complete!")
|
|
||||||
|
|
||||||
# Get worker URL for fetching images
|
|
||||||
worker_url = response.get("url", "")
|
|
||||||
print(f"Worker URL: {worker_url}")
|
|
||||||
|
|
||||||
# Fetch and save image
|
|
||||||
if "response" in response:
|
|
||||||
filename = self.extract_filename(response["response"])
|
|
||||||
if filename:
|
|
||||||
path = await self.save_image(worker_url, filename, f"comfy_{seed}.png")
|
|
||||||
if not path:
|
|
||||||
print(f"❌ Failed to fetch image")
|
|
||||||
else:
|
|
||||||
print("❌ No image in response")
|
|
||||||
else:
|
|
||||||
print("❌ Unexpected response format")
|
|
||||||
|
|
||||||
async def demo_workflow(self, workflow_file: str):
|
|
||||||
"""Demo: Generate using custom workflow file"""
|
|
||||||
print("=" * 60)
|
|
||||||
print("COMFYUI CUSTOM WORKFLOW DEMO")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
if not os.path.exists(workflow_file):
|
|
||||||
log.error(f"Workflow file not found: {workflow_file}")
|
|
||||||
return
|
|
||||||
|
|
||||||
with open(workflow_file, "r") as f:
|
|
||||||
workflow_json = json.load(f)
|
|
||||||
|
|
||||||
print(f"Workflow: {workflow_file}")
|
|
||||||
print("\n🎨 Generating...")
|
|
||||||
|
|
||||||
response = await call_generate_workflow(
|
|
||||||
self.client,
|
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
workflow_json=workflow_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
print("\n✅ Generation complete!")
|
|
||||||
|
|
||||||
worker_url = response.get("url", "")
|
|
||||||
|
|
||||||
if "response" in response:
|
|
||||||
filename = self.extract_filename(response["response"])
|
|
||||||
if filename:
|
|
||||||
path = await self.save_image(worker_url, filename, "workflow.png")
|
|
||||||
if not path:
|
|
||||||
print(f"❌ Failed to fetch image")
|
|
||||||
else:
|
|
||||||
print("❌ No image in response")
|
|
||||||
else:
|
|
||||||
print("❌ Unexpected response format")
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------- CLI ----------------------
|
|
||||||
def build_arg_parser() -> argparse.ArgumentParser:
|
|
||||||
p = argparse.ArgumentParser(description="Vast ComfyUI-JSON Demo (Serverless SDK)")
|
|
||||||
p.add_argument("--endpoint", default=ENDPOINT_NAME, help=f"Vast endpoint name (default: {ENDPOINT_NAME})")
|
|
||||||
p.add_argument("--prompt", type=str, default=DEFAULT_PROMPT, metavar="TEXT",
|
|
||||||
help=f"Prompt text (default: '{DEFAULT_PROMPT[:30]}...')")
|
|
||||||
p.add_argument("--workflow", type=str, metavar="FILE", help="Use custom workflow JSON file instead")
|
|
||||||
p.add_argument("--width", type=int, default=DEFAULT_WIDTH, help=f"Image width (default: {DEFAULT_WIDTH})")
|
|
||||||
p.add_argument("--height", type=int, default=DEFAULT_HEIGHT, help=f"Image height (default: {DEFAULT_HEIGHT})")
|
|
||||||
p.add_argument("--steps", type=int, default=DEFAULT_STEPS, help=f"Steps (default: {DEFAULT_STEPS})")
|
|
||||||
p.add_argument("--seed", type=int, default=None, help="Seed (default: random)")
|
|
||||||
p.add_argument("--s3", action="store_true",
|
|
||||||
help="Upload generated images to S3 (requires S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY env vars)")
|
|
||||||
return p
|
|
||||||
|
|
||||||
|
|
||||||
async def main_async():
|
|
||||||
args = build_arg_parser().parse_args()
|
|
||||||
|
|
||||||
print("=" * 60)
|
|
||||||
print(f"Using endpoint: {args.endpoint}")
|
|
||||||
if args.s3:
|
|
||||||
print(f"S3 upload: enabled (bucket: {S3_BUCKET_NAME})")
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with Serverless() as client:
|
|
||||||
demo = APIDemo(client, args.endpoint, upload_s3=args.s3)
|
|
||||||
|
|
||||||
if args.workflow:
|
|
||||||
await demo.demo_workflow(workflow_file=args.workflow)
|
|
||||||
else:
|
|
||||||
await demo.demo_prompt(
|
|
||||||
prompt=args.prompt,
|
|
||||||
width=args.width,
|
|
||||||
height=args.height,
|
|
||||||
steps=args.steps,
|
|
||||||
seed=args.seed,
|
|
||||||
)
|
|
||||||
|
|
||||||
except AttributeError as e:
|
|
||||||
if "API key" in str(e):
|
|
||||||
log.error("API key missing. Set VAST_API_KEY environment variable.")
|
|
||||||
else:
|
|
||||||
log.error(f"Error: {e}")
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
log.error(f"Error: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
# Get the file from the path on the local machine using SCP or SFTP
|
||||||
|
# or configure S3 to upload to cloud storage.
|
||||||
|
print(response["response"]["output"][0]["local_path"])
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(main_async())
|
asyncio.run(main())
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
{
|
|
||||||
"3": {
|
|
||||||
"inputs": {
|
|
||||||
"seed": "__RANDOM_INT__",
|
|
||||||
"steps": 20,
|
|
||||||
"cfg": 8,
|
|
||||||
"sampler_name": "euler",
|
|
||||||
"scheduler": "normal",
|
|
||||||
"denoise": 1,
|
|
||||||
"model": [
|
|
||||||
"4",
|
|
||||||
0
|
|
||||||
],
|
|
||||||
"positive": [
|
|
||||||
"6",
|
|
||||||
0
|
|
||||||
],
|
|
||||||
"negative": [
|
|
||||||
"7",
|
|
||||||
0
|
|
||||||
],
|
|
||||||
"latent_image": [
|
|
||||||
"5",
|
|
||||||
0
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"class_type": "KSampler",
|
|
||||||
"_meta": {
|
|
||||||
"title": "KSampler"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"inputs": {
|
|
||||||
"ckpt_name": "v1-5-pruned-emaonly-fp16.safetensors"
|
|
||||||
},
|
|
||||||
"class_type": "CheckpointLoaderSimple",
|
|
||||||
"_meta": {
|
|
||||||
"title": "Load Checkpoint"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"5": {
|
|
||||||
"inputs": {
|
|
||||||
"width": 512,
|
|
||||||
"height": 512,
|
|
||||||
"batch_size": 1
|
|
||||||
},
|
|
||||||
"class_type": "EmptyLatentImage",
|
|
||||||
"_meta": {
|
|
||||||
"title": "Empty Latent Image"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"6": {
|
|
||||||
"inputs": {
|
|
||||||
"text": "beautiful scenery nature glass bottle landscape, , purple galaxy bottle,",
|
|
||||||
"clip": [
|
|
||||||
"4",
|
|
||||||
1
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"class_type": "CLIPTextEncode",
|
|
||||||
"_meta": {
|
|
||||||
"title": "CLIP Text Encode (Prompt)"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"7": {
|
|
||||||
"inputs": {
|
|
||||||
"text": "text, watermark",
|
|
||||||
"clip": [
|
|
||||||
"4",
|
|
||||||
1
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"class_type": "CLIPTextEncode",
|
|
||||||
"_meta": {
|
|
||||||
"title": "CLIP Text Encode (Prompt)"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"inputs": {
|
|
||||||
"samples": [
|
|
||||||
"3",
|
|
||||||
0
|
|
||||||
],
|
|
||||||
"vae": [
|
|
||||||
"4",
|
|
||||||
2
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"class_type": "VAEDecode",
|
|
||||||
"_meta": {
|
|
||||||
"title": "VAE Decode"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"9": {
|
|
||||||
"inputs": {
|
|
||||||
"filename_prefix": "ComfyUI",
|
|
||||||
"images": [
|
|
||||||
"8",
|
|
||||||
0
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"class_type": "SaveImage",
|
|
||||||
"_meta": {
|
|
||||||
"title": "Save Image"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
cartoon character of a person with a hoodie , in style of cytus and deemo, ork, gold chains, realistic anime cat, dripping black goo, lineage revolution style, thug life, cute anthropomorphic bunny, balrog, arknights, aliased, very buff, black and red and yellow paint, painting illustration collage style, character composition in vector with white background
|
|
||||||
stardew valley, fine details
|
|
||||||
2D Vector Illustration of a child with soccer ball Art for Sublimation, Design Art, Chrome Art, Painting and Stunning Artwork, Highly Detailed Digital Painting, Airbrush Art, Highly Detailed Digital Artwork, Dramatic Artwork, stained antique yellow copper paint, digital airbrush art, detailed by Mark Brooks, Chicano airbrush art, Swagger! snake Culture
|
|
||||||
realistic futuristic city-downtown with short buildings, sunset
|
|
||||||
seascape by Ray Collins and artgerm, front view of a perfect wave, sunny background, ultra detailed water
|
|
||||||
inspired by realflow-cinema4d editor features, create image of a transparent luxury cup with ice fruits and mint, connected with white, yellow and pink cream, Slow - High Speed MO Photography, YouTube Video Screenshot, Abstract Clay, Transparent Cup , molecular gastronomy, wheel, 3D fluid,Simulation rendering, still video, 4k polymer clay futras photography, very surreal, Houdini Fluid Simulation, hyperrealistic CGI and FLUIDS & MULTIPHYSICS SIMULATION effect, with Somali Stain Lurex, Metallic Jacquard, Gold Thread, Mulberry Silk, Toub Saree, Warm background, a fantastic image worthy of an award.
|
|
||||||
biker with backpack on his back riding a motorcycle, Style by Ade Santora, Oilpunk, Cover photo, craig mullins style, on the cover of a magazine, Outdoor Magazine, inspired by Alex Petruk APe, image of a male biker, Cover of an award-winning magazine, the man has a backpack, photo for magazine, with a backpack, magazine cover
|
|
||||||
generate a collage-style illustration inspired by the Procreate raster graphic editor, photographic illustration with the theme, 2D vector, art for textile sublimation, containing surrealistic cartoon cat wearing a baseball cap and jeans standing in front of a poster, inspired by Sadao Watanabe, Doraemon, Japanese cartoon style, Eichiro Oda, Iconic high detail character, Director: Nakahara Nantenbō, Kastuhiro Otomo, image detailed, by Miyamoto, Hidetaka Miyazaki, Katsuhiro illustration, 8k, masterpiece, Minimize noise and grain in photo quality without lose quality and increase brightness and lighting,Symmetry and Alignment, Avoid asymmetrical shapes and out-of-focus points. Focus and Sharpness: Make sure the image is focused and sharp and encourages the viewer to see it as a work of art printed on fabric.
|
|
||||||
fantasy medieval village world inside a glass sphere , high detail, fantasy, realistic, light effect, hyper detail, volumetric lighting, cinematic, macro, depth of field, blur, red light and clouds from the back, highly detailed epic cinematic concept art cg render made in maya, blender and photoshop, octane render, excellent composition, dynamic dramatic cinematic lighting, aesthetic, very inspirational, world inside a glass sphere by james gurney by artgerm with james jean, joe fenton and tristan eaton by ross tran, fine details
|
|
||||||
armored hero with a glowing axe, mecha science_fiction, jungle background, dynamic lighting, detailed shading, digital texture painting, masterpiece, studio quality, 6k
|
|
||||||
elderly figure in a leather jacket DJing in a smoky nightclub, mixing live on a giant console, dramatic stage lighting, a masterpiece
|
|
||||||
elderly figure in a leather jacket on a motorcycle, magazine cover lighting, a masterpiece
|
|
||||||
a young pilot ordering a burger and fries from a futuristic space cantina
|
|
||||||
I want to generate a group avatar for a Feishu group chat. The role of this group is daily software technical communication. Now the subject technology stacks that members of this group discuss daily include: algorithms, data structures, optimization, functional programming, and the programming languages often discussed are: TypeScript, Java, python, etc. I hope this avatar has a simple aesthetic, this avatar is a single person avatar
|
|
||||||
portrait Anime black girl cute-fine-face, pretty face, realistic shaded Perfect face, fine details. Anime. realistic shaded lighting by Ilya Kuvshinov Giuseppe Dangelico Pino and Michael Garmash and Rob Rey, IAMAG premiere, WLOP matte print, cute freckles, masterpiece
|
|
||||||
young woman in modern fashion editorial, beige miniskirt and dark brown turtleneck sweater, soft studio lighting, brown hair, grey eyes, fine details, magazine cover style, a masterpiece
|
|
||||||
Cute small cat sitting in a movie theater eating chicken wiggs watching a movie ,unreal engine, cozy indoor lighting, artstation, detailed, digital painting,cinematic,character design by mark ryden and pixar and hayao miyazaki, unreal 5, daz, hyperrealistic, octane render
|
|
||||||
Cute small dog sitting in a movie theater eating popcorn watching a movie ,unreal engine, cozy indoor lighting, artstation, detailed, digital painting,cinematic,character design by mark ryden and pixar and hayao miyazaki, unreal 5, daz, hyperrealistic, octane render
|
|
||||||
fox bracelet made of buckskin with fox features, rich details, fine carvings, studio lighting
|
|
||||||
crane buckskin bracelet with crane features, rich details, fine carvings, studio lighting
|
|
||||||
london luxurious interior living-room, light walls
|
|
||||||
Parisian luxurious interior penthouse bedroom, dark walls, wooden panels
|
|
||||||
cute girl, crop-top, blond hair, black glasses, stretching, with background by greg rutkowski makoto shinkai kyoto animation key art feminine mid shot
|
|
||||||
houses in front, houses background, straight houses, digital art, smooth, sharp focus, gravity falls style, doraemon style, shinchan style, anime style
|
|
||||||
Simplified technical drawing, Leonardo da Vinci, Mechanical Dinosaur Skeleton, Minimalistic annotations, Hand-drawn illustrations, Basic design and engineering, Wonder and curiosity
|
|
||||||
High quality 8K painting impressionist style of a Japanese modern city street with a girl on the foreground wearing a traditional wedding dress with a fox mask, staring at the sky, daylight
|
|
||||||
a landscape from the Moon with the Earth setting on the horizon, realistic, detailed
|
|
||||||
Isometric Atlantis city,great architecture with columns, great details, ornaments,seaweed, blue ambiance, 3D cartoon style, soft light, 45° view
|
|
||||||
A hyper realistic avatar of a guy riding on a black honda cbr 650r in leather suit,high detail, high quality,8K,photo realism
|
|
||||||
the street of amedieval fantasy town, at dawn, dark, highly detailed
|
|
||||||
overwhelmingly beautiful eagle framed with vector flowers, long shiny wavy flowing hair, polished, ultra detailed vector floral illustration mixed with hyper realism, muted pastel colors, vector floral details in background, muted colors, hyper detailed ultra intricate overwhelming realism in detailed complex scene with magical fantasy atmosphere, no signature, no watermark
|
|
||||||
a highly detailed matte painting of a man on a hill watching a rocket launch in the distance by studio ghibli, makoto shinkai, by artgerm, by wlop, by greg rutkowski, volumetric lighting, octane render, 4 k resolution, trending on artstation, masterpiece | hyperrealism| highly detailed| insanely detailed| intricate| cinematic lighting| depth of field
|
|
||||||
electronik robot and ofice ,unreal engine, cozy indoor lighting, artstation, detailed, digital painting,cinematic,character design by mark ryden and pixar and hayao miyazaki, unreal 5, daz, hyperrealistic, octane render
|
|
||||||
exquisitely intricately detailed illustration, of a small world with a lake and a rainbow, inside a closed glass jar.
|
|
||||||
+34
-199
@@ -1,225 +1,60 @@
|
|||||||
"""ComfyUI worker for the vast.ai PyWorker SDK.
|
|
||||||
|
|
||||||
Each worker runs a benchmark on warm-up. The payload is selected as follows:
|
|
||||||
|
|
||||||
1. If ``misc/benchmark.json`` exists in the cloned worker tree, it is
|
|
||||||
used as a custom ComfyUI workflow. Use this if you fork the repo and
|
|
||||||
bake in your workflow.
|
|
||||||
2. Else, if ``$BENCHMARK_JSON_PATH`` is set and points at a readable
|
|
||||||
file, it is used. Use this from a provisioning script — provisioning
|
|
||||||
runs before pyworker is cloned, so it cannot write into ``misc/``,
|
|
||||||
but it can drop the workflow elsewhere (e.g. ``/workspace/``) and
|
|
||||||
export this env var.
|
|
||||||
3. Else, if the well-known path
|
|
||||||
``/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json`` exists,
|
|
||||||
it is used. The vast.ai ComfyUI base image's ``convert-workflows.sh``
|
|
||||||
maintains this as a symlink to the first provisioned workflow, so on
|
|
||||||
that image no env var is needed.
|
|
||||||
4. Otherwise an SD1.5 Text2Image fallback runs, parameterised by the
|
|
||||||
``BENCHMARK_TEST_{WIDTH,HEIGHT,STEPS}`` env vars and a random prompt
|
|
||||||
from ``misc/test_prompts.txt``.
|
|
||||||
|
|
||||||
``__RANDOM_INT__`` placeholders in custom workflows are substituted
|
|
||||||
server-side by ai-dock/comfyui-api-wrapper, so this worker does not handle
|
|
||||||
them itself.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
||||||
|
|
||||||
# ComfyUI model configuration. The model server is ai-dock's
|
# ComyUI model configuration
|
||||||
# comfyui-api-wrapper sitting in front of ComfyUI itself, not ComfyUI's
|
|
||||||
# own port (18188). We tail the api-wrapper's log rather than ComfyUI's
|
|
||||||
# and key off the api-wrapper's own structured readiness/fault signals:
|
|
||||||
#
|
|
||||||
# BACKENDS_READY — api-wrapper has confirmed every ComfyUI
|
|
||||||
# backend passes HTTP+WS probes. Until
|
|
||||||
# this fires, posting to /generate/sync
|
|
||||||
# can hit "Cannot connect to host" inside
|
|
||||||
# the api-wrapper, which the SDK can't
|
|
||||||
# recover from since __call_backend
|
|
||||||
# doesn't retry connection-refused.
|
|
||||||
# BACKENDS_READY_TIMEOUT — backends never reachable within
|
|
||||||
# api-wrapper's deadline. Worker is
|
|
||||||
# unrecoverable; mark errored.
|
|
||||||
# BACKEND_UNRECOVERABLE — CUDA fault / illegal memory access on a
|
|
||||||
# backend's GPU. Same fate.
|
|
||||||
# Application startup failed — uvicorn's own ASGI lifespan failed.
|
|
||||||
#
|
|
||||||
# These tokens are emitted by ai-dock/comfyui-api-wrapper >= the
|
|
||||||
# "feat/backend-readiness-log-signals" change. Older wrappers won't
|
|
||||||
# emit BACKENDS_READY, so warm-up will stall — pin the wrapper version
|
|
||||||
# accordingly.
|
|
||||||
MODEL_SERVER_URL = 'http://127.0.0.1'
|
MODEL_SERVER_URL = 'http://127.0.0.1'
|
||||||
MODEL_SERVER_PORT = 18288
|
MODEL_SERVER_PORT = 18288
|
||||||
MODEL_LOG_FILE = '/var/log/portal/api-wrapper.log'
|
MODEL_LOG_FILE = '/var/log/portal/comfyui.log'
|
||||||
MODEL_HEALTHCHECK_ENDPOINT = "/health"
|
MODEL_HEALTHCHECK_ENDPOINT = "/health"
|
||||||
|
|
||||||
# Trigger benchmark only after the full stack (api-wrapper + ComfyUI
|
# ComyUI-specific log messages
|
||||||
# backends) is reachable. See BACKENDS_READY in the comment above.
|
|
||||||
MODEL_LOAD_LOG_MSG = [
|
MODEL_LOAD_LOG_MSG = [
|
||||||
"BACKENDS_READY",
|
"To see the GUI go to: "
|
||||||
]
|
]
|
||||||
|
|
||||||
# LogAction.ModelError is fatal: the SDK calls backend_errored() and
|
|
||||||
# locks the worker into a permanent error state. Patterns must
|
|
||||||
# therefore only match conditions where the api-wrapper genuinely
|
|
||||||
# cannot serve any request — supervisord restarts on uvicorn exit, so
|
|
||||||
# a real failure self-heals rather than dragging the worker down.
|
|
||||||
#
|
|
||||||
# Notably *not* matched here:
|
|
||||||
# - per-request errors (PreprocessWorker failures, ComfyUI workflow
|
|
||||||
# validation, "Value not in list:") — one malformed client payload
|
|
||||||
# would otherwise kill the worker
|
|
||||||
# - "CUDA out of memory" — surfaces both as a misconfigured GPU
|
|
||||||
# (which the benchmark-failure path already catches via
|
|
||||||
# backend_errored) and as a too-greedy client request, which is
|
|
||||||
# indistinguishable from a substring match
|
|
||||||
# - convert-workflows.sh warnings — that script is not load-bearing
|
|
||||||
# for serving
|
|
||||||
MODEL_ERROR_LOG_MSGS = [
|
MODEL_ERROR_LOG_MSGS = [
|
||||||
"BACKENDS_READY_TIMEOUT", # backends never reachable
|
"MetadataIncompleteBuffer",
|
||||||
"BACKEND_UNRECOVERABLE", # CUDA fault latched per backend
|
"Value not in list: ",
|
||||||
"Application startup failed", # uvicorn ASGI lifespan startup failed
|
"[ERROR] Provisioning Script failed"
|
||||||
]
|
]
|
||||||
|
|
||||||
# LogAction.Info is purely informational (echoes log lines into the vast
|
MODEL_INFO_LOG_MSGS = [
|
||||||
# console). Nothing in api-wrapper.log is currently worth surfacing —
|
'"message":"Downloading'
|
||||||
# model downloads are upstream in provisioning, per-request logs are
|
]
|
||||||
# too noisy.
|
|
||||||
MODEL_INFO_LOG_MSGS = []
|
|
||||||
|
|
||||||
# Benchmark assets shipped alongside this worker. Resolved relative to this
|
benchmark_prompts = [
|
||||||
# file so the worker keeps working regardless of the launch cwd.
|
"Cartoon hoodie hero; orc, anime cat, bunny; black goo; buff; vector on white.",
|
||||||
MISC_DIR = Path(__file__).parent / "misc"
|
"Cozy farming-game scene with fine details.",
|
||||||
BENCHMARK_FILE = MISC_DIR / "benchmark.json"
|
"2D vector child with soccer ball; airbrush chrome; swagger; antique copper.",
|
||||||
TEST_PROMPTS = MISC_DIR / "test_prompts.txt"
|
"Realistic futuristic downtown of low buildings at sunset.",
|
||||||
|
"Perfect wave front view; sunny seascape; ultra-detailed water; artful feel.",
|
||||||
# Well-known location maintained by the vast.ai ComfyUI base image.
|
"Clear cup with ice, fruit, mint; creamy swirls; fluid-sim CGI; warm glow.",
|
||||||
# convert-workflows.sh symlinks this to the first provisioned workflow,
|
"Male biker with backpack on motorcycle; oilpunk; award-worthy magazine cover.",
|
||||||
# letting the base image work out-of-the-box without any env var.
|
"Collage for textile; surreal cartoon cat in cap/jeans before poster; crisp.",
|
||||||
WELLKNOWN_BENCHMARK = Path("/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json")
|
"Medieval village inside glass sphere; volumetric light; macro focus.",
|
||||||
|
"Iron Man with glowing axe; mecha sci-fi; jungle scene; dynamic light.",
|
||||||
log = logging.getLogger(__name__)
|
"Pope Francis DJ in leather jacket, mixing on giant console; dramatic.",
|
||||||
|
]
|
||||||
# Used when test_prompts.txt is unreadable or empty. Bare and generic
|
|
||||||
# on purpose — this is a benchmark seed, not a creative output.
|
|
||||||
_FALLBACK_PROMPT = "a still life on a wooden table, soft daylight"
|
|
||||||
|
|
||||||
|
|
||||||
def _env_int(name: str, default: int) -> int:
|
|
||||||
"""Read an integer env var, warning + falling back on bad values."""
|
|
||||||
raw = os.getenv(name)
|
|
||||||
if raw is None or raw == "":
|
|
||||||
return default
|
|
||||||
try:
|
|
||||||
return int(raw)
|
|
||||||
except ValueError:
|
|
||||||
log.warning("ignoring %s=%r (not an int); using default %d", name, raw, default)
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
benchmark_dataset = [
|
||||||
def _try_load_workflow(path: Path) -> dict | None:
|
{
|
||||||
"""Load and return a benchmark workflow from ``path``.
|
|
||||||
|
|
||||||
Returns None on any failure (path missing, not a regular file,
|
|
||||||
unreadable, invalid JSON) so the caller can fall through to the
|
|
||||||
next tier rather than dropping straight to the SD1.5 default.
|
|
||||||
"""
|
|
||||||
if not path.is_file():
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
with open(path) as f:
|
|
||||||
return json.load(f)
|
|
||||||
except (json.JSONDecodeError, OSError) as e:
|
|
||||||
log.warning("Failed to load %s: %s; trying next tier", path, e)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _custom_workflow_payload() -> dict | None:
|
|
||||||
"""Try each benchmark workflow tier in order; return the first one
|
|
||||||
that loads cleanly as a payload, or None if every tier is absent /
|
|
||||||
unreadable. Tiers (in order): in-tree ``misc/benchmark.json``,
|
|
||||||
``$BENCHMARK_JSON_PATH``, well-known base-image symlink.
|
|
||||||
"""
|
|
||||||
env_path = os.getenv("BENCHMARK_JSON_PATH")
|
|
||||||
candidates = [("misc", BENCHMARK_FILE)]
|
|
||||||
if env_path:
|
|
||||||
candidates.append(("env", Path(env_path)))
|
|
||||||
candidates.append(("well-known", WELLKNOWN_BENCHMARK))
|
|
||||||
|
|
||||||
for label, path in candidates:
|
|
||||||
# Surface a warning specifically when the operator pointed
|
|
||||||
# BENCHMARK_JSON_PATH at something we can't use — silent
|
|
||||||
# fall-through there is a footgun (typo => SD1.5 fallback,
|
|
||||||
# operator wonders why custom benchmark didn't take).
|
|
||||||
if not path.is_file():
|
|
||||||
if label == "env":
|
|
||||||
log.warning(
|
|
||||||
"BENCHMARK_JSON_PATH=%s is not a readable file; trying fallbacks", path
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
workflow = _try_load_workflow(path)
|
|
||||||
if workflow is None:
|
|
||||||
continue
|
|
||||||
log.info("Using custom benchmark workflow from %s (%s)", path, label)
|
|
||||||
return {
|
|
||||||
"input": {
|
|
||||||
"request_id": f"test-{random.randint(1000, 99999)}",
|
|
||||||
"workflow_json": workflow,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _load_prompts() -> list[str]:
|
|
||||||
"""Read misc/test_prompts.txt; defensive against missing/empty file."""
|
|
||||||
try:
|
|
||||||
with open(TEST_PROMPTS) as f:
|
|
||||||
prompts = [line.strip() for line in f if line.strip()]
|
|
||||||
except OSError as e:
|
|
||||||
log.warning("could not read %s: %s; using built-in fallback prompt", TEST_PROMPTS, e)
|
|
||||||
return [_FALLBACK_PROMPT]
|
|
||||||
if not prompts:
|
|
||||||
log.warning("%s is empty; using built-in fallback prompt", TEST_PROMPTS)
|
|
||||||
return [_FALLBACK_PROMPT]
|
|
||||||
return prompts
|
|
||||||
|
|
||||||
|
|
||||||
def _default_payload() -> dict:
|
|
||||||
"""Build the SD1.5 Text2Image fallback payload."""
|
|
||||||
prompts = _load_prompts()
|
|
||||||
return {
|
|
||||||
"input": {
|
"input": {
|
||||||
"request_id": f"test-{random.randint(1000, 99999)}",
|
"request_id": f"test-{random.randint(1000, 99999)}",
|
||||||
"modifier": "Text2Image",
|
"modifier": "Text2Image",
|
||||||
"modifications": {
|
"modifications": {
|
||||||
"prompt": random.choice(prompts),
|
"prompt": prompt,
|
||||||
"width": _env_int("BENCHMARK_TEST_WIDTH", 512),
|
"width": 512,
|
||||||
"height": _env_int("BENCHMARK_TEST_HEIGHT", 512),
|
"height": 512,
|
||||||
"steps": _env_int("BENCHMARK_TEST_STEPS", 20),
|
"steps": 20,
|
||||||
"seed": random.randint(0, sys.maxsize),
|
"seed": random.randint(0, sys.maxsize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} for prompt in benchmark_prompts
|
||||||
|
]
|
||||||
|
|
||||||
def make_benchmark_payload() -> dict:
|
|
||||||
"""Build one benchmark request payload.
|
|
||||||
|
|
||||||
Called once per benchmark run by the SDK; using a generator (rather
|
|
||||||
than a static ``dataset=``) lets each run re-pick a prompt and re-roll
|
|
||||||
the seed, and avoids holding multiple copies of a large workflow JSON
|
|
||||||
in memory.
|
|
||||||
"""
|
|
||||||
return _custom_workflow_payload() or _default_payload()
|
|
||||||
|
|
||||||
|
|
||||||
worker_config = WorkerConfig(
|
worker_config = WorkerConfig(
|
||||||
model_server_url=MODEL_SERVER_URL,
|
model_server_url=MODEL_SERVER_URL,
|
||||||
@@ -232,7 +67,7 @@ worker_config = WorkerConfig(
|
|||||||
allow_parallel_requests=False,
|
allow_parallel_requests=False,
|
||||||
max_queue_time=10.0,
|
max_queue_time=10.0,
|
||||||
benchmark_config=BenchmarkConfig(
|
benchmark_config=BenchmarkConfig(
|
||||||
generator=make_benchmark_payload,
|
dataset=benchmark_dataset,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@@ -243,4 +78,4 @@ worker_config = WorkerConfig(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
Worker(worker_config).run()
|
Worker(worker_config).run()
|
||||||
+22
-29
@@ -8,13 +8,14 @@ This is the base PyWorker for OpenAI compatible inference servers. See the [Ser
|
|||||||
|
|
||||||
This worker is compatible with any backend API that properly implements the `/v1/completions` and `/v1/chat/completions` endpoints. We currently have three templates you can choose from but you can also create your own without having to modify the PyWorker.
|
This worker is compatible with any backend API that properly implements the `/v1/completions` and `/v1/chat/completions` endpoints. We currently have three templates you can choose from but you can also create your own without having to modify the PyWorker.
|
||||||
|
|
||||||
- [vLLM](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=vLLM%20(Serverless)) (recommended)
|
- [vLLM](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=vLLM%20%2B%20Qwen%2FQwen3-8B%20(Serverless)) (recommended)
|
||||||
- [Ollama](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=Ollama%20%2B%20Qwen3%3A32b%20(Serverless))
|
- [Ollama](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=Ollama%20%2B%20Qwen3%3A32b%20(Serverless))
|
||||||
|
- [HuggingFace TGI](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=TGI%20%2B%20Qwen3-8B%20(Serverless))
|
||||||
|
|
||||||
|
|
||||||
All of these templates can be configured via the template interface. You may want to change the model or startup arguments, depending on the template you selected.
|
All of these templates can be configured via the template interface. You may want to change the model or startup arguments, depending on the template you selected.
|
||||||
|
|
||||||
2. Follow the [getting started guide](https://docs.vast.ai/documentation/serverless/quickstart) for help with configuring your serverless setup. For testing, we recommend that you use the default options presented by the web interface.
|
2. Follow the [getting started guide](https://docs.vast.ai/serverless/getting-started) for help with configuring your serverless setup. For testing, we recommend that you use the default options presented by the web interface.
|
||||||
|
|
||||||
## Client Setup (Demo)
|
## Client Setup (Demo)
|
||||||
|
|
||||||
@@ -33,30 +34,12 @@ uv pip install -r requirements.txt
|
|||||||
|
|
||||||
Several examples have been provided in the client to help you get started with your own implementation.
|
Several examples have been provided in the client to help you get started with your own implementation.
|
||||||
|
|
||||||
First, set your API key as an environment variable:
|
### Completions
|
||||||
|
|
||||||
|
Call to `/v1/completions` with json response
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export VAST_API_KEY=<your_api_key>
|
python -m workers.openai.client -k <API_KEY> -e <ENDPOINT_NAME> --completion --model <MODEL_NAME>
|
||||||
```
|
|
||||||
|
|
||||||
The `--model` and `--endpoint` flags are optional. If not provided, they default to `Qwen/Qwen3-8B` and `my-vllm-endpoint` respectively.
|
|
||||||
|
|
||||||
### Chat Completion (streaming)
|
|
||||||
|
|
||||||
Call to `/v1/chat/completions` with streaming response
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m workers.openai.client --chat-stream --endpoint <ENDPOINT_NAME> --model <MODEL_NAME>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Interactive Chat (streaming)
|
|
||||||
|
|
||||||
Interactive session with calls to `/v1/chat/completions`.
|
|
||||||
|
|
||||||
Type `clear` to clear the chat history or `quit` to exit.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m workers.openai.client --interactive --endpoint <ENDPOINT_NAME> --model <MODEL_NAME>
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Chat Completion (json)
|
### Chat Completion (json)
|
||||||
@@ -64,7 +47,15 @@ python -m workers.openai.client --interactive --endpoint <ENDPOINT_NAME> --model
|
|||||||
Call to `/v1/chat/completions` with json response
|
Call to `/v1/chat/completions` with json response
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m workers.openai.client --chat --endpoint <ENDPOINT_NAME> --model <MODEL_NAME>
|
python -m workers.openai.client -k <API_KEY> -e <ENDPOINT_NAME> --chat --model <MODEL_NAME>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Chat Completion (streaming)
|
||||||
|
|
||||||
|
Call to `/v1/chat/completions` with streaming response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m workers.openai.client -k <API_KEY> -e <ENDPOINT_NAME> --chat-stream --model <MODEL_NAME>
|
||||||
```
|
```
|
||||||
|
|
||||||
### Tool Use (json)
|
### Tool Use (json)
|
||||||
@@ -74,14 +65,16 @@ Call to `/v1/chat/completions` with tool and json response.
|
|||||||
This test defines a simple tool which will list the contents of the local pyworker directory. The output is then analysed by the model.
|
This test defines a simple tool which will list the contents of the local pyworker directory. The output is then analysed by the model.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m workers.openai.client --tools --endpoint <ENDPOINT_NAME> --model <MODEL_NAME>
|
python -m workers.openai.client -k <API_KEY> -e <ENDPOINT_NAME> --tools --model <MODEL_NAME>
|
||||||
```
|
```
|
||||||
|
|
||||||
### Completions
|
### Interactive Chat (streaming)
|
||||||
|
|
||||||
Call to `/v1/completions` with json response
|
Interactive session with calls to `/v1/chat/completions`.
|
||||||
|
|
||||||
|
Type `clear` to clear the chat history or `quit` to exit.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m workers.openai.client --completion --endpoint <ENDPOINT_NAME> --model <MODEL_NAME>
|
python -m workers.openai.client -k <API_KEY> -e <ENDPOINT_NAME> --interactive --model <MODEL_NAME>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
+16
-32
@@ -18,7 +18,7 @@ logging.basicConfig(
|
|||||||
log = logging.getLogger(__file__)
|
log = logging.getLogger(__file__)
|
||||||
|
|
||||||
# ---------------------- Prompts ----------------------
|
# ---------------------- Prompts ----------------------
|
||||||
COMPLETIONS_PROMPT = "Zebras are primarily grazers and can subsist on lower-quality vegetation. They are preyed on mainly by"
|
COMPLETIONS_PROMPT = "the capital of USA is"
|
||||||
CHAT_PROMPT = "Think step by step: Tell me about the Python programming language."
|
CHAT_PROMPT = "Think step by step: Tell me about the Python programming language."
|
||||||
TOOLS_PROMPT = (
|
TOOLS_PROMPT = (
|
||||||
"Can you list the files in the current working directory and tell me what you see? "
|
"Can you list the files in the current working directory and tell me what you see? "
|
||||||
@@ -97,9 +97,9 @@ def _tool_state_to_message_tool_calls(state: Dict[int, Dict[str, Any]]) -> List[
|
|||||||
|
|
||||||
|
|
||||||
# ---- OpenAI-compatible calls (non-streaming) ----
|
# ---- OpenAI-compatible calls (non-streaming) ----
|
||||||
async def call_completions(client: Serverless, *, model: str, prompt: str, endpoint_name: str, **kwargs) -> Dict[str, Any]:
|
async def call_completions(client: Serverless, *, model: str, prompt: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
endpoint = await client.get_endpoint(name=ENDPOINT_NAME)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
@@ -111,9 +111,9 @@ async def call_completions(client: Serverless, *, model: str, prompt: str, endpo
|
|||||||
resp = await endpoint.request("/v1/completions", payload, cost=payload["max_tokens"])
|
resp = await endpoint.request("/v1/completions", payload, cost=payload["max_tokens"])
|
||||||
return resp["response"]
|
return resp["response"]
|
||||||
|
|
||||||
async def call_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], endpoint_name: str, **kwargs) -> Dict[str, Any]:
|
async def call_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
|
||||||
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
endpoint = await client.get_endpoint(name=ENDPOINT_NAME)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
@@ -128,9 +128,9 @@ async def call_chat_completions(client: Serverless, *, model: str, messages: Lis
|
|||||||
return resp["response"]
|
return resp["response"]
|
||||||
|
|
||||||
# ---- Streaming variants ----
|
# ---- Streaming variants ----
|
||||||
async def stream_completions(client: Serverless, *, model: str, prompt: str, endpoint_name: str, **kwargs):
|
async def stream_completions(client: Serverless, *, model: str, prompt: str, **kwargs):
|
||||||
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
endpoint = await client.get_endpoint(name=ENDPOINT_NAME)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
@@ -144,9 +144,9 @@ async def stream_completions(client: Serverless, *, model: str, prompt: str, end
|
|||||||
resp = await endpoint.request("/v1/completions", payload, cost=payload["max_tokens"], stream=True)
|
resp = await endpoint.request("/v1/completions", payload, cost=payload["max_tokens"], stream=True)
|
||||||
return resp["response"] # async generator
|
return resp["response"] # async generator
|
||||||
|
|
||||||
async def stream_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], endpoint_name: str, **kwargs):
|
async def stream_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], **kwargs):
|
||||||
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
endpoint = await client.get_endpoint(name=ENDPOINT_NAME)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
@@ -166,10 +166,9 @@ async def stream_chat_completions(client: Serverless, *, model: str, messages: L
|
|||||||
class APIDemo:
|
class APIDemo:
|
||||||
"""Demo and testing functionality for the API client"""
|
"""Demo and testing functionality for the API client"""
|
||||||
|
|
||||||
def __init__(self, client: Serverless, model: str, endpoint_name: str, tool_manager: Optional[ToolManager] = None):
|
def __init__(self, client: Serverless, model: str, tool_manager: Optional[ToolManager] = None):
|
||||||
self.client = client
|
self.client = client
|
||||||
self.model = model
|
self.model = model
|
||||||
self.endpoint_name = endpoint_name
|
|
||||||
self.tool_manager = tool_manager or ToolManager()
|
self.tool_manager = tool_manager or ToolManager()
|
||||||
|
|
||||||
# ----- Streaming handler -----
|
# ----- Streaming handler -----
|
||||||
@@ -178,15 +177,10 @@ class APIDemo:
|
|||||||
reasoning_content = ""
|
reasoning_content = ""
|
||||||
printed_reasoning = False
|
printed_reasoning = False
|
||||||
printed_answer = False
|
printed_answer = False
|
||||||
finish_reason = None
|
|
||||||
|
|
||||||
async for chunk in stream:
|
async for chunk in stream:
|
||||||
choice = (chunk.get("choices") or [{}])[0]
|
choice = (chunk.get("choices") or [{}])[0]
|
||||||
delta = choice.get("delta", {})
|
delta = choice.get("delta", {})
|
||||||
|
|
||||||
# Track finish reason
|
|
||||||
if choice.get("finish_reason"):
|
|
||||||
finish_reason = choice.get("finish_reason")
|
|
||||||
|
|
||||||
# reasoning tokens
|
# reasoning tokens
|
||||||
rc = delta.get("reasoning_content")
|
rc = delta.get("reasoning_content")
|
||||||
@@ -217,8 +211,6 @@ class APIDemo:
|
|||||||
print(f"Reasoning tokens: {len(reasoning_content.split())}")
|
print(f"Reasoning tokens: {len(reasoning_content.split())}")
|
||||||
if printed_answer:
|
if printed_answer:
|
||||||
print(f"Response tokens: {len(full_response.split())}")
|
print(f"Response tokens: {len(full_response.split())}")
|
||||||
if finish_reason:
|
|
||||||
print(f"Finish reason: {finish_reason}")
|
|
||||||
|
|
||||||
return full_response
|
return full_response
|
||||||
|
|
||||||
@@ -231,7 +223,6 @@ class APIDemo:
|
|||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
prompt=COMPLETIONS_PROMPT,
|
prompt=COMPLETIONS_PROMPT,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
max_tokens=MAX_TOKENS,
|
max_tokens=MAX_TOKENS,
|
||||||
temperature=DEFAULT_TEMPERATURE,
|
temperature=DEFAULT_TEMPERATURE,
|
||||||
)
|
)
|
||||||
@@ -250,7 +241,6 @@ class APIDemo:
|
|||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
max_tokens=MAX_TOKENS,
|
max_tokens=MAX_TOKENS,
|
||||||
temperature=DEFAULT_TEMPERATURE
|
temperature=DEFAULT_TEMPERATURE
|
||||||
)
|
)
|
||||||
@@ -263,7 +253,6 @@ class APIDemo:
|
|||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
max_tokens=MAX_TOKENS,
|
max_tokens=MAX_TOKENS,
|
||||||
temperature=DEFAULT_TEMPERATURE
|
temperature=DEFAULT_TEMPERATURE
|
||||||
)
|
)
|
||||||
@@ -290,7 +279,6 @@ class APIDemo:
|
|||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
tools=minimal_tool,
|
tools=minimal_tool,
|
||||||
tool_choice="none",
|
tool_choice="none",
|
||||||
max_tokens=10
|
max_tokens=10
|
||||||
@@ -316,7 +304,6 @@ class APIDemo:
|
|||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
tools=self.tool_manager.get_ls_tool_definition(),
|
tools=self.tool_manager.get_ls_tool_definition(),
|
||||||
tool_choice="auto",
|
tool_choice="auto",
|
||||||
max_tokens=MAX_TOKENS,
|
max_tokens=MAX_TOKENS,
|
||||||
@@ -394,7 +381,6 @@ class APIDemo:
|
|||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
max_tokens=MAX_TOKENS,
|
max_tokens=MAX_TOKENS,
|
||||||
temperature=DEFAULT_TEMPERATURE,
|
temperature=DEFAULT_TEMPERATURE,
|
||||||
)
|
)
|
||||||
@@ -433,6 +419,7 @@ class APIDemo:
|
|||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
print("INTERACTIVE STREAMING CHAT")
|
print("INTERACTIVE STREAMING CHAT")
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
print(f"Using model: {self.model}")
|
||||||
print("Type 'quit' to exit, 'clear' to clear history")
|
print("Type 'quit' to exit, 'clear' to clear history")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
@@ -458,8 +445,7 @@ class APIDemo:
|
|||||||
stream = await stream_chat_completions(
|
stream = await stream_chat_completions(
|
||||||
client=self.client,
|
client=self.client,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
max_tokens=MAX_TOKENS,
|
max_tokens=MAX_TOKENS,
|
||||||
temperature=0.7
|
temperature=0.7
|
||||||
)
|
)
|
||||||
@@ -479,8 +465,8 @@ class APIDemo:
|
|||||||
# ---------------------- CLI ----------------------
|
# ---------------------- CLI ----------------------
|
||||||
def build_arg_parser() -> argparse.ArgumentParser:
|
def build_arg_parser() -> argparse.ArgumentParser:
|
||||||
p = argparse.ArgumentParser(description="Vast vLLM Demo (Serverless SDK)")
|
p = argparse.ArgumentParser(description="Vast vLLM Demo (Serverless SDK)")
|
||||||
p.add_argument("--model", default=DEFAULT_MODEL, help=f"Model to use for requests (default: {DEFAULT_MODEL})")
|
p.add_argument("--model", required=True, help="Model to use for requests (required)")
|
||||||
p.add_argument("--endpoint", default=ENDPOINT_NAME, help=f"Vast endpoint name (default: {ENDPOINT_NAME})")
|
p.add_argument("--endpoint", default="my-vllm-endpoint", help="Vast endpoint name (default: my-vllm-endpoint)")
|
||||||
|
|
||||||
modes = p.add_mutually_exclusive_group(required=False)
|
modes = p.add_mutually_exclusive_group(required=False)
|
||||||
modes.add_argument("--completion", action="store_true", help="Test completions endpoint")
|
modes.add_argument("--completion", action="store_true", help="Test completions endpoint")
|
||||||
@@ -508,14 +494,12 @@ async def main_async():
|
|||||||
print("Please specify exactly one test mode")
|
print("Please specify exactly one test mode")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print("=" * 60)
|
|
||||||
print(f"Using model: {args.model}")
|
print(f"Using model: {args.model}")
|
||||||
print(f"Using endpoint: {args.endpoint}")
|
print("=" * 60)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with Serverless() as client:
|
async with Serverless() as client:
|
||||||
demo = APIDemo(client, args.model, args.endpoint, ToolManager())
|
demo = APIDemo(client, args.model, ToolManager())
|
||||||
|
|
||||||
if args.completion:
|
if args.completion:
|
||||||
await demo.demo_completions()
|
await demo.demo_completions()
|
||||||
|
|||||||
@@ -28,12 +28,6 @@ MODEL_INFO_LOG_MSGS = [
|
|||||||
nltk.download("words")
|
nltk.download("words")
|
||||||
WORD_LIST = nltk.corpus.words.words()
|
WORD_LIST = nltk.corpus.words.words()
|
||||||
|
|
||||||
def request_parser(request):
|
|
||||||
data = request
|
|
||||||
if request.get("input") is not None:
|
|
||||||
data = request.get("input")
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
def completions_benchmark_generator() -> dict:
|
def completions_benchmark_generator() -> dict:
|
||||||
prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
|
prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
|
||||||
@@ -60,20 +54,18 @@ worker_config = WorkerConfig(
|
|||||||
route="/v1/completions",
|
route="/v1/completions",
|
||||||
workload_calculator= lambda data: data.get("max_tokens", 0),
|
workload_calculator= lambda data: data.get("max_tokens", 0),
|
||||||
allow_parallel_requests=True,
|
allow_parallel_requests=True,
|
||||||
request_parser=request_parser,
|
max_queue_time=60.0,
|
||||||
max_queue_time=600.0,
|
|
||||||
benchmark_config=BenchmarkConfig(
|
benchmark_config=BenchmarkConfig(
|
||||||
generator=completions_benchmark_generator,
|
generator=completions_benchmark_generator,
|
||||||
concurrency=10,
|
concurrency=100,
|
||||||
runs=3
|
runs=2
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
HandlerConfig(
|
HandlerConfig(
|
||||||
route="/v1/chat/completions",
|
route="/v1/chat/completions",
|
||||||
workload_calculator= lambda data: data.get("max_tokens", 0),
|
workload_calculator= lambda data: data.get("max_tokens", 0),
|
||||||
allow_parallel_requests=True,
|
allow_parallel_requests=True,
|
||||||
request_parser=request_parser,
|
max_queue_time=60.0,
|
||||||
max_queue_time=600.0,
|
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
log_action_config=LogActionConfig(
|
log_action_config=LogActionConfig(
|
||||||
|
|||||||
+9
-93
@@ -1,103 +1,19 @@
|
|||||||
# HuggingFace TGI PyWorker
|
This is the base PyWorker for TGI, designed to create PyWorkers that can utilize various LLMs. It offers two primary endpoints:
|
||||||
|
|
||||||
This is the base PyWorker for HuggingFace Text Generation Inference (TGI) servers. See the [Serverless documentation](https://docs.vast.ai/serverless) for guides and how-to's.
|
1. `generate`: Generates the LLM's response to a given prompt in a single request.
|
||||||
|
2. `generate_stream`: Streams the LLM's response token by token.
|
||||||
|
|
||||||
## Instance Setup
|
Both endpoints use the following API payload format:
|
||||||
|
|
||||||
1. Pick a template
|
|
||||||
|
|
||||||
This worker is compatible with any TGI backend. We have a template you can use or you can create your own.
|
|
||||||
|
|
||||||
- [HuggingFace TGI](https://cloud.vast.ai/?ref_id=62897&creator_id=62897&name=TGI%20(Serverless))
|
|
||||||
|
|
||||||
The template can be configured via the template interface. You may want to change the model or startup arguments.
|
|
||||||
|
|
||||||
2. Follow the [getting started guide](https://docs.vast.ai/documentation/serverless/quickstart) for help with configuring your serverless setup. For testing, we recommend that you use the default options presented by the web interface.
|
|
||||||
|
|
||||||
## Client Setup (Demo)
|
|
||||||
|
|
||||||
1. Clone the PyWorker repository to your local machine and install the necessary requirements for running the test client.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/vast-ai/pyworker
|
|
||||||
cd pyworker
|
|
||||||
pip install uv
|
|
||||||
uv venv -p 3.12
|
|
||||||
source .venv/bin/activate
|
|
||||||
uv pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
## Using the Test Client
|
|
||||||
|
|
||||||
The test client demonstrates both streaming and non-streaming generation using TGI's native API.
|
|
||||||
|
|
||||||
First, set your API key as an environment variable:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export VAST_API_KEY=<your_api_key>
|
|
||||||
```
|
|
||||||
|
|
||||||
The `--endpoint` flag is optional. If not provided, it defaults to `my-tgi-endpoint`.
|
|
||||||
|
|
||||||
### Generate (Streaming)
|
|
||||||
|
|
||||||
Call to `/generate_stream` with streaming response:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m workers.tgi.client --generate-stream --endpoint <ENDPOINT_NAME>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Generate (Non-Streaming)
|
|
||||||
|
|
||||||
Call to `/generate` with json response:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m workers.tgi.client --generate --endpoint <ENDPOINT_NAME>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Interactive Session (Streaming)
|
|
||||||
|
|
||||||
Interactive session with streaming responses. Type `quit` to exit.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m workers.tgi.client --interactive --endpoint <ENDPOINT_NAME>
|
|
||||||
```
|
|
||||||
|
|
||||||
## API Endpoints
|
|
||||||
|
|
||||||
TGI provides two primary endpoints:
|
|
||||||
|
|
||||||
### Generate (Non-Streaming)
|
|
||||||
|
|
||||||
`/generate` - Returns the complete response in a single request.
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"inputs": "Your prompt here",
|
"inputs": "PROMPT",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_new_tokens": 1024,
|
"max_new_tokens": 250
|
||||||
"temperature": 0.7,
|
|
||||||
"return_full_text": false
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Generate Stream (Streaming)
|
Note that the max_new_tokens parameter, rather than the prompt size, impacts performance. For example, if an
|
||||||
|
instance is benchmarked to process 100 tokens per second, a request with max_new_tokens = 200 will take
|
||||||
`/generate_stream` - Streams the response token by token.
|
approximately 2 seconds to complete.
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"inputs": "Your prompt here",
|
|
||||||
"parameters": {
|
|
||||||
"max_new_tokens": 1024,
|
|
||||||
"temperature": 0.7,
|
|
||||||
"do_sample": true,
|
|
||||||
"return_full_text": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance Notes
|
|
||||||
|
|
||||||
The `max_new_tokens` parameter (not the prompt size) primarily impacts performance. For example, if an instance is benchmarked to process 100 tokens per second, a request with `max_new_tokens = 200` will take approximately 2 seconds to complete.
|
|
||||||
|
|||||||
+33
-194
@@ -1,222 +1,61 @@
|
|||||||
import logging
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
from vastai import Serverless
|
from vastai import Serverless
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
# ---------------------- Logging ----------------------
|
ENDPOINT_NAME = "my-tgi-endpoint" # Change this to match your endpoint name
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.DEBUG,
|
|
||||||
format="%(asctime)s[%(levelname)-5s] %(message)s",
|
|
||||||
datefmt="%Y-%m-%d %H:%M:%S",
|
|
||||||
)
|
|
||||||
log = logging.getLogger(__file__)
|
|
||||||
|
|
||||||
# ---------------------- Defaults ----------------------
|
|
||||||
DEFAULT_PROMPT = "Think step by step: Tell me about the Python programming language."
|
|
||||||
|
|
||||||
ENDPOINT_NAME = "TGI-Prod2" # change this to your TGI endpoint name
|
|
||||||
MAX_TOKENS = 1024
|
MAX_TOKENS = 1024
|
||||||
DEFAULT_TEMPERATURE = 0.7
|
PROMPT = "Think step by step: Tell me about the Python programming language."
|
||||||
|
|
||||||
|
async def call_generate(client: Serverless) -> None:
|
||||||
# ---------------------- API Calls ----------------------
|
endpoint = await client.get_endpoint(name=ENDPOINT_NAME)
|
||||||
async def call_generate(client: Serverless, *, endpoint_name: str, prompt: str, **kwargs) -> dict:
|
|
||||||
"""Non-streaming generation via /generate endpoint"""
|
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"inputs": prompt,
|
"inputs": PROMPT,
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_new_tokens": kwargs.get("max_tokens", MAX_TOKENS),
|
"max_new_tokens": MAX_TOKENS,
|
||||||
"temperature": kwargs.get("temperature", DEFAULT_TEMPERATURE),
|
"temperature": 0.7,
|
||||||
"return_full_text": False,
|
"return_full_text": False
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.debug("POST /generate %s", json.dumps(payload)[:500])
|
|
||||||
resp = await endpoint.request("/generate", payload, cost=payload["parameters"]["max_new_tokens"])
|
resp = await endpoint.request("/generate", payload, cost=MAX_TOKENS)
|
||||||
return resp["response"]
|
|
||||||
|
print(resp["response"]["generated_text"])
|
||||||
|
|
||||||
|
|
||||||
async def call_generate_stream(client: Serverless, *, endpoint_name: str, prompt: str, **kwargs):
|
async def call_generate_stream(client: Serverless) -> None:
|
||||||
"""Streaming generation via /generate_stream endpoint"""
|
endpoint = await client.get_endpoint(name=ENDPOINT_NAME)
|
||||||
endpoint = await client.get_endpoint(name=endpoint_name)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"inputs": prompt,
|
"inputs": PROMPT,
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_new_tokens": kwargs.get("max_tokens", MAX_TOKENS),
|
"max_new_tokens": MAX_TOKENS,
|
||||||
"temperature": kwargs.get("temperature", DEFAULT_TEMPERATURE),
|
"temperature": 0.7,
|
||||||
"do_sample": True,
|
"do_sample": True,
|
||||||
"return_full_text": False,
|
"return_full_text": False,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.debug("STREAM /generate_stream %s", json.dumps(payload)[:500])
|
|
||||||
resp = await endpoint.request(
|
resp = await endpoint.request(
|
||||||
"/generate_stream",
|
"/generate_stream",
|
||||||
payload,
|
payload,
|
||||||
cost=payload["parameters"]["max_new_tokens"],
|
cost=MAX_TOKENS,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
return resp["response"] # async generator
|
stream = resp["response"]
|
||||||
|
|
||||||
|
printed_answer = False
|
||||||
|
async for event in stream:
|
||||||
|
tok = (event.get("token") or {}).get("text")
|
||||||
|
if tok:
|
||||||
|
if not printed_answer:
|
||||||
|
printed_answer = True
|
||||||
|
print("Answer:\n", end="", flush=True)
|
||||||
|
print(tok, end="", flush=True)
|
||||||
|
|
||||||
# ---------------------- Demo Runner ----------------------
|
async def main():
|
||||||
class APIDemo:
|
async with Serverless() as client:
|
||||||
"""Demo and testing functionality for the TGI API client"""
|
await call_generate(client)
|
||||||
|
await call_generate_stream(client)
|
||||||
def __init__(self, client: Serverless, endpoint_name: str):
|
|
||||||
self.client = client
|
|
||||||
self.endpoint_name = endpoint_name
|
|
||||||
|
|
||||||
async def handle_streaming_response(self, stream) -> str:
|
|
||||||
"""Process streaming response and print tokens"""
|
|
||||||
full_response = ""
|
|
||||||
printed_answer = False
|
|
||||||
|
|
||||||
async for event in stream:
|
|
||||||
tok = (event.get("token") or {}).get("text")
|
|
||||||
if tok:
|
|
||||||
if not printed_answer:
|
|
||||||
printed_answer = True
|
|
||||||
print("\n💬 Response: ", end="", flush=True)
|
|
||||||
print(tok, end="", flush=True)
|
|
||||||
full_response += tok
|
|
||||||
|
|
||||||
print() # newline
|
|
||||||
if printed_answer:
|
|
||||||
print(f"\nStreaming completed. Response tokens: {len(full_response.split())}")
|
|
||||||
|
|
||||||
return full_response
|
|
||||||
|
|
||||||
async def demo_generate(self) -> None:
|
|
||||||
"""Demo non-streaming generation"""
|
|
||||||
print("=" * 60)
|
|
||||||
print("GENERATE DEMO (NON-STREAMING)")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
response = await call_generate(
|
|
||||||
client=self.client,
|
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
prompt=DEFAULT_PROMPT,
|
|
||||||
max_tokens=MAX_TOKENS,
|
|
||||||
temperature=DEFAULT_TEMPERATURE,
|
|
||||||
)
|
|
||||||
|
|
||||||
print(f"\n💬 Response: {response.get('generated_text', '')}")
|
|
||||||
print(f"\nFull Response:\n{json.dumps(response, indent=2)}")
|
|
||||||
|
|
||||||
async def demo_generate_stream(self) -> None:
|
|
||||||
"""Demo streaming generation"""
|
|
||||||
print("=" * 60)
|
|
||||||
print("GENERATE DEMO (STREAMING)")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
stream = await call_generate_stream(
|
|
||||||
client=self.client,
|
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
prompt=DEFAULT_PROMPT,
|
|
||||||
max_tokens=MAX_TOKENS,
|
|
||||||
temperature=DEFAULT_TEMPERATURE,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
await self.handle_streaming_response(stream)
|
|
||||||
except Exception as e:
|
|
||||||
log.error("\nError during streaming: %s", e, exc_info=True)
|
|
||||||
|
|
||||||
async def interactive_chat(self) -> None:
|
|
||||||
"""Interactive session with streaming generation"""
|
|
||||||
print("=" * 60)
|
|
||||||
print("INTERACTIVE STREAMING SESSION")
|
|
||||||
print("=" * 60)
|
|
||||||
print(f"Using endpoint: {self.endpoint_name}")
|
|
||||||
print("Type 'quit' to exit")
|
|
||||||
print()
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
user_input = input("You: ").strip()
|
|
||||||
|
|
||||||
if user_input.lower() == "quit":
|
|
||||||
print("👋 Goodbye!")
|
|
||||||
break
|
|
||||||
elif not user_input:
|
|
||||||
continue
|
|
||||||
|
|
||||||
print("Assistant: ", end="", flush=True)
|
|
||||||
stream = await call_generate_stream(
|
|
||||||
client=self.client,
|
|
||||||
endpoint_name=self.endpoint_name,
|
|
||||||
prompt=user_input,
|
|
||||||
max_tokens=MAX_TOKENS,
|
|
||||||
temperature=DEFAULT_TEMPERATURE,
|
|
||||||
)
|
|
||||||
|
|
||||||
full_response = ""
|
|
||||||
async for event in stream:
|
|
||||||
tok = (event.get("token") or {}).get("text")
|
|
||||||
if tok:
|
|
||||||
print(tok, end="", flush=True)
|
|
||||||
full_response += tok
|
|
||||||
print() # newline
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\n👋 Session interrupted. Goodbye!")
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
log.error("\nError: %s", e)
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------- CLI ----------------------
|
|
||||||
def build_arg_parser() -> argparse.ArgumentParser:
|
|
||||||
p = argparse.ArgumentParser(description="Vast TGI Demo (Serverless SDK)")
|
|
||||||
p.add_argument("--endpoint", default=ENDPOINT_NAME, help=f"Vast endpoint name (default: {ENDPOINT_NAME})")
|
|
||||||
|
|
||||||
modes = p.add_mutually_exclusive_group(required=False)
|
|
||||||
modes.add_argument("--generate", action="store_true", help="Test generate endpoint (non-streaming)")
|
|
||||||
modes.add_argument("--generate-stream", action="store_true", help="Test generate endpoint with streaming")
|
|
||||||
modes.add_argument("--interactive", action="store_true", help="Start interactive streaming session")
|
|
||||||
return p
|
|
||||||
|
|
||||||
|
|
||||||
async def main_async():
|
|
||||||
args = build_arg_parser().parse_args()
|
|
||||||
|
|
||||||
selected = sum([args.generate, args.generate_stream, args.interactive])
|
|
||||||
if selected == 0:
|
|
||||||
print("Please specify exactly one test mode:")
|
|
||||||
print(" --generate : Test generate endpoint (non-streaming)")
|
|
||||||
print(" --generate-stream : Test generate endpoint with streaming")
|
|
||||||
print(" --interactive : Start interactive streaming session")
|
|
||||||
print(f"\nExample: python {os.path.basename(sys.argv[0])} --generate-stream --endpoint my-tgi-endpoint")
|
|
||||||
sys.exit(1)
|
|
||||||
elif selected > 1:
|
|
||||||
print("Please specify exactly one test mode")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
print("=" * 60)
|
|
||||||
print(f"Using endpoint: {args.endpoint}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with Serverless() as client:
|
|
||||||
demo = APIDemo(client, args.endpoint)
|
|
||||||
|
|
||||||
if args.generate:
|
|
||||||
await demo.demo_generate()
|
|
||||||
elif args.generate_stream:
|
|
||||||
await demo.demo_generate_stream()
|
|
||||||
elif args.interactive:
|
|
||||||
await demo.interactive_chat()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
log.error("Error during test: %s", e, exc_info=True)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(main_async())
|
asyncio.run(main())
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ def benchmark_generator() -> dict:
|
|||||||
benchmark_data = {
|
benchmark_data = {
|
||||||
"inputs": prompt,
|
"inputs": prompt,
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_new_tokens": 500,
|
"max_new_tokens": 128,
|
||||||
"temperature": 0.7,
|
"temperature": 0.7,
|
||||||
"return_full_text": False
|
"return_full_text": False
|
||||||
}
|
}
|
||||||
@@ -52,18 +52,17 @@ worker_config = WorkerConfig(
|
|||||||
HandlerConfig(
|
HandlerConfig(
|
||||||
route="/generate",
|
route="/generate",
|
||||||
allow_parallel_requests=True,
|
allow_parallel_requests=True,
|
||||||
max_queue_time=600.0,
|
max_queue_time=60.0,
|
||||||
benchmark_config=BenchmarkConfig(
|
benchmark_config=BenchmarkConfig(
|
||||||
generator=benchmark_generator,
|
generator=benchmark_generator,
|
||||||
concurrency=10,
|
concurrency=50
|
||||||
runs=3
|
|
||||||
),
|
),
|
||||||
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
|
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
|
||||||
),
|
),
|
||||||
HandlerConfig(
|
HandlerConfig(
|
||||||
route="/generate_stream",
|
route="/generate_stream",
|
||||||
allow_parallel_requests=True,
|
allow_parallel_requests=True,
|
||||||
max_queue_time=600.0,
|
max_queue_time=60.0,
|
||||||
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
|
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
This is the PyWorker implementation for running **Wan 2.2 T2V A14B** text-to-video workflows in ComfyUI. It provides a unified interface for executing complete ComfyUI video-generation workflows through a proxy-based architecture and returning generated video assets.
|
This is the PyWorker implementation for running **Wan 2.2 T2V A14B** text-to-video workflows in ComfyUI. It provides a unified interface for executing complete ComfyUI video-generation workflows through a proxy-based architecture and returning generated video assets.
|
||||||
|
|
||||||
Each request has a static cost of `10000`. ComfyUI does not support concurrent workloads, and there is no provision to run multiple ComfyUI instances per worker node.
|
Each request has a static cost of `100`. ComfyUI does not support concurrent workloads, and there is no provision to run multiple ComfyUI instances per worker node.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user