b52c654f09
Rather than tailing for "Uvicorn running on", which only confirms the
api-wrapper's own HTTP listener is bound, watch for the api-wrapper's
new structured tokens that reflect actual end-to-end reachability:
MODEL_LOAD_LOG_MSG = ["BACKENDS_READY"]
MODEL_ERROR_LOG_MSGS includes:
- "BACKENDS_READY_TIMEOUT" (backends never came up)
- "BACKEND_UNRECOVERABLE" (CUDA fault latched on a backend)
- "Application startup failed" (kept; uvicorn's own ASGI failure)
Closes the race observed on a live test where the pyworker fired
benchmark the moment uvicorn bound, every request inside the
api-wrapper hit Cannot-connect-to-host on ComfyUI, and the SDK
counted the resulting fast 502s as a fast worker (perf=200).
Tokens are emitted by ai-dock/comfyui-api-wrapper#11 and onward;
earlier wrapper versions won't emit BACKENDS_READY so warm-up stalls
indefinitely — pin to a wrapper that includes that change.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
247 lines
9.7 KiB
Python
247 lines
9.7 KiB
Python
"""ComfyUI worker for the vast.ai PyWorker SDK.

Each worker runs a benchmark on warm-up. The payload is selected as follows:

  1. If ``misc/benchmark.json`` exists in the cloned worker tree, it is
     used as a custom ComfyUI workflow. Use this if you fork the repo and
     bake in your workflow.
  2. Else, if ``$BENCHMARK_JSON_PATH`` is set and points at a readable
     file, it is used. Use this from a provisioning script — provisioning
     runs before pyworker is cloned, so it cannot write into ``misc/``,
     but it can drop the workflow elsewhere (e.g. ``/workspace/``) and
     export this env var.
  3. Else, if the well-known path
     ``/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json`` exists,
     it is used. The vast.ai ComfyUI base image's ``convert-workflows.sh``
     maintains this as a symlink to the first provisioned workflow, so on
     that image no env var is needed.
  4. Otherwise an SD1.5 Text2Image fallback runs, parameterised by the
     ``BENCHMARK_TEST_{WIDTH,HEIGHT,STEPS}`` env vars and a random prompt
     from ``misc/test_prompts.txt``.

``__RANDOM_INT__`` placeholders in custom workflows are substituted
server-side by ai-dock/comfyui-api-wrapper, so this worker does not handle
them itself.
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import random
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
|
|
|
# Model-server wiring.
#
# The "model server" here is ai-dock's comfyui-api-wrapper, which sits in
# front of ComfyUI; it is not ComfyUI's own port (18188). The log being
# tailed is therefore the api-wrapper's, and the log actions key off the
# structured readiness/fault tokens the wrapper writes to it:
#
#   BACKENDS_READY
#       The api-wrapper has confirmed every ComfyUI backend passes its
#       HTTP+WS probes. Until this fires, posting to /generate/sync can
#       hit "Cannot connect to host" inside the api-wrapper, which the
#       SDK can't recover from since __call_backend doesn't retry
#       connection-refused.
#   BACKENDS_READY_TIMEOUT
#       Backends were never reachable within the api-wrapper's deadline.
#       The worker is unrecoverable; mark it errored.
#   BACKEND_UNRECOVERABLE
#       CUDA fault / illegal memory access on a backend's GPU. Same fate.
#   Application startup failed
#       uvicorn's own ASGI lifespan failed.
#
# The tokens are emitted by ai-dock/comfyui-api-wrapper from the
# "feat/backend-readiness-log-signals" change onward. Older wrappers
# never emit BACKENDS_READY, so warm-up would stall — pin the wrapper
# version accordingly.
MODEL_SERVER_URL = "http://127.0.0.1"
MODEL_SERVER_PORT = 18288
MODEL_LOG_FILE = "/var/log/portal/api-wrapper.log"
MODEL_HEALTHCHECK_ENDPOINT = "/health"
|
|
# Log tokens that mark the model as loaded. The benchmark must only start
# once the full stack (api-wrapper + ComfyUI backends) is reachable, which
# is what the api-wrapper's BACKENDS_READY token reports.
MODEL_LOAD_LOG_MSG: list[str] = ["BACKENDS_READY"]
|
|
# Log tokens treated as LogAction.ModelError.
#
# ModelError is fatal: the SDK calls backend_errored() and locks the
# worker into a permanent error state. A pattern therefore belongs here
# only if it means the api-wrapper genuinely cannot serve any request —
# supervisord restarts on uvicorn exit, so a real failure self-heals
# rather than dragging the worker down.
#
# Deliberately left out:
#   - Per-request errors (PreprocessWorker failures, ComfyUI workflow
#     validation, "Value not in list:"). Matching them would let one
#     malformed client payload kill the worker.
#   - "CUDA out of memory". It surfaces both for a misconfigured GPU
#     (already caught by the benchmark-failure path via backend_errored)
#     and for a too-greedy client request, and a substring match cannot
#     tell the two apart.
#   - convert-workflows.sh warnings. That script is not load-bearing for
#     serving.
MODEL_ERROR_LOG_MSGS: list[str] = [
    # Backends never became reachable.
    "BACKENDS_READY_TIMEOUT",
    # CUDA fault latched per backend.
    "BACKEND_UNRECOVERABLE",
    # uvicorn ASGI lifespan startup failed.
    "Application startup failed",
]
|
|
# Log tokens treated as LogAction.Info, which is purely informational
# (matching lines are echoed into the vast console). Intentionally empty:
# nothing in api-wrapper.log is currently worth surfacing — model
# downloads happen upstream in provisioning, and per-request logs are too
# noisy.
MODEL_INFO_LOG_MSGS: list[str] = []
|
|
# Benchmark assets ship in ``misc/`` next to this file. Paths are anchored
# on ``__file__`` so the worker keeps working regardless of the launch cwd.
MISC_DIR = Path(__file__).parent.joinpath("misc")
BENCHMARK_FILE = MISC_DIR.joinpath("benchmark.json")
TEST_PROMPTS = MISC_DIR.joinpath("test_prompts.txt")

# Well-known location maintained by the vast.ai ComfyUI base image:
# convert-workflows.sh symlinks it to the first provisioned workflow, so
# the base image works out of the box without any env var.
WELLKNOWN_BENCHMARK = Path("/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json")

log = logging.getLogger(__name__)

# Prompt used when test_prompts.txt is unreadable or empty. Bare and
# generic on purpose — it seeds a benchmark, it is not creative output.
_FALLBACK_PROMPT = "a still life on a wooden table, soft daylight"
|
|
|
|
def _env_int(name: str, default: int) -> int:
    """Return environment variable ``name`` parsed as an int.

    An unset or empty variable yields ``default`` silently. A value that
    ``int()`` rejects also yields ``default``, after a warning is logged.
    """
    value = os.environ.get(name)
    if not value:
        # Covers both "unset" (None) and "set but empty" ("").
        return default
    try:
        parsed = int(value)
    except ValueError:
        log.warning("ignoring %s=%r (not an int); using default %d", name, value, default)
        return default
    return parsed
|
|
|
|
def _try_load_workflow(path: Path) -> dict | None:
|
|
"""Load and return a benchmark workflow from ``path``.
|
|
|
|
Returns None on any failure (path missing, not a regular file,
|
|
unreadable, invalid JSON) so the caller can fall through to the
|
|
next tier rather than dropping straight to the SD1.5 default.
|
|
"""
|
|
if not path.is_file():
|
|
return None
|
|
try:
|
|
with open(path) as f:
|
|
return json.load(f)
|
|
except (json.JSONDecodeError, OSError) as e:
|
|
log.warning("Failed to load %s: %s; trying next tier", path, e)
|
|
return None
|
|
|
|
|
|
def _custom_workflow_payload() -> dict | None:
    """Build a benchmark payload from the first usable custom workflow.

    Tiers are tried in this order: the in-tree ``misc/benchmark.json``,
    ``$BENCHMARK_JSON_PATH`` (only when set and non-empty), then the
    well-known base-image symlink. The first tier that loads cleanly is
    wrapped as a payload with a random ``request_id``. Returns None when
    every tier is absent or unreadable.
    """
    tiers = [("misc", BENCHMARK_FILE), ("well-known", WELLKNOWN_BENCHMARK)]
    override = os.getenv("BENCHMARK_JSON_PATH")
    if override:
        # The operator-supplied path ranks between the in-tree file and
        # the base-image default.
        tiers.insert(1, ("env", Path(override)))

    for source, candidate in tiers:
        if candidate.is_file():
            loaded = _try_load_workflow(candidate)
            if loaded is not None:
                log.info("Using custom benchmark workflow from %s (%s)", candidate, source)
                request_id = f"test-{random.randint(1000, 99999)}"
                return {"input": {"request_id": request_id, "workflow_json": loaded}}
        elif source == "env":
            # Only the env tier warns about a missing file: silently
            # falling through there is a footgun (a typo leads to the
            # SD1.5 fallback and the operator wonders why the custom
            # benchmark didn't take).
            log.warning(
                "BENCHMARK_JSON_PATH=%s is not a readable file; trying fallbacks", candidate
            )
    return None
|
|
|
|
def _load_prompts() -> list[str]:
    """Return the non-blank lines of ``misc/test_prompts.txt``, stripped.

    Defensive against a missing, unreadable, undecodable or empty file:
    in each case a warning is logged and a one-element list holding the
    built-in fallback prompt is returned, so the result is never empty.
    """
    try:
        # Decode explicitly as UTF-8 so prompts containing non-ASCII
        # characters read the same regardless of the host locale.
        with open(TEST_PROMPTS, encoding="utf-8") as f:
            prompts = [text for line in f if (text := line.strip())]
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError; without it here a
        # mis-encoded prompts file would raise instead of falling back.
        log.warning("could not read %s: %s; using built-in fallback prompt", TEST_PROMPTS, e)
        return [_FALLBACK_PROMPT]
    if not prompts:
        log.warning("%s is empty; using built-in fallback prompt", TEST_PROMPTS)
        return [_FALLBACK_PROMPT]
    return prompts
|
|
|
|
def _default_payload() -> dict:
    """Assemble the SD1.5 Text2Image fallback payload.

    The prompt is drawn at random from the loaded prompt list, the image
    size and step count come from the ``BENCHMARK_TEST_WIDTH`` /
    ``BENCHMARK_TEST_HEIGHT`` / ``BENCHMARK_TEST_STEPS`` env vars
    (defaults 512 / 512 / 20), and the seed and request id are random.
    """
    candidates = _load_prompts()
    request_id = f"test-{random.randint(1000, 99999)}"
    modifications = {
        "prompt": random.choice(candidates),
        "width": _env_int("BENCHMARK_TEST_WIDTH", 512),
        "height": _env_int("BENCHMARK_TEST_HEIGHT", 512),
        "steps": _env_int("BENCHMARK_TEST_STEPS", 20),
        "seed": random.randint(0, sys.maxsize),
    }
    return {
        "input": {
            "request_id": request_id,
            "modifier": "Text2Image",
            "modifications": modifications,
        }
    }
|
|
|
|
def make_benchmark_payload() -> dict:
    """Produce the payload for a single benchmark request.

    A custom workflow payload is preferred; the SD1.5 Text2Image default
    is used when no custom workflow is available.

    The SDK calls this once per benchmark run. Supplying a generator
    (instead of a static ``dataset=``) means every run re-picks a prompt
    and re-rolls the seed, and it avoids holding multiple copies of a
    large workflow JSON in memory.
    """
    custom = _custom_workflow_payload()
    return custom if custom else _default_payload()
|
|
|
|
# Handler for the one route this worker exposes. Its benchmark payloads
# are produced per run by make_benchmark_payload.
_generate_sync_handler = HandlerConfig(
    route="/generate/sync",
    allow_parallel_requests=False,
    max_queue_time=10.0,
    benchmark_config=BenchmarkConfig(generator=make_benchmark_payload),
)

# Maps the api-wrapper log tokens declared earlier in this module onto the
# SDK's load / error / info log actions.
_log_actions = LogActionConfig(
    on_load=MODEL_LOAD_LOG_MSG,
    on_error=MODEL_ERROR_LOG_MSGS,
    on_info=MODEL_INFO_LOG_MSGS,
)

worker_config = WorkerConfig(
    model_server_url=MODEL_SERVER_URL,
    model_server_port=MODEL_SERVER_PORT,
    model_log_file=MODEL_LOG_FILE,
    model_healthcheck_url=MODEL_HEALTHCHECK_ENDPOINT,
    handlers=[_generate_sync_handler],
    log_action_config=_log_actions,
)

# Start the worker with the configuration above.
Worker(worker_config).run()