diff --git a/workers/comfyui-json/README.md b/workers/comfyui-json/README.md index c498c9b..ffde97c 100644 --- a/workers/comfyui-json/README.md +++ b/workers/comfyui-json/README.md @@ -112,6 +112,8 @@ You can provide a custom ComfyUI workflow for benchmarking. This allows you to t 2. **Write the file during provisioning** to a path *outside* the pyworker tree (e.g. `/workspace/benchmark.json`) and export `BENCHMARK_JSON_PATH` so the worker can find it. The pyworker repo is cloned by `start_server.sh` *after* provisioning runs, so provisioning cannot write into `misc/` directly — the destination would be clobbered, or the clone would fail. 3. **Run on the vast.ai ComfyUI base image.** Its `convert-workflows.sh` maintains `/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json` as a symlink to the first provisioned workflow; the worker reads this automatically when neither of the above is set. No env var required. + *Note on timing:* the symlink is created only after `convert-workflows.sh` finishes converting workflows, which races with the pyworker's own warm-up — both unblock when ComfyUI is ready, but conversion takes a few seconds longer. To avoid losing the race, the worker waits up to 30 seconds for the symlink before falling through. Override the timeout with `BENCHMARK_WAIT_TIMEOUT` if conversion is slow on your setup. The wait fires only on the well-known tier and only once per worker process; non-base-image deployments skip it (the parent directory `/opt/comfyui-api-wrapper/workflows/` doesn't exist, so there's nothing to wait for). + If `BENCHMARK_JSON_PATH` is set but points at a missing or unreadable file, the worker logs a warning and falls through to the next tier rather than going straight to the SD1.5 fallback. An example workflow is provided at `workers/comfyui-json/misc/benchmark.json.example`. To ensure varied generations, use the placeholder `__RANDOM_INT__` in place of static seed values — it will be replaced with a random integer for each benchmark run. @@ -125,6 +127,7 @@ The default benchmark uses Stable Diffusion v1.5 with ComfyUI's standard text-to | Environment Variable | Default Value | Description | | -------------------- | ------------- | ----------- | | BENCHMARK_JSON_PATH | (unset) | Path to a custom workflow file outside the pyworker tree. Used if `misc/benchmark.json` is absent. Falls through to `/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json` if set but missing. | +| BENCHMARK_WAIT_TIMEOUT | 30 | Seconds to wait for the well-known symlink before giving up (base image only). | | BENCHMARK_TEST_WIDTH | 512 | Fallback benchmark: image width (pixels) | | BENCHMARK_TEST_HEIGHT | 512 | Fallback benchmark: image height (pixels) | | BENCHMARK_TEST_STEPS | 20 | Fallback benchmark: number of denoising steps | diff --git a/workers/comfyui-json/worker.py b/workers/comfyui-json/worker.py index e6dcee3..5e2e2c1 100644 --- a/workers/comfyui-json/worker.py +++ b/workers/comfyui-json/worker.py @@ -29,6 +29,7 @@ import logging import os import random import sys +import time from pathlib import Path from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig @@ -65,8 +66,43 @@ TEST_PROMPTS = MISC_DIR / "test_prompts.txt" # letting the base image work out-of-the-box without any env var. WELLKNOWN_BENCHMARK = Path("/opt/comfyui-api-wrapper/workflows/pyworker_benchmark.json") +# How long to wait for the well-known symlink to appear before giving up. +# convert-workflows.sh and the pyworker both unblock at "ComfyUI ready", +# but conversion takes a few seconds — without this wait the first +# benchmark loses the race and silently drops to the SD1.5 fallback. +# Tunable for slow setups (many workflows / slow disk). +_WELLKNOWN_WAIT_SECS = float(os.getenv("BENCHMARK_WAIT_TIMEOUT", "30")) + log = logging.getLogger(__name__) +_wait_done = False + + +def _wait_for_wellknown() -> None: + """Wait at most once per process for ``WELLKNOWN_BENCHMARK`` to appear. + + Skipped immediately if the parent directory doesn't exist (we're + not on the base image, so the symlink will never appear and there's + no point burning the timeout). Skipped on subsequent calls regardless + of outcome — if the file *does* show up later, ``_resolve_benchmark_path`` + will still pick it up via the regular ``.exists()`` check on the next + benchmark run. + """ + global _wait_done + if _wait_done: + return + _wait_done = True + if WELLKNOWN_BENCHMARK.exists() or not WELLKNOWN_BENCHMARK.parent.is_dir(): + return + deadline = time.monotonic() + _WELLKNOWN_WAIT_SECS + log.info("Waiting up to %.0fs for %s", _WELLKNOWN_WAIT_SECS, WELLKNOWN_BENCHMARK) + while time.monotonic() < deadline: + if WELLKNOWN_BENCHMARK.exists(): + log.info("Found %s after wait", WELLKNOWN_BENCHMARK) + return + time.sleep(0.5) + log.info("%s did not appear within %.0fs; falling through", WELLKNOWN_BENCHMARK, _WELLKNOWN_WAIT_SECS) + def _resolve_benchmark_path() -> Path | None: """Return the path to the custom benchmark workflow, or None if absent. @@ -84,6 +120,7 @@ def _resolve_benchmark_path() -> Path | None: if path.exists(): return path log.warning("BENCHMARK_JSON_PATH=%s does not exist; trying fallbacks", path) + _wait_for_wellknown() if WELLKNOWN_BENCHMARK.exists(): return WELLKNOWN_BENCHMARK return None