Standardize null pyworker load/perf on 150

Bump workload_calculator, benchmark cache value, and client cost from 100
to 150.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Rob Ballantyne
2026-05-11 18:17:57 +01:00
parent 4eef5e22af
commit 8df562e243
2 changed files with 8 additions and 10 deletions
+1 -1
View File
@@ -36,7 +36,7 @@ async def reserve(
start = time.monotonic() start = time.monotonic()
log.info("[%s] POST /reserve duration=%ss", label, duration) log.info("[%s] POST /reserve duration=%ss", label, duration)
try: try:
resp = await endpoint.request("/reserve", payload, cost=100) resp = await endpoint.request("/reserve", payload, cost=150)
elapsed = time.monotonic() - start elapsed = time.monotonic() - start
log.info("[%s] returned after %.1fs: %s", label, elapsed, resp.get("response")) log.info("[%s] returned after %.1fs: %s", label, elapsed, resp.get("response"))
return resp["response"] return resp["response"]
+7 -9
View File
@@ -97,9 +97,9 @@ async def null_lifecycle():
# vastai.serverless.server.lib.backend. # vastai.serverless.server.lib.backend.
try: try:
with open(".has_benchmark", "w") as fh: with open(".has_benchmark", "w") as fh:
fh.write("100") fh.write("150")
except OSError as e: except OSError as e:
log.warning(f"Could not pin benchmark cache to 100: {e}") log.warning(f"Could not pin benchmark cache to 150: {e}")
app = _build_internal_app() app = _build_internal_app()
runner = web.AppRunner(app) runner = web.AppRunner(app)
@@ -129,12 +129,10 @@ async def reserve_worker(**params: object) -> dict:
global _active_reservation global _active_reservation
if params.get(BENCHMARK_SENTINEL): if params.get(BENCHMARK_SENTINEL):
# The framework computes max_throughput = workload / time during the # Fallback path only — the lifecycle pre-populates .has_benchmark
# startup benchmark. A null worker has no real throughput concept, # with "150" so __run_benchmark normally short-circuits and never
# so we deliberately take ~1s with workload=100 to pin # invokes us. If the cache write failed, sleep ~1s so the
# max_throughput to ~100. Without this the near-instant benchmark # time-based calculation lands near 150 (workload=150 / time~=1s).
# would report hundreds of thousands of workload/sec, distorting
# any downstream capacity math.
await asyncio.sleep(1.0) await asyncio.sleep(1.0)
return {"ok": True, "benchmark": True} return {"ok": True, "benchmark": True}
@@ -186,7 +184,7 @@ worker_config = WorkerConfig(
# it to a free worker (or spins up a new one). # it to a free worker (or spins up a new one).
max_queue_time=0.0, max_queue_time=0.0,
remote_function=reserve_worker, remote_function=reserve_worker,
workload_calculator=lambda _payload: 100.0, workload_calculator=lambda _payload: 150.0,
benchmark_config=BenchmarkConfig( benchmark_config=BenchmarkConfig(
generator=lambda: {BENCHMARK_SENTINEL: True}, generator=lambda: {BENCHMARK_SENTINEL: True},
runs=1, runs=1,