From 8df562e243efc3d739f9db1274edc946163b3112 Mon Sep 17 00:00:00 2001
From: Rob Ballantyne <rob@vast.ai>
Date: Mon, 11 May 2026 18:17:57 +0100
Subject: [PATCH] Standardize null pyworker load/perf on 150

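Bump the null worker's workload_calculator, the pre-populated
.has_benchmark cache value, and the client's /reserve request cost from
100 to 150.

Also rewrite the benchmark comment in reserve_worker to describe the 1s
sleep as a fallback that only runs when the .has_benchmark cache write
failed.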

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 workers/null/client.py |  2 +-
 workers/null/worker.py | 16 +++++++---------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/workers/null/client.py b/workers/null/client.py
index 29ac192..eacce49 100644
--- a/workers/null/client.py
+++ b/workers/null/client.py
@@ -36,7 +36,7 @@ async def reserve(
     start = time.monotonic()
     log.info("[%s] POST /reserve duration=%ss", label, duration)
     try:
-        resp = await endpoint.request("/reserve", payload, cost=100)
+        resp = await endpoint.request("/reserve", payload, cost=150)
         elapsed = time.monotonic() - start
         log.info("[%s] returned after %.1fs: %s", label, elapsed, resp.get("response"))
         return resp["response"]
diff --git a/workers/null/worker.py b/workers/null/worker.py
index 7722aaa..1410704 100644
--- a/workers/null/worker.py
+++ b/workers/null/worker.py
@@ -97,9 +97,9 @@ async def null_lifecycle():
     # vastai.serverless.server.lib.backend.
     try:
         with open(".has_benchmark", "w") as fh:
-            fh.write("100")
+            fh.write("150")
     except OSError as e:
-        log.warning(f"Could not pin benchmark cache to 100: {e}")
+        log.warning(f"Could not pin benchmark cache to 150: {e}")
 
     app = _build_internal_app()
     runner = web.AppRunner(app)
@@ -129,12 +129,10 @@ async def reserve_worker(**params: object) -> dict:
     global _active_reservation
 
     if params.get(BENCHMARK_SENTINEL):
-        # The framework computes max_throughput = workload / time during the
-        # startup benchmark. A null worker has no real throughput concept,
-        # so we deliberately take ~1s with workload=100 to pin
-        # max_throughput to ~100. Without this the near-instant benchmark
-        # would report hundreds of thousands of workload/sec, distorting
-        # any downstream capacity math.
+        # Fallback path only — the lifecycle pre-populates .has_benchmark
+        # with "150" so __run_benchmark normally short-circuits and never
+        # invokes us. If the cache write failed, sleep ~1s so the
+        # time-based calculation lands near 150 (workload=150 / time~=1s).
         await asyncio.sleep(1.0)
         return {"ok": True, "benchmark": True}
 
@@ -186,7 +184,7 @@ worker_config = WorkerConfig(
             # it to a free worker (or spins up a new one).
             max_queue_time=0.0,
             remote_function=reserve_worker,
-            workload_calculator=lambda _payload: 100.0,
+            workload_calculator=lambda _payload: 150.0,
             benchmark_config=BenchmarkConfig(
                 generator=lambda: {BENCHMARK_SENTINEL: True},
                 runs=1,