From ed0db198c35d3d4d8e0756c973cb85b92e18da37 Mon Sep 17 00:00:00 2001 From: Rob Ballantyne Date: Mon, 11 May 2026 17:05:02 +0100 Subject: [PATCH] Reject queued /reserve immediately on busy null workers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A held reservation runs for up to MAX_RESERVATION_SECONDS (default 1h), so queueing a second /reserve behind it makes no sense — the wait would dwarf any sane timeout. Set max_queue_time=0.0 so the framework rejects 429 as soon as another reservation is in flight, and serverless routes the request to a free worker or scales a new one up. Co-Authored-By: Claude Opus 4.7 (1M context) --- workers/null/README.md | 12 ++++++------ workers/null/worker.py | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/workers/null/README.md b/workers/null/README.md index 2aa1653..ee85a64 100644 --- a/workers/null/README.md +++ b/workers/null/README.md @@ -28,10 +28,10 @@ held `/reserve` returns `200`. ## How it works -- `allow_parallel_requests=False`, so one in-flight `/reserve` fully occupies - the worker. Any second request that lands on the same worker queues (or is - rejected with `429` after `max_queue_time`), pushing the autoscaler to - provision more workers. +- `allow_parallel_requests=False` and `max_queue_time=0.0`, so one in-flight + `/reserve` fully occupies the worker and any further request that lands + on it is rejected with `429` immediately — serverless will route to a + free worker or scale a new one up. - `lifecycle` is used instead of `model_log_file`, so there is no log to tail and no model server to start. The worker reports itself ready immediately after the (trivial) benchmark. @@ -85,8 +85,8 @@ Behavior: the duration cap fires (safety net for a stuck consumer). - Returns `499` if the external client disconnects (counted as cancelled in metrics — avoid this; use `/release` instead). 
-- Returns `429` if the worker is already busy and queue wait would exceed - `max_queue_time` (30s by default). +- Returns `429` immediately if the worker is already holding a reservation + (so serverless routes the request to a free worker instead of queueing). ### `POST /release` (internal port, localhost-only) diff --git a/workers/null/worker.py b/workers/null/worker.py index bd2f505..480f4d5 100644 --- a/workers/null/worker.py +++ b/workers/null/worker.py @@ -159,7 +159,12 @@ worker_config = WorkerConfig( HandlerConfig( route="/reserve", allow_parallel_requests=False, - max_queue_time=30.0, + # Reject (429) any /reserve that arrives while the worker is + # already busy. A held reservation lasts up to + # MAX_RESERVATION_SECONDS, so queueing behind it would mean hours + # of wait — better to bounce the request immediately so serverless + # routes it to a free worker (or spins up a new one). + max_queue_time=0.0, remote_function=reserve_worker, workload_calculator=lambda _payload: 100.0, benchmark_config=BenchmarkConfig(