diff --git a/workers/null/README.md b/workers/null/README.md
index 2aa1653..ee85a64 100644
--- a/workers/null/README.md
+++ b/workers/null/README.md
@@ -28,10 +28,10 @@ held `/reserve` returns `200`.
 
 ## How it works
 
-- `allow_parallel_requests=False`, so one in-flight `/reserve` fully occupies
-  the worker. Any second request that lands on the same worker queues (or is
-  rejected with `429` after `max_queue_time`), pushing the autoscaler to
-  provision more workers.
+- `allow_parallel_requests=False` and `max_queue_time=0.0`, so one in-flight
+  `/reserve` fully occupies the worker and any further `/reserve` that lands
+  on it is rejected with `429` immediately — serverless will route it to a
+  free worker or scale up a new one.
 - `lifecycle` is used instead of `model_log_file`, so there is no log to tail
   and no model server to start. The worker reports itself ready immediately
   after the (trivial) benchmark.
@@ -85,8 +85,8 @@ Behavior:
   the duration cap fires (safety net for a stuck consumer).
 - Returns `499` if the external client disconnects (counted as cancelled in
   metrics — avoid this; use `/release` instead).
-- Returns `429` if the worker is already busy and queue wait would exceed
-  `max_queue_time` (30s by default).
+- Returns `429` immediately if the worker is already holding a reservation
+  (so serverless routes the request to a free worker instead of queueing).
 
 ### `POST /release`  (internal port, localhost-only)
 
diff --git a/workers/null/worker.py b/workers/null/worker.py
index bd2f505..480f4d5 100644
--- a/workers/null/worker.py
+++ b/workers/null/worker.py
@@ -159,7 +159,12 @@ worker_config = WorkerConfig(
         HandlerConfig(
             route="/reserve",
             allow_parallel_requests=False,
-            max_queue_time=30.0,
+            # Reject (429) any /reserve that arrives while the worker is
+            # already busy. A held reservation lasts up to
+            # MAX_RESERVATION_SECONDS, so queueing behind it would mean hours
+            # of wait — better to bounce the request immediately so serverless
+            # routes it to a free worker (or spins up a new one).
+            max_queue_time=0.0,
             remote_function=reserve_worker,
             workload_calculator=lambda _payload: 100.0,
             benchmark_config=BenchmarkConfig(