From ef3f34a5159eff262f92d6618ff153a1d3fdb15a Mon Sep 17 00:00:00 2001
From: Rob Ballantyne <rob@vast.ai>
Date: Mon, 11 May 2026 18:00:46 +0100
Subject: [PATCH] Restructure null pyworker --demo as a clean trapezoid

Three reservations 30s apart, each with a 90s duration. They end one at
a time, also 30s apart, then the client exits. Each reservation ends
via its duration cap (200 success) rather than the previous "cancel one,
leave two open" pattern that left two 499s pending.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 workers/null/README.md |  9 +++++----
 workers/null/client.py | 36 ++++++++++++++++--------------------
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/workers/null/README.md b/workers/null/README.md
index 9b9e367..4b2848f 100644
--- a/workers/null/README.md
+++ b/workers/null/README.md
@@ -151,10 +151,11 @@ Staggered demo:
 python -m workers.null.client --endpoint <ENDPOINT_NAME> --demo
 ```
 
-Starts three reservations 30s apart (all held concurrently), waits another
-30s, then cancels the first by dropping its HTTP connection. The remaining
-two run until their duration cap. Useful for watching scale-up and
-scale-down behaviour in the autoscaler dashboard.
+Starts three reservations 30s apart, each with a 90s duration, so all
+three are held concurrently for 30s. They then end one at a time, also 30s
+apart, and the client exits — a clean trapezoidal load curve for watching
+scale-up and scale-down in the autoscaler dashboard. Each reservation ends
+via its duration cap (a 200 success in metrics).
 
 ## Notes and caveats
 
diff --git a/workers/null/client.py b/workers/null/client.py
index 2239a31..ac5c46a 100644
--- a/workers/null/client.py
+++ b/workers/null/client.py
@@ -50,46 +50,43 @@ async def run_demo(
     client: Serverless,
     *,
     endpoint_name: str,
-    duration: float,
     interval: float,
 ) -> None:
-    """Reserve, wait, reserve, wait, reserve, wait, cancel one.
+    """Trapezoidal load: ramp up three reservations, then let them scale down.
 
-    All three reservations run concurrently as separate held HTTP requests.
-    After all three are in flight, we cancel the first to demonstrate the
-    early-release path. The remaining two are left to run to their natural
-    duration cap (or you can ctrl-c to drop them).
+    Start three reservations spaced `interval` seconds apart, each with a
+    duration equal to 3 * interval. The staggered starts and identical
+    durations mean they end one at a time, also `interval` apart, so the
+    load curve ramps up over 2*interval, plateaus at 3 for `interval`, and
+    ramps down over 2*interval. Each reservation ends via its duration cap
+    (a 200 success, not a 499 cancellation).
     """
+    hold = interval * 3
     tasks: list[asyncio.Task] = []
     for i in range(1, 4):
         label = f"res-{i}"
+        log.info(
+            "[%s] starting (auto-release after %.0fs)", label, hold
+        )
         task = asyncio.create_task(
             reserve(
                 client,
                 endpoint_name=endpoint_name,
-                duration=duration,
+                duration=hold,
                 label=label,
             ),
             name=label,
         )
         tasks.append(task)
         if i < 3:
-            log.info("Waiting %.0fs before starting next reservation...", interval)
+            log.info("Waiting %.0fs before next reservation...", interval)
             await asyncio.sleep(interval)
 
     log.info(
-        "All 3 reservations in flight. Waiting %.0fs, then cancelling res-1...",
+        "All 3 reservations in flight; they will scale down %.0fs apart, "
+        "starting in %.0fs",
         interval,
-    )
-    await asyncio.sleep(interval)
-
-    log.info("Cancelling res-1 (drops the HTTP connection — produces a 499)")
-    tasks[0].cancel()
-
-    log.info(
-        "res-2 and res-3 left running. They will end at their duration cap "
-        "(%.0fs), or you can ctrl-c to drop them.",
-        duration,
+        hold - 2 * interval,
     )
     results = await asyncio.gather(*tasks, return_exceptions=True)
     for task, result in zip(tasks, results):
@@ -144,7 +141,6 @@ async def main_async():
                 await run_demo(
                     client,
                     endpoint_name=args.endpoint,
-                    duration=args.duration,
                     interval=args.interval,
                 )
             else: