From e02f4bc943dbe403b763f789b0ea777abcc64d04 Mon Sep 17 00:00:00 2001
From: Lucas Armand <lucas@vast.ai>
Date: Wed, 17 Dec 2025 11:55:33 -0800
Subject: [PATCH] Lowered concurrency and set runs=3 for vLLM and TGI benchmarks

---
 workers/openai/worker.py | 4 ++--
 workers/tgi/worker.py    | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/workers/openai/worker.py b/workers/openai/worker.py
index 9298f45..95b4dd9 100644
--- a/workers/openai/worker.py
+++ b/workers/openai/worker.py
@@ -64,8 +64,8 @@ worker_config = WorkerConfig(
             max_queue_time=600.0,
             benchmark_config=BenchmarkConfig(
                 generator=completions_benchmark_generator,
-                concurrency=100,
-                runs=2
+                concurrency=10,
+                runs=3
             )
         ),
         HandlerConfig(
diff --git a/workers/tgi/worker.py b/workers/tgi/worker.py
index 85425e2..9d83062 100644
--- a/workers/tgi/worker.py
+++ b/workers/tgi/worker.py
@@ -55,7 +55,8 @@ worker_config = WorkerConfig(
             max_queue_time=600.0,
             benchmark_config=BenchmarkConfig(
                 generator=benchmark_generator,
-                concurrency=50
+                concurrency=10,
+                runs=3
             ),
             workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
         ),