From e02f4bc943dbe403b763f789b0ea777abcc64d04 Mon Sep 17 00:00:00 2001 From: Lucas Armand Date: Wed, 17 Dec 2025 11:55:33 -0800 Subject: [PATCH] Lowered concurrency of vLLM and TGI benchmarks --- workers/openai/worker.py | 4 ++-- workers/tgi/worker.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/workers/openai/worker.py b/workers/openai/worker.py index 9298f45..95b4dd9 100644 --- a/workers/openai/worker.py +++ b/workers/openai/worker.py @@ -64,8 +64,8 @@ worker_config = WorkerConfig( max_queue_time=600.0, benchmark_config=BenchmarkConfig( generator=completions_benchmark_generator, - concurrency=100, - runs=2 + concurrency=10, + runs=3 ) ), HandlerConfig( diff --git a/workers/tgi/worker.py b/workers/tgi/worker.py index 85425e2..9d83062 100644 --- a/workers/tgi/worker.py +++ b/workers/tgi/worker.py @@ -55,7 +55,8 @@ worker_config = WorkerConfig( max_queue_time=600.0, benchmark_config=BenchmarkConfig( generator=benchmark_generator, - concurrency=50 + concurrency=10, + runs=3 ), workload_calculator= lambda x: x["parameters"]["max_new_tokens"] ),