Lowered concurrency of vLLM and TGI benchmarks

2025-12-17 11:55:33 -08:00
parent bcb04b9a32
commit e02f4bc943
2 changed files with 4 additions and 3 deletions
@@ -55,7 +55,8 @@ worker_config = WorkerConfig(
            max_queue_time=600.0,
            benchmark_config=BenchmarkConfig(
                generator=benchmark_generator,
-                concurrency=50
+                concurrency=10,
+                runs=3
            ),
            workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
        ),