Lowered concurrency of vLLM and TGI benchmarks

This commit is contained in:
Lucas Armand
2025-12-17 11:55:33 -08:00
parent bcb04b9a32
commit e02f4bc943
2 changed files with 4 additions and 3 deletions
+2 -1
View File
@@ -55,7 +55,8 @@ worker_config = WorkerConfig(
max_queue_time=600.0,
benchmark_config=BenchmarkConfig(
generator=benchmark_generator,
-concurrency=50
+concurrency=10,
+runs=3
),
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
),