Lowered concurrency of vLLM and TGI benchmarks

2025-12-17 11:55:33 -08:00
parent bcb04b9a32
commit e02f4bc943
2 changed files with 4 additions and 3 deletions
@@ -64,8 +64,8 @@ worker_config = WorkerConfig(
            max_queue_time=600.0,
            benchmark_config=BenchmarkConfig(
                generator=completions_benchmark_generator,
-                concurrency=100,
+                concurrency=10,
-                runs=2
+                runs=3
            )
        ),
        HandlerConfig(
@@ -55,7 +55,8 @@ worker_config = WorkerConfig(
            max_queue_time=600.0,
            benchmark_config=BenchmarkConfig(
                generator=benchmark_generator,
-                concurrency=50
+                concurrency=10,
+                runs=3
            ),
            workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
        ),