Increase queue limits for vLLM and TGI

This commit is contained in:
Lucas Armand
2025-12-17 11:38:55 -08:00
parent 29f836eb1a
commit 9daf171487
2 changed files with 4 additions and 4 deletions
+2 -2
View File
@@ -52,7 +52,7 @@ worker_config = WorkerConfig(
HandlerConfig(
route="/generate",
allow_parallel_requests=True,
-max_queue_time=60.0,
+max_queue_time=600.0,
benchmark_config=BenchmarkConfig(
generator=benchmark_generator,
concurrency=50
@@ -62,7 +62,7 @@ worker_config = WorkerConfig(
HandlerConfig(
route="/generate_stream",
allow_parallel_requests=True,
-max_queue_time=60.0,
+max_queue_time=600.0,
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
)
],