Increase max_queue_time for vLLM and TGI

This commit is contained in:
Lucas Armand
2025-12-17 11:38:55 -08:00
parent 29f836eb1a
commit 9daf171487
2 changed files with 4 additions and 4 deletions
+2 -2
View File
@@ -60,8 +60,8 @@ worker_config = WorkerConfig(
route="/v1/completions",
workload_calculator= lambda data: data.get("max_tokens", 0),
allow_parallel_requests=True,
max_queue_time=60.0,
request_parser=request_parser,
max_queue_time=600.0,
benchmark_config=BenchmarkConfig(
generator=completions_benchmark_generator,
concurrency=100,
@@ -72,8 +72,8 @@ worker_config = WorkerConfig(
route="/v1/chat/completions",
workload_calculator= lambda data: data.get("max_tokens", 0),
allow_parallel_requests=True,
max_queue_time=60.0,
request_parser=request_parser
max_queue_time=600.0,
)
],
log_action_config=LogActionConfig(