diff --git a/workers/openai/worker.py b/workers/openai/worker.py
index 995fb3d..96ad077 100644
--- a/workers/openai/worker.py
+++ b/workers/openai/worker.py
@@ -60,8 +60,8 @@ worker_config = WorkerConfig(
             route="/v1/completions",
             workload_calculator= lambda data: data.get("max_tokens", 0),
             allow_parallel_requests=True,
-            max_queue_time=60.0,
             request_parser=request_parser,
+            max_queue_time=600.0,
             benchmark_config=BenchmarkConfig(
                 generator=completions_benchmark_generator,
                 concurrency=100,
@@ -72,8 +72,8 @@ worker_config = WorkerConfig(
             route="/v1/chat/completions",
             workload_calculator= lambda data: data.get("max_tokens", 0),
             allow_parallel_requests=True,
-            max_queue_time=60.0,
-            request_parser=request_parser
+            request_parser=request_parser,
+            max_queue_time=600.0,
         )
     ],
     log_action_config=LogActionConfig(
diff --git a/workers/tgi/worker.py b/workers/tgi/worker.py
index f8084ab..85425e2 100644
--- a/workers/tgi/worker.py
+++ b/workers/tgi/worker.py
@@ -52,7 +52,7 @@ worker_config = WorkerConfig(
         HandlerConfig(
             route="/generate",
             allow_parallel_requests=True,
-            max_queue_time=60.0,
+            max_queue_time=600.0,
             benchmark_config=BenchmarkConfig(
                 generator=benchmark_generator,
                 concurrency=50
@@ -62,7 +62,7 @@ worker_config = WorkerConfig(
         HandlerConfig(
             route="/generate_stream",
             allow_parallel_requests=True,
-            max_queue_time=60.0,
+            max_queue_time=600.0,
             workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
         )
     ],