fix pyright errors + revert to old way of handling cancelled api requests (#23)
This commit is contained in:
+15
-13
@@ -7,20 +7,20 @@ from lib.server import start_server
|
||||
|
||||
# This line indicates that the inference server is listening
|
||||
MODEL_SERVER_START_LOG_MSG = [
|
||||
"Application startup complete.", # vLLM
|
||||
"llama runner started", # Ollama
|
||||
'"message":"Connected","target":"text_generation_router"', # TGI
|
||||
'"message":"Connected","target":"text_generation_router::server"', # TGI
|
||||
"Application startup complete.", # vLLM
|
||||
"llama runner started", # Ollama
|
||||
'"message":"Connected","target":"text_generation_router"', # TGI
|
||||
'"message":"Connected","target":"text_generation_router::server"', # TGI
|
||||
]
|
||||
|
||||
MODEL_SERVER_ERROR_LOG_MSGS = [
|
||||
"INFO exited: vllm", # vLLM
|
||||
"RuntimeError: Engine", # vLLM
|
||||
"Error: pull model manifest:", # Ollama
|
||||
"stalled; retrying", # Ollama
|
||||
"Error: WebserverFailed", # TGI
|
||||
"Error: DownloadError", # TGI
|
||||
"Error: ShardCannotStart", #TGI
|
||||
"INFO exited: vllm", # vLLM
|
||||
"RuntimeError: Engine", # vLLM
|
||||
"Error: pull model manifest:", # Ollama
|
||||
"stalled; retrying", # Ollama
|
||||
"Error: WebserverFailed", # TGI
|
||||
"Error: DownloadError", # TGI
|
||||
"Error: ShardCannotStart", # TGI
|
||||
]
|
||||
|
||||
logging.basicConfig(
|
||||
@@ -31,8 +31,8 @@ logging.basicConfig(
|
||||
log = logging.getLogger(__file__)
|
||||
|
||||
backend = Backend(
|
||||
model_server_url=os.environ.get("MODEL_SERVER_URL"),
|
||||
model_log_file=os.environ.get("MODEL_LOG"),
|
||||
model_server_url=os.environ["MODEL_SERVER_URL"],
|
||||
model_log_file=os.environ["MODEL_LOG"],
|
||||
allow_parallel_requests=True,
|
||||
benchmark_handler=CompletionsHandler(benchmark_runs=3, benchmark_words=256),
|
||||
log_actions=[
|
||||
@@ -45,9 +45,11 @@ backend = Backend(
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
async def handle_ping(_):
    """Reply to a ping request with the body ``pong``.

    The single positional argument is accepted but never read.
    """
    pong = web.Response(body="pong")
    return pong
|
||||
|
||||
|
||||
routes = [
|
||||
web.post("/v1/completions", backend.create_handler(CompletionsHandler())),
|
||||
web.post("/v1/chat/completions", backend.create_handler(ChatCompletionsHandler())),
|
||||
|
||||
Reference in New Issue
Block a user