Fix pyright errors and revert to the old way of handling cancelled API requests (#23)

2025-07-17 16:59:06 -07:00
parent 9e369c55a5
commit be2aafdb1f
7 changed files with 265 additions and 234 deletions
@@ -7,20 +7,20 @@ from lib.server import start_server

 # This line indicates that the inference server is listening
 MODEL_SERVER_START_LOG_MSG = [
-    "Application startup complete.", # vLLM
-    "llama runner started", # Ollama
-    '"message":"Connected","target":"text_generation_router"', # TGI
-    '"message":"Connected","target":"text_generation_router::server"', # TGI
+    "Application startup complete.",  # vLLM
+    "llama runner started",  # Ollama
+    '"message":"Connected","target":"text_generation_router"',  # TGI
+    '"message":"Connected","target":"text_generation_router::server"',  # TGI
 ]

 MODEL_SERVER_ERROR_LOG_MSGS = [
-    "INFO exited: vllm", # vLLM
-    "RuntimeError: Engine", # vLLM
-    "Error: pull model manifest:", # Ollama
-    "stalled; retrying", # Ollama
-    "Error: WebserverFailed", # TGI
-    "Error: DownloadError", # TGI
-    "Error: ShardCannotStart", #TGI
+    "INFO exited: vllm",  # vLLM
+    "RuntimeError: Engine",  # vLLM
+    "Error: pull model manifest:",  # Ollama
+    "stalled; retrying",  # Ollama
+    "Error: WebserverFailed",  # TGI
+    "Error: DownloadError",  # TGI
+    "Error: ShardCannotStart",  # TGI
 ]

 logging.basicConfig(
@@ -31,8 +31,8 @@ logging.basicConfig(
 log = logging.getLogger(__file__)

 backend = Backend(
-    model_server_url=os.environ.get("MODEL_SERVER_URL"),
-    model_log_file=os.environ.get("MODEL_LOG"),
+    model_server_url=os.environ["MODEL_SERVER_URL"],
+    model_log_file=os.environ["MODEL_LOG"],
    allow_parallel_requests=True,
    benchmark_handler=CompletionsHandler(benchmark_runs=3, benchmark_words=256),
    log_actions=[
@@ -45,9 +45,11 @@ backend = Backend(
    ],
 )

+
 async def handle_ping(_):
    return web.Response(body="pong")

+
 routes = [
    web.post("/v1/completions", backend.create_handler(CompletionsHandler())),
    web.post("/v1/chat/completions", backend.create_handler(ChatCompletionsHandler())),