Catch the case where all benchmark requests fail (marks the backend as errored)

Suppress matplotlib debug logs
Fix
2025-10-27 12:01:55 -07:00 · 2025-10-25 16:18:02 -07:00 · 2025-10-24 15:44:38 -07:00 · 2025-10-24 15:41:00 -07:00
5 changed files with 63 additions and 15 deletions
@@ -26,7 +26,8 @@ from lib.data_types import (
    LogAction,
    ApiPayload_T,
    JsonDataException,
-    RequestMetrics
+    RequestMetrics,
    BenchmarkResult
 )
 VERSION = "0.1.0"
@@ -332,18 +333,26 @@ class Backend:
            for run in range(1, self.benchmark_handler.benchmark_runs + 1):
                start = time.time()
-                tasks = []
+                benchmark_requests = []
                total_workload = 0
-                for _ in range(concurrent_requests):
+                for i in range(concurrent_requests):
                    payload = self.benchmark_handler.make_benchmark_payload()
-                    total_workload += payload.count_workload()
+                    workload = payload.count_workload()
-                    tasks.append(
+                    task = self.__call_api(handler=self.benchmark_handler, payload=payload)
-                        self.__call_api(handler=self.benchmark_handler, payload=payload)
+                    benchmark_requests.append(
                        BenchmarkResult(request_idx=i, workload=workload, task=task)
                    )
-                responses = await gather(*tasks)
+                responses = await gather(*[br.task for br in benchmark_requests])
                for br, response in zip(benchmark_requests, responses):
                    br.response = response
                total_workload = sum(br.workload for br in benchmark_requests if br.is_successful)
                time_elapsed = time.time() - start
                successful_responses = sum([1 for br in benchmark_requests if br.is_successful])
                if successful_responses == 0:
                    self.backend_errored("No successful responses from benchmark")
                    log.debug(f"benchmark failed: {successful_responses}/{concurrent_requests} successful responses")
                throughput = total_workload / time_elapsed
                sum_throughput += throughput
@@ -357,7 +366,7 @@ class Backend:
                            f"Run: {run}, concurrent_requests: {concurrent_requests}",
                            f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s",
                            f"Throughput: {throughput} workload/s",
-                            f"Successful responses: {len([r for r in responses if r.status == 200])}",
+                            f"Successful responses: {successful_responses}/{concurrent_requests}",
                            "#" * 60,
                        ]
                    )
@@ -3,7 +3,7 @@ import logging
 from dataclasses import dataclass, field
 from enum import Enum
 from abc import ABC, abstractmethod
-from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type
+from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type, Awaitable
 from aiohttp import web, ClientResponse
 import inspect
@@ -206,6 +206,17 @@ class RequestMetrics:
    status: str
    success: bool = False
@dataclass
 class BenchmarkResult:
    request_idx: int
    workload: float
    task: Awaitable[ClientResponse]
    response: Optional[ClientResponse] = None
    @property
    def is_successful(self) -> bool:
        return self.response is not None and self.response.status == 200
@dataclass
 class ModelMetrics:
    """Model specific metrics"""
@@ -246,7 +257,7 @@ class ModelMetrics:
    def wait_time(self) -> float:
        if (len(self.requests_working) == 0):
            return 0.0
-        return sum([request.workload for request in self.requests_working.values()]) / self.max_throughput
+        return sum([request.workload for request in self.requests_working.values()]) / max(self.max_throughput, 0.00001)
    @property
    def cur_load(self) -> float:
@@ -152,11 +152,13 @@ class Metrics:
                "request_idxs": [r.request_idx for r in self.model_metrics.requests_deleting if r.success == success],
                "success": success
            }
            log.debug(f"Deleting requests that {'succeeded' if success else 'failed'}: {data['request_idxs']}")
            full_path = report_addr.rstrip("/") + "/delete_requests/"
            for attempt in range(1, 4):
                try:
                    session = await self.http()
                    async with session.post(full_path, json=data) as res:
                        log.debug(f"delete_requests response: {res.status}")
                        res.raise_for_status()
                    return True
                except asyncio.TimeoutError:
@@ -119,14 +119,25 @@ class GenericHandler(EndpointHandler[GenericData], ABC):
 class CompletionsData(GenericData):
    @classmethod
    def for_test(cls) -> "CompletionsData":
-        prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
+        system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base:
        Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines 
        with distinctive black-and-white striped coats. There are three living species: Grévy's zebra 
        (Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the 
        genus Equus with horses and asses, the three groups being the only living members of the family 
        Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern 
        and southern Africa and can be found in a variety of habitats such as savannahs, grasslands, 
        woodlands, shrublands, and mountainous areas.
        Please answer the following question based on the above context."""
        unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
        model = os.environ.get("MODEL_NAME")
        if not model:
            raise ValueError("MODEL_NAME environment variable not set")
        test_input = {
            "model": model,
-            "prompt": prompt,
+            "prompt": f"{system_prompt}\n\n{unique_question}",
            "temperature": 0.7,
            "max_tokens": 500,
        }
@@ -153,7 +164,18 @@ class ChatCompletionsData(GenericData):
    @classmethod
    def for_test(cls) -> "ChatCompletionsData":
-        prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
+        system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base:
        Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines 
        with distinctive black-and-white striped coats. There are three living species: Grévy's zebra 
        (Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the 
        genus Equus with horses and asses, the three groups being the only living members of the family 
        Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern 
        and southern Africa and can be found in a variety of habitats such as savannahs, grasslands, 
        woodlands, shrublands, and mountainous areas.
        Please answer the following question based on the above context."""
        unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
        model = os.environ.get("MODEL_NAME")
        if not model:
            raise ValueError("MODEL_NAME environment variable not set")
@@ -161,7 +183,10 @@ class ChatCompletionsData(GenericData):
        # Chat completions use messages format instead of prompt
        test_input = {
            "model": model,
-            "messages": [{"role": "user", "content": prompt}],
+            "messages": [
                {"role": "system", "content": system_prompt},  # Shared prefix
                {"role": "user", "content": unique_question}   # Unique per request
            ],
            "temperature": 0.7,
            "max_tokens": 500,
        }
@@ -82,6 +82,7 @@ def do_one(endpoint_name: str,
        # 1) Check if we got a worker back from route
        worker_url = msg.get("url", "")
        if not worker_url:
            status = msg.get("status", "")
            m = re.search(r"total workers:\s*(\d+).*loading workers:\s*(\d+).*standby workers:\s*(\d+).*error workers:\s*(\d+)", status, re.I | re.S)
            if m:
                tot, loading, standby, err = map(int, m.groups())
Author	SHA1	Message	Date
Colter Downing	d6eb498ee4	catch the case where all benchmarks fail (sets error)	2025-10-27 12:01:55 -07:00
Colter Downing	bcecd6df40	Suppress matplot debug logs	2025-10-25 16:18:02 -07:00
Lucas Armand	4d9bf2048c	Fix	2025-10-24 15:44:38 -07:00
Lucas Armand	7788bc4a62	Added some debug logs	2025-10-24 15:41:00 -07:00