graphing errors over time

non vibe coded test_load
improved test load
2025-10-25 12:14:27 -07:00 · 2025-10-24 19:08:36 -07:00 · 2025-10-24 12:53:35 -07:00 · 2025-10-24 12:30:20 -07:00
5 changed files with 15 additions and 63 deletions
@@ -26,8 +26,7 @@ from lib.data_types import (
    LogAction,
    ApiPayload_T,
    JsonDataException,
-    RequestMetrics,
+    RequestMetrics
    BenchmarkResult
 )
 VERSION = "0.1.0"
@@ -333,26 +332,18 @@ class Backend:
            for run in range(1, self.benchmark_handler.benchmark_runs + 1):
                start = time.time()
-                benchmark_requests = []
+                tasks = []
                total_workload = 0
-                for i in range(concurrent_requests):
+                for _ in range(concurrent_requests):
                    payload = self.benchmark_handler.make_benchmark_payload()
-                    workload = payload.count_workload()
+                    total_workload += payload.count_workload()
-                    task = self.__call_api(handler=self.benchmark_handler, payload=payload)
+                    tasks.append(
-                    benchmark_requests.append(
+                        self.__call_api(handler=self.benchmark_handler, payload=payload)
                        BenchmarkResult(request_idx=i, workload=workload, task=task)
                    )
-                responses = await gather(*[br.task for br in benchmark_requests])
+                responses = await gather(*tasks)
                for br, response in zip(benchmark_requests, responses):
                    br.response = response
                total_workload = sum(br.workload for br in benchmark_requests if br.is_successful)
                time_elapsed = time.time() - start
                successful_responses = sum([1 for br in benchmark_requests if br.is_successful])
                if successful_responses == 0:
                    self.backend_errored("No successful responses from benchmark")
                    log.debug(f"benchmark failed: {successful_responses}/{concurrent_requests} successful responses")
                throughput = total_workload / time_elapsed
                sum_throughput += throughput
@@ -366,7 +357,7 @@ class Backend:
                            f"Run: {run}, concurrent_requests: {concurrent_requests}",
                            f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s",
                            f"Throughput: {throughput} workload/s",
-                            f"Successful responses: {successful_responses}/{concurrent_requests}",
+                            f"Successful responses: {len([r for r in responses if r.status == 200])}",
                            "#" * 60,
                        ]
                    )
@@ -3,7 +3,7 @@ import logging
 from dataclasses import dataclass, field
 from enum import Enum
 from abc import ABC, abstractmethod
-from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type, Awaitable
+from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type
 from aiohttp import web, ClientResponse
 import inspect
@@ -206,17 +206,6 @@ class RequestMetrics:
    status: str
    success: bool = False
@dataclass
 class BenchmarkResult:
    request_idx: int
    workload: float
    task: Awaitable[ClientResponse]
    response: Optional[ClientResponse] = None
    @property
    def is_successful(self) -> bool:
        return self.response is not None and self.response.status == 200
@dataclass
 class ModelMetrics:
    """Model specific metrics"""
@@ -257,7 +246,7 @@ class ModelMetrics:
    def wait_time(self) -> float:
        if (len(self.requests_working) == 0):
            return 0.0
-        return sum([request.workload for request in self.requests_working.values()]) / max(self.max_throughput, 0.00001)
+        return sum([request.workload for request in self.requests_working.values()]) / self.max_throughput
    @property
    def cur_load(self) -> float:
@@ -152,13 +152,11 @@ class Metrics:
                "request_idxs": [r.request_idx for r in self.model_metrics.requests_deleting if r.success == success],
                "success": success
            }
            log.debug(f"Deleting requests that {'succeeded' if success else 'failed'}: {data['request_idxs']}")
            full_path = report_addr.rstrip("/") + "/delete_requests/"
            for attempt in range(1, 4):
                try:
                    session = await self.http()
                    async with session.post(full_path, json=data) as res:
                        log.debug(f"delete_requests response: {res.status}")
                        res.raise_for_status()
                    return True
                except asyncio.TimeoutError:
@@ -119,25 +119,14 @@ class GenericHandler(EndpointHandler[GenericData], ABC):
 class CompletionsData(GenericData):
    @classmethod
    def for_test(cls) -> "CompletionsData":
-        system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base:
+        prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
        Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines 
        with distinctive black-and-white striped coats. There are three living species: Grévy's zebra 
        (Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the 
        genus Equus with horses and asses, the three groups being the only living members of the family 
        Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern 
        and southern Africa and can be found in a variety of habitats such as savannahs, grasslands, 
        woodlands, shrublands, and mountainous areas.
        Please answer the following question based on the above context."""
        unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
        model = os.environ.get("MODEL_NAME")
        if not model:
            raise ValueError("MODEL_NAME environment variable not set")
        test_input = {
            "model": model,
-            "prompt": f"{system_prompt}\n\n{unique_question}",
+            "prompt": prompt,
            "temperature": 0.7,
            "max_tokens": 500,
        }
@@ -164,18 +153,7 @@ class ChatCompletionsData(GenericData):
    @classmethod
    def for_test(cls) -> "ChatCompletionsData":
-        system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base:
+        prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
        Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines 
        with distinctive black-and-white striped coats. There are three living species: Grévy's zebra 
        (Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the 
        genus Equus with horses and asses, the three groups being the only living members of the family 
        Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern 
        and southern Africa and can be found in a variety of habitats such as savannahs, grasslands, 
        woodlands, shrublands, and mountainous areas.
        Please answer the following question based on the above context."""
        unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
        model = os.environ.get("MODEL_NAME")
        if not model:
            raise ValueError("MODEL_NAME environment variable not set")
@@ -183,10 +161,7 @@ class ChatCompletionsData(GenericData):
        # Chat completions use messages format instead of prompt
        test_input = {
            "model": model,
-            "messages": [
+            "messages": [{"role": "user", "content": prompt}],
                {"role": "system", "content": system_prompt},  # Shared prefix
                {"role": "user", "content": unique_question}   # Unique per request
            ],
            "temperature": 0.7,
            "max_tokens": 500,
        }
@@ -82,7 +82,6 @@ def do_one(endpoint_name: str,
        # 1) Check if we got a worker back from route
        worker_url = msg.get("url", "")
        if not worker_url:
            status = msg.get("status", "")
            m = re.search(r"total workers:\s*(\d+).*loading workers:\s*(\d+).*standby workers:\s*(\d+).*error workers:\s*(\d+)", status, re.I | re.S)
            if m:
                tot, loading, standby, err = map(int, m.groups())
Author	SHA1	Message	Date
Colter Downing	e756f61b9a	graphing errors over time	2025-10-25 12:14:27 -07:00
Colter Downing	8cb98c84f9	non vibe coded test_load	2025-10-24 19:08:36 -07:00
Colter Downing	e251afda2b	improved test load	2025-10-24 12:53:35 -07:00
Lucas Armand	74bd932327	Suppress matplot debug logs	2025-10-24 12:30:20 -07:00