Compare commits

..

4 Commits

Author SHA1 Message Date
Colter Downing e756f61b9a graphing errors over time 2025-10-25 12:14:27 -07:00
Colter Downing 8cb98c84f9 non vibe coded test_load 2025-10-24 19:08:36 -07:00
Colter Downing e251afda2b improved test load 2025-10-24 12:53:35 -07:00
Lucas Armand 74bd932327 Suppress matplot debug logs 2025-10-24 12:30:20 -07:00
5 changed files with 15 additions and 63 deletions
+9 -18
View File
@@ -26,8 +26,7 @@ from lib.data_types import (
LogAction, LogAction,
ApiPayload_T, ApiPayload_T,
JsonDataException, JsonDataException,
RequestMetrics, RequestMetrics
BenchmarkResult
) )
VERSION = "0.1.0" VERSION = "0.1.0"
@@ -333,26 +332,18 @@ class Backend:
for run in range(1, self.benchmark_handler.benchmark_runs + 1): for run in range(1, self.benchmark_handler.benchmark_runs + 1):
start = time.time() start = time.time()
benchmark_requests = [] tasks = []
total_workload = 0
for i in range(concurrent_requests): for _ in range(concurrent_requests):
payload = self.benchmark_handler.make_benchmark_payload() payload = self.benchmark_handler.make_benchmark_payload()
workload = payload.count_workload() total_workload += payload.count_workload()
task = self.__call_api(handler=self.benchmark_handler, payload=payload) tasks.append(
benchmark_requests.append( self.__call_api(handler=self.benchmark_handler, payload=payload)
BenchmarkResult(request_idx=i, workload=workload, task=task)
) )
responses = await gather(*[br.task for br in benchmark_requests]) responses = await gather(*tasks)
for br, response in zip(benchmark_requests, responses):
br.response = response
total_workload = sum(br.workload for br in benchmark_requests if br.is_successful)
time_elapsed = time.time() - start time_elapsed = time.time() - start
successful_responses = sum([1 for br in benchmark_requests if br.is_successful])
if successful_responses == 0:
self.backend_errored("No successful responses from benchmark")
log.debug(f"benchmark failed: {successful_responses}/{concurrent_requests} successful responses")
throughput = total_workload / time_elapsed throughput = total_workload / time_elapsed
sum_throughput += throughput sum_throughput += throughput
@@ -366,7 +357,7 @@ class Backend:
f"Run: {run}, concurrent_requests: {concurrent_requests}", f"Run: {run}, concurrent_requests: {concurrent_requests}",
f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s", f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s",
f"Throughput: {throughput} workload/s", f"Throughput: {throughput} workload/s",
f"Successful responses: {successful_responses}/{concurrent_requests}", f"Successful responses: {len([r for r in responses if r.status == 200])}",
"#" * 60, "#" * 60,
] ]
) )
+2 -13
View File
@@ -3,7 +3,7 @@ import logging
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum from enum import Enum
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type, Awaitable from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type
from aiohttp import web, ClientResponse from aiohttp import web, ClientResponse
import inspect import inspect
@@ -206,17 +206,6 @@ class RequestMetrics:
status: str status: str
success: bool = False success: bool = False
@dataclass
class BenchmarkResult:
request_idx: int
workload: float
task: Awaitable[ClientResponse]
response: Optional[ClientResponse] = None
@property
def is_successful(self) -> bool:
return self.response is not None and self.response.status == 200
@dataclass @dataclass
class ModelMetrics: class ModelMetrics:
"""Model specific metrics""" """Model specific metrics"""
@@ -257,7 +246,7 @@ class ModelMetrics:
def wait_time(self) -> float: def wait_time(self) -> float:
if (len(self.requests_working) == 0): if (len(self.requests_working) == 0):
return 0.0 return 0.0
return sum([request.workload for request in self.requests_working.values()]) / max(self.max_throughput, 0.00001) return sum([request.workload for request in self.requests_working.values()]) / self.max_throughput
@property @property
def cur_load(self) -> float: def cur_load(self) -> float:
-2
View File
@@ -152,13 +152,11 @@ class Metrics:
"request_idxs": [r.request_idx for r in self.model_metrics.requests_deleting if r.success == success], "request_idxs": [r.request_idx for r in self.model_metrics.requests_deleting if r.success == success],
"success": success "success": success
} }
log.debug(f"Deleting requests that {'succeeded' if success else 'failed'}: {data['request_idxs']}")
full_path = report_addr.rstrip("/") + "/delete_requests/" full_path = report_addr.rstrip("/") + "/delete_requests/"
for attempt in range(1, 4): for attempt in range(1, 4):
try: try:
session = await self.http() session = await self.http()
async with session.post(full_path, json=data) as res: async with session.post(full_path, json=data) as res:
log.debug(f"delete_requests response: {res.status}")
res.raise_for_status() res.raise_for_status()
return True return True
except asyncio.TimeoutError: except asyncio.TimeoutError:
+4 -29
View File
@@ -119,25 +119,14 @@ class GenericHandler(EndpointHandler[GenericData], ABC):
class CompletionsData(GenericData): class CompletionsData(GenericData):
@classmethod @classmethod
def for_test(cls) -> "CompletionsData": def for_test(cls) -> "CompletionsData":
system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base: prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines
with distinctive black-and-white striped coats. There are three living species: Grévy's zebra
(Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the
genus Equus with horses and asses, the three groups being the only living members of the family
Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern
and southern Africa and can be found in a variety of habitats such as savannahs, grasslands,
woodlands, shrublands, and mountainous areas.
Please answer the following question based on the above context."""
unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
model = os.environ.get("MODEL_NAME") model = os.environ.get("MODEL_NAME")
if not model: if not model:
raise ValueError("MODEL_NAME environment variable not set") raise ValueError("MODEL_NAME environment variable not set")
test_input = { test_input = {
"model": model, "model": model,
"prompt": f"{system_prompt}\n\n{unique_question}", "prompt": prompt,
"temperature": 0.7, "temperature": 0.7,
"max_tokens": 500, "max_tokens": 500,
} }
@@ -164,18 +153,7 @@ class ChatCompletionsData(GenericData):
@classmethod @classmethod
def for_test(cls) -> "ChatCompletionsData": def for_test(cls) -> "ChatCompletionsData":
system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base: prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines
with distinctive black-and-white striped coats. There are three living species: Grévy's zebra
(Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the
genus Equus with horses and asses, the three groups being the only living members of the family
Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern
and southern Africa and can be found in a variety of habitats such as savannahs, grasslands,
woodlands, shrublands, and mountainous areas.
Please answer the following question based on the above context."""
unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
model = os.environ.get("MODEL_NAME") model = os.environ.get("MODEL_NAME")
if not model: if not model:
raise ValueError("MODEL_NAME environment variable not set") raise ValueError("MODEL_NAME environment variable not set")
@@ -183,10 +161,7 @@ class ChatCompletionsData(GenericData):
# Chat completions use messages format instead of prompt # Chat completions use messages format instead of prompt
test_input = { test_input = {
"model": model, "model": model,
"messages": [ "messages": [{"role": "user", "content": prompt}],
{"role": "system", "content": system_prompt}, # Shared prefix
{"role": "user", "content": unique_question} # Unique per request
],
"temperature": 0.7, "temperature": 0.7,
"max_tokens": 500, "max_tokens": 500,
} }
-1
View File
@@ -82,7 +82,6 @@ def do_one(endpoint_name: str,
# 1) Check if we got a worker back from route # 1) Check if we got a worker back from route
worker_url = msg.get("url", "") worker_url = msg.get("url", "")
if not worker_url: if not worker_url:
status = msg.get("status", "")
m = re.search(r"total workers:\s*(\d+).*loading workers:\s*(\d+).*standby workers:\s*(\d+).*error workers:\s*(\d+)", status, re.I | re.S) m = re.search(r"total workers:\s*(\d+).*loading workers:\s*(\d+).*standby workers:\s*(\d+).*error workers:\s*(\d+)", status, re.I | re.S)
if m: if m:
tot, loading, standby, err = map(int, m.groups()) tot, loading, standby, err = map(int, m.groups())