Compare commits

..

1 Commit

Author SHA1 Message Date
Lucas Armand 055e346c8c Send metrics on request start 2025-10-09 10:13:50 -07:00
2 changed files with 27 additions and 32 deletions
+1
View File
@@ -45,6 +45,7 @@ class Metrics:
self.model_metrics.workload_received += workload self.model_metrics.workload_received += workload
self.model_metrics.requests_recieved.add(reqnum) self.model_metrics.requests_recieved.add(reqnum)
self.model_metrics.requests_working.add(reqnum) self.model_metrics.requests_working.add(reqnum)
self.update_pending = True
def _request_end(self, workload: float, reqnum: int) -> None: def _request_end(self, workload: float, reqnum: int) -> None:
""" """
+25 -31
View File
@@ -33,38 +33,32 @@ log = logging.getLogger(__file__)
async def generate_client_response( async def generate_client_response(
client_request: web.Request, model_response: ClientResponse client_request: web.Request, model_response: ClientResponse
) -> Union[web.Response, web.StreamResponse]: ) -> Union[web.Response, web.StreamResponse]:
match model_response.status: # Check if the response is actually streaming based on response headers/content-type
case 200: is_streaming_response = (
log.debug("SUCCESS") model_response.content_type == "text/event-stream"
# Check if the response is actually streaming based on response headers/content-type or model_response.content_type == "application/x-ndjson"
is_streaming_response = ( or model_response.headers.get("Transfer-Encoding") == "chunked"
model_response.content_type == "text/event-stream" or "stream" in model_response.content_type.lower()
or model_response.content_type == "application/x-ndjson" )
or model_response.headers.get("Transfer-Encoding") == "chunked"
or "stream" in model_response.content_type.lower()
)
if is_streaming_response: if is_streaming_response:
log.debug("Detected streaming response...") log.debug("Detected streaming response...")
res = web.StreamResponse() res = web.StreamResponse()
res.content_type = model_response.content_type res.content_type = model_response.content_type
await res.prepare(client_request) await res.prepare(client_request)
async for chunk in model_response.content: async for chunk in model_response.content:
await res.write(chunk) await res.write(chunk)
await res.write_eof() await res.write_eof()
log.debug("Done streaming response") log.debug("Done streaming response")
return res return res
else: else:
log.debug("Detected non-streaming response...") log.debug("Detected non-streaming response...")
content = await model_response.read() content = await model_response.read()
return web.Response( return web.Response(
body=content, body=content,
status=model_response.status, status=model_response.status,
content_type=model_response.content_type content_type=model_response.content_type
) )
case code:
log.debug(f"Model responded with error {code}")
return web.Response(status=code)
@dataclasses.dataclass @dataclasses.dataclass