Compare commits
1 Commit
| Author | SHA1 | Date | |
|---|---|---|---|
| 055e346c8c |
@@ -45,6 +45,7 @@ class Metrics:
|
|||||||
self.model_metrics.workload_received += workload
|
self.model_metrics.workload_received += workload
|
||||||
self.model_metrics.requests_recieved.add(reqnum)
|
self.model_metrics.requests_recieved.add(reqnum)
|
||||||
self.model_metrics.requests_working.add(reqnum)
|
self.model_metrics.requests_working.add(reqnum)
|
||||||
|
self.update_pending = True
|
||||||
|
|
||||||
def _request_end(self, workload: float, reqnum: int) -> None:
|
def _request_end(self, workload: float, reqnum: int) -> None:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -33,38 +33,32 @@ log = logging.getLogger(__file__)
|
|||||||
async def generate_client_response(
|
async def generate_client_response(
|
||||||
client_request: web.Request, model_response: ClientResponse
|
client_request: web.Request, model_response: ClientResponse
|
||||||
) -> Union[web.Response, web.StreamResponse]:
|
) -> Union[web.Response, web.StreamResponse]:
|
||||||
match model_response.status:
|
# Check if the response is actually streaming based on response headers/content-type
|
||||||
case 200:
|
is_streaming_response = (
|
||||||
log.debug("SUCCESS")
|
model_response.content_type == "text/event-stream"
|
||||||
# Check if the response is actually streaming based on response headers/content-type
|
or model_response.content_type == "application/x-ndjson"
|
||||||
is_streaming_response = (
|
or model_response.headers.get("Transfer-Encoding") == "chunked"
|
||||||
model_response.content_type == "text/event-stream"
|
or "stream" in model_response.content_type.lower()
|
||||||
or model_response.content_type == "application/x-ndjson"
|
)
|
||||||
or model_response.headers.get("Transfer-Encoding") == "chunked"
|
|
||||||
or "stream" in model_response.content_type.lower()
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_streaming_response:
|
if is_streaming_response:
|
||||||
log.debug("Detected streaming response...")
|
log.debug("Detected streaming response...")
|
||||||
res = web.StreamResponse()
|
res = web.StreamResponse()
|
||||||
res.content_type = model_response.content_type
|
res.content_type = model_response.content_type
|
||||||
await res.prepare(client_request)
|
await res.prepare(client_request)
|
||||||
async for chunk in model_response.content:
|
async for chunk in model_response.content:
|
||||||
await res.write(chunk)
|
await res.write(chunk)
|
||||||
await res.write_eof()
|
await res.write_eof()
|
||||||
log.debug("Done streaming response")
|
log.debug("Done streaming response")
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
log.debug("Detected non-streaming response...")
|
log.debug("Detected non-streaming response...")
|
||||||
content = await model_response.read()
|
content = await model_response.read()
|
||||||
return web.Response(
|
return web.Response(
|
||||||
body=content,
|
body=content,
|
||||||
status=model_response.status,
|
status=model_response.status,
|
||||||
content_type=model_response.content_type
|
content_type=model_response.content_type
|
||||||
)
|
)
|
||||||
case code:
|
|
||||||
log.debug(f"Model responded with error {code}")
|
|
||||||
return web.Response(status=code)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
|
|||||||
Reference in New Issue
Block a user