Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 814c3acd4c | |||
| 22bca74087 |
@@ -91,17 +91,6 @@ class Backend:
|
||||
timeout = ClientTimeout(total=None)
|
||||
return ClientSession(self.model_server_url, timeout=timeout, connector=connector)
|
||||
|
||||
async def _worker(self):
|
||||
while True:
|
||||
handler, request, fut = await self.request_queue.get()
|
||||
try:
|
||||
res = await self.__process_request(handler, request)
|
||||
fut.set_result(res)
|
||||
except Exception as e:
|
||||
fut.set_exception(e)
|
||||
finally:
|
||||
self.request_queue.task_done()
|
||||
|
||||
def create_handler(
|
||||
self,
|
||||
handler: EndpointHandler[ApiPayload_T],
|
||||
|
||||
+2
-1
@@ -190,10 +190,11 @@ class SystemMetrics:
|
||||
self.additional_disk_usage = disk_usage - self.last_disk_usage
|
||||
self.last_disk_usage = disk_usage
|
||||
|
||||
def reset(self):
|
||||
def reset(self, expected: float | None) -> None:
|
||||
# autoscaler expects model_loading_time to be populated only once, when the instance has
|
||||
# finished benchmarking and is ready to receive requests. This applies to restarted instances
|
||||
# as well: they should send model_loading_time once when they are done loading
|
||||
if self.model_loading_time == expected:
|
||||
self.model_loading_time = None
|
||||
|
||||
|
||||
|
||||
+10
-4
@@ -200,11 +200,13 @@ class Metrics:
|
||||
|
||||
async def __send_metrics_and_reset(self):
|
||||
|
||||
loadtime_snapshot = self.system_metrics.model_loading_time
|
||||
|
||||
def compute_autoscaler_data() -> AutoScalerData:
|
||||
return AutoScalerData(
|
||||
id=self.id,
|
||||
version=self.version,
|
||||
loadtime=(self.system_metrics.model_loading_time or 0.0),
|
||||
loadtime=(loadtime_snapshot or 0.0),
|
||||
new_load=self.model_metrics.workload_processing,
|
||||
cur_load=self.model_metrics.cur_load,
|
||||
rej_load=self.model_metrics.workload_rejected,
|
||||
@@ -252,11 +254,15 @@ class Metrics:
|
||||
|
||||
self.system_metrics.update_disk_usage()
|
||||
|
||||
sent = False
|
||||
for report_addr in self.report_addr:
|
||||
success = await send_data(report_addr)
|
||||
if success is True:
|
||||
if await send_data(report_addr):
|
||||
sent = True
|
||||
break
|
||||
|
||||
if sent:
|
||||
# clear the one-shot loadtime only if we actually sent *this* value
|
||||
self.system_metrics.reset(expected=loadtime_snapshot)
|
||||
self.update_pending = False
|
||||
self.model_metrics.reset()
|
||||
self.system_metrics.reset()
|
||||
self.last_metric_update = time.time()
|
||||
|
||||
Reference in New Issue
Block a user