From 1cedb28acf318fcf84337aa905f2fe7da78e9685 Mon Sep 17 00:00:00 2001 From: Lucas Armand Date: Wed, 8 Oct 2025 16:54:18 -0700 Subject: [PATCH 1/2] Removed division by elapsed time, since autoscaler cur_load is in units of workload --- lib/metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/metrics.py b/lib/metrics.py index 166706b..5152d73 100644 --- a/lib/metrics.py +++ b/lib/metrics.py @@ -78,10 +78,10 @@ class Metrics: elapsed = time.time() - self.last_metric_update if self.system_metrics.model_is_loaded is False and elapsed >= 10: log.debug(f"sending loading model metrics after {int(elapsed)}s wait") - self.__send_metrics_and_reset(elapsed) + self.__send_metrics_and_reset() elif self.update_pending or elapsed > 10: log.debug(f"sending loaded model metrics after {int(elapsed)}s wait") - self.__send_metrics_and_reset(elapsed) + self.__send_metrics_and_reset() def _model_loaded(self, max_throughput: float) -> None: self.system_metrics.model_loading_time = ( @@ -96,13 +96,13 @@ class Metrics: #######################################Private####################################### - def __send_metrics_and_reset(self, elapsed): + def __send_metrics_and_reset(self): def compute_autoscaler_data() -> AutoScalaerData: return AutoScalaerData( id=self.id, loadtime=(self.system_metrics.model_loading_time or 0.0), - cur_load=(self.model_metrics.workload_processing / elapsed), + cur_load=(self.model_metrics.workload_processing), max_perf=self.model_metrics.max_throughput, cur_perf=self.model_metrics.cur_perf, error_msg=self.model_metrics.error_msg or "", From 055e346c8c63513c5016403e9d25255754b02fdc Mon Sep 17 00:00:00 2001 From: Lucas Armand Date: Thu, 9 Oct 2025 10:13:50 -0700 Subject: [PATCH 2/2] Send metrics on request start --- lib/metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/metrics.py b/lib/metrics.py index 166706b..8bce1ff 100644 --- a/lib/metrics.py +++ b/lib/metrics.py @@ -45,6 +45,7 @@ class Metrics: 
self.model_metrics.workload_received += workload self.model_metrics.requests_recieved.add(reqnum) self.model_metrics.requests_working.add(reqnum) + self.update_pending = True def _request_end(self, workload: float, reqnum: int) -> None: """