From 22bca74087df00161c622c046619851361fa93df Mon Sep 17 00:00:00 2001 From: Lucas Armand Date: Mon, 27 Oct 2025 18:25:21 -0700 Subject: [PATCH] Prevent load time race --- lib/data_types.py | 5 +++-- lib/metrics.py | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/data_types.py b/lib/data_types.py index af1bbd5..ceadfed 100644 --- a/lib/data_types.py +++ b/lib/data_types.py @@ -190,11 +190,12 @@ class SystemMetrics: self.additional_disk_usage = disk_usage - self.last_disk_usage self.last_disk_usage = disk_usage - def reset(self): + def reset(self, expected: float | None) -> None: # autoscaler excepts model_loading_time to be populated only once, when the instance has # finished benchmarking and is ready to receive requests. This applies to restarted instances # as well: they should send model_loading_time once when they are done loading - self.model_loading_time = None + if self.model_loading_time == expected: + self.model_loading_time = None @dataclass diff --git a/lib/metrics.py b/lib/metrics.py index de5490b..6cb9c4f 100644 --- a/lib/metrics.py +++ b/lib/metrics.py @@ -200,11 +200,13 @@ class Metrics: async def __send_metrics_and_reset(self): + loadtime_snapshot = self.system_metrics.model_loading_time + def compute_autoscaler_data() -> AutoScalerData: return AutoScalerData( id=self.id, version=self.version, - loadtime=(self.system_metrics.model_loading_time or 0.0), + loadtime=(loadtime_snapshot or 0.0), new_load=self.model_metrics.workload_processing, cur_load=self.model_metrics.cur_load, rej_load=self.model_metrics.workload_rejected, @@ -252,11 +254,15 @@ class Metrics: self.system_metrics.update_disk_usage() + sent = False for report_addr in self.report_addr: - success = await send_data(report_addr) - if success is True: + if await send_data(report_addr): + sent = True break - self.update_pending = False - self.model_metrics.reset() - self.system_metrics.reset() - self.last_metric_update = time.time() + + if sent: + 
# clear the one-shot loadtime only if it still equals the value just sent + self.system_metrics.reset(expected=loadtime_snapshot) + self.update_pending = False + self.model_metrics.reset() + self.last_metric_update = time.time()