Prevent load time race

2025-10-27 18:25:21 -07:00
parent 9c795e2a01
commit 22bca74087
2 changed files with 16 additions and 9 deletions
@@ -190,10 +190,11 @@ class SystemMetrics:
        self.additional_disk_usage = disk_usage - self.last_disk_usage
        self.last_disk_usage = disk_usage

-    def reset(self):
+    def reset(self, expected: float | None) -> None:
        # autoscaler expects model_loading_time to be populated only once, when the instance has
        # finished benchmarking and is ready to receive requests. This applies to restarted instances
        # as well: they should send model_loading_time once when they are done loading
+        if self.model_loading_time == expected:
            self.model_loading_time = None


@@ -200,11 +200,13 @@ class Metrics:

    async def __send_metrics_and_reset(self):

+        loadtime_snapshot = self.system_metrics.model_loading_time
+
        def compute_autoscaler_data() -> AutoScalerData:
            return AutoScalerData(
                id=self.id,
                version=self.version,
-                loadtime=(self.system_metrics.model_loading_time or 0.0),
+                loadtime=(loadtime_snapshot or 0.0), 
                new_load=self.model_metrics.workload_processing,
                cur_load=self.model_metrics.cur_load,
                rej_load=self.model_metrics.workload_rejected,
@@ -252,11 +254,15 @@ class Metrics:

        self.system_metrics.update_disk_usage()

+        sent = False
        for report_addr in self.report_addr:
-            success = await send_data(report_addr)
-            if success is True:
+            if await send_data(report_addr):
+                sent = True
                break
+
+        if sent:
+            # clear the one-shot loadtime only if we actually sent *this* value
+            self.system_metrics.reset(expected=loadtime_snapshot)
            self.update_pending = False
            self.model_metrics.reset()
-        self.system_metrics.reset()
            self.last_metric_update = time.time()