From c595b424100bbbabafb6cf9da22dd3a981ac9478 Mon Sep 17 00:00:00 2001 From: Nader Arbabian Date: Mon, 11 Aug 2025 12:39:28 -0700 Subject: [PATCH] for benchmarking, use concurrent requests (#26) --- lib/backend.py | 65 +++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/lib/backend.py b/lib/backend.py index 77147a6..51f71cd 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -280,41 +280,52 @@ class Backend: return float(f.readline()) except FileNotFoundError: pass + + log.debug("Initial run to trigger model loading...") + payload = self.benchmark_handler.make_benchmark_payload() + await self.__call_api(handler=self.benchmark_handler, payload=payload) + max_throughput = 0 - last_throughput = 0 sum_throughput = 0 - for run in range(self.benchmark_handler.benchmark_runs + 1): + concurrent_requests = 10 if self.allow_parallel_requests else 1 + + for run in range(1, self.benchmark_handler.benchmark_runs + 1): start = time.time() - payload = self.benchmark_handler.make_benchmark_payload() - res = await self.__call_api( - handler=self.benchmark_handler, payload=payload - ) - data = await res.json() - time_elapsed = time.time() - start - # first run triggers one-time loading of the model which is very slow, so we skip counting it - if run == 0: - continue - else: - workload = payload.count_workload() - last_throughput = workload / time_elapsed - sum_throughput += last_throughput - max_throughput = max(max_throughput, last_throughput) - log.debug( - "\n".join( - [ - "#" * 60, - f"Run: {run}, workload: {workload} time_elapsed: {time_elapsed}, throughput: {last_throughput}", - "", - f"response: {data}", - "#" * 60, - ] - ) + tasks = [] + total_workload = 0 + + for _ in range(concurrent_requests): + payload = self.benchmark_handler.make_benchmark_payload() + total_workload += payload.count_workload() + tasks.append( + self.__call_api(handler=self.benchmark_handler, payload=payload) ) + + responses = await gather(*tasks) + time_elapsed = time.time() - start + + throughput = total_workload / time_elapsed + sum_throughput += throughput + max_throughput = max(max_throughput, throughput) + + # Log results for debugging + log.debug( + "\n".join( + [ + "#" * 60, + f"Run: {run}, concurrent_requests: {concurrent_requests}", + f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s", + f"Throughput: {throughput} workload/s", + f"Successful responses: {len([r for r in responses if r.status == 200])}", + "#" * 60, + ] + ) + ) + average_throughput = sum_throughput / self.benchmark_handler.benchmark_runs log.debug( f"benchmark result: avg {average_throughput} workload per second, max {max_throughput}" ) - # save max_throughput so we don't have to run benchmark again on restart of cold instances with open(BENCHMARK_INDICATOR_FILE, "w") as f: f.write(str(max_throughput)) return max_throughput