for benchmarking, use concurrent requests

This commit is contained in:
Nader Arbabian
2025-08-11 12:14:32 -07:00
parent 52ac4c0c1a
commit eecefd1d52
+38 -27
View File
@@ -280,41 +280,52 @@ class Backend:
return float(f.readline()) return float(f.readline())
except FileNotFoundError: except FileNotFoundError:
pass pass
log.debug("Initial run to trigger model loading...")
payload = self.benchmark_handler.make_benchmark_payload()
await self.__call_api(handler=self.benchmark_handler, payload=payload)
max_throughput = 0 max_throughput = 0
last_throughput = 0
sum_throughput = 0 sum_throughput = 0
for run in range(self.benchmark_handler.benchmark_runs + 1): concurrent_requests = 10 if self.allow_parallel_requests else 1
for run in range(1, self.benchmark_handler.benchmark_runs + 1):
start = time.time() start = time.time()
payload = self.benchmark_handler.make_benchmark_payload() tasks = []
res = await self.__call_api( total_workload = 0
handler=self.benchmark_handler, payload=payload
) for _ in range(concurrent_requests):
data = await res.json() payload = self.benchmark_handler.make_benchmark_payload()
time_elapsed = time.time() - start total_workload += payload.count_workload()
# first run triggers one-time loading of the model which is very slow, so we skip counting it tasks.append(
if run == 0: self.__call_api(handler=self.benchmark_handler, payload=payload)
continue
else:
workload = payload.count_workload()
last_throughput = workload / time_elapsed
sum_throughput += last_throughput
max_throughput = max(max_throughput, last_throughput)
log.debug(
"\n".join(
[
"#" * 60,
f"Run: {run}, workload: {workload} time_elapsed: {time_elapsed}, throughput: {last_throughput}",
"",
f"response: {data}",
"#" * 60,
]
)
) )
responses = await gather(*tasks)
time_elapsed = time.time() - start
throughput = total_workload / time_elapsed
sum_throughput += throughput
max_throughput = max(max_throughput, throughput)
# Log results for debugging
log.debug(
"\n".join(
[
"#" * 60,
f"Run: {run}, concurrent_requests: {concurrent_requests}",
f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s",
f"Throughput: {throughput} workload/s",
f"Successful responses: {len([r for r in responses if r.status == 200])}",
"#" * 60,
]
)
)
average_throughput = sum_throughput / self.benchmark_handler.benchmark_runs average_throughput = sum_throughput / self.benchmark_handler.benchmark_runs
log.debug( log.debug(
f"benchmark result: avg {average_throughput} workload per second, max {max_throughput}" f"benchmark result: avg {average_throughput} workload per second, max {max_throughput}"
) )
# save max_throughput so we don't have to run benchmark again on restart of cold instances
with open(BENCHMARK_INDICATOR_FILE, "w") as f: with open(BENCHMARK_INDICATOR_FILE, "w") as f:
f.write(str(max_throughput)) f.write(str(max_throughput))
return max_throughput return max_throughput