Compare commits

..

2 Commits

Author SHA1 Message Date
Lucas Armand c05131cd14 explicit not None check 2025-12-15 19:55:43 -08:00
Lucas Armand ebaf3b6d3a Add fix 2025-12-15 19:51:27 -08:00
3 changed files with 8 additions and 38 deletions
-29
View File
@@ -46,33 +46,6 @@ JSON
exit 1
}
#######################################
# Install the vastai-sdk Python package via `uv pip install`.
#
# The install source is chosen in this order:
#   1. SDK_BRANCH set  -> the vast-ai/vast-sdk git repository at that
#                         branch/tag/commit.
#   2. SDK_VERSION set -> that exact version (vastai-sdk==<version>).
#   3. neither set     -> plain `vastai-sdk`.
# When both variables are set a warning is printed and SDK_BRANCH is used.
#
# Globals:   SDK_BRANCH (read), SDK_VERSION (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   calls report_error_and_exit with a message if the install fails
#######################################
function install_vastai_sdk() {
    local requirement

    if [[ -n "${SDK_BRANCH:-}" ]]; then
        # A git ref takes precedence over a pinned version.
        [[ -z "${SDK_VERSION:-}" ]] \
            || echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}"
        echo "Installing vastai-sdk from https://github.com/vast-ai/vast-sdk/ @ ${SDK_BRANCH}"
        requirement="vastai-sdk @ git+https://github.com/vast-ai/vast-sdk.git@${SDK_BRANCH}"
        uv pip install "${requirement}" \
            || report_error_and_exit "Failed to install vastai-sdk from vast-ai/vast-sdk@${SDK_BRANCH}"
        return 0
    elif [[ -n "${SDK_VERSION:-}" ]]; then
        echo "Installing vastai-sdk version ${SDK_VERSION}"
        requirement="vastai-sdk==${SDK_VERSION}"
        uv pip install "${requirement}" \
            || report_error_and_exit "Failed to install vastai-sdk==${SDK_VERSION}"
        return 0
    fi

    # No override requested: install the unpinned package.
    echo "Installing default vastai-sdk"
    uv pip install vastai-sdk \
        || report_error_and_exit "Failed to install vastai-sdk"
}
# Validate required environment variables before continuing; each failed
# check calls report_error_and_exit with the message shown.
# HF_TOKEN is only required when BACKEND is non-empty.
[ -n "$BACKEND" ] && [ -z "$HF_TOKEN" ] && report_error_and_exit "HF_TOKEN must be set when BACKEND is set!"
# CONTAINER_ID is always required.
[ -z "$CONTAINER_ID" ] && report_error_and_exit "CONTAINER_ID must be set!"
# COMFY_MODEL is only required when BACKEND is exactly "comfyui".
[ "$BACKEND" = "comfyui" ] && [ -z "$COMFY_MODEL" ] && report_error_and_exit "For comfyui backends, COMFY_MODEL must be set!"
@@ -150,8 +123,6 @@ then
report_error_and_exit "Failed to install Python requirements"
fi
install_vastai_sdk
# Create (or update the timestamp of) the ~/.no_auto_tmux marker file and
# abort via report_error_and_exit if that fails.
# NOTE(review): presumably this marker disables automatic tmux attachment
# on login — confirm against the host image's documentation.
if ! touch ~/.no_auto_tmux; then
report_error_and_exit "Failed to create ~/.no_auto_tmux"
fi
+5 -5
View File
@@ -60,20 +60,20 @@ worker_config = WorkerConfig(
route="/v1/completions",
workload_calculator= lambda data: data.get("max_tokens", 0),
allow_parallel_requests=True,
max_queue_time=60.0,
request_parser=request_parser,
max_queue_time=600.0,
benchmark_config=BenchmarkConfig(
generator=completions_benchmark_generator,
concurrency=10,
runs=3
concurrency=100,
runs=2
)
),
HandlerConfig(
route="/v1/chat/completions",
workload_calculator= lambda data: data.get("max_tokens", 0),
allow_parallel_requests=True,
request_parser=request_parser,
max_queue_time=600.0,
max_queue_time=60.0,
request_parser=request_parser
)
],
log_action_config=LogActionConfig(
+3 -4
View File
@@ -52,18 +52,17 @@ worker_config = WorkerConfig(
HandlerConfig(
route="/generate",
allow_parallel_requests=True,
max_queue_time=600.0,
max_queue_time=60.0,
benchmark_config=BenchmarkConfig(
generator=benchmark_generator,
concurrency=10,
runs=3
concurrency=50
),
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
),
HandlerConfig(
route="/generate_stream",
allow_parallel_requests=True,
max_queue_time=600.0,
max_queue_time=60.0,
workload_calculator= lambda x: x["parameters"]["max_new_tokens"]
)
],