Compare commits

...

20 Commits

Author SHA1 Message Date
Mikhail Yevchenko 7df01c41b4 Update log action configuration to specify model loading message 2026-06-09 12:08:10 +03:00
Mikhail Yevchenko 885064fba6 Update docker-in-docker feature version to 3.0.1 in devcontainer configuration 2026-05-25 08:13:15 +00:00
Mikhail Yevchenko e8e04fe8bc Remove commented out info log actions from log configuration 2026-05-21 19:55:13 +00:00
Mikhail Yevchenko 6cb3acdd64 Add logging import and set logger level to WARNING 2026-05-21 19:50:21 +00:00
Mikhail Yevchenko 586ccbff1b Update log action configuration to specify detailed error and info messages 2026-05-21 19:47:16 +00:00
Mikhail Yevchenko bcc6b62277 Update log action configuration to enable error and info logging 2026-05-21 19:40:47 +00:00
Mikhail Yevchenko 3285d9118f Enhance completions benchmark generator to extract words from a fallback Perl copyright file 2026-05-21 19:33:41 +00:00
Mikhail Yevchenko f77d943d79 Refactor log message handling and improve word extraction in completions benchmark 2026-05-21 19:25:09 +00:00
Mikhail Yevchenko 976622a594 Remove nltk dep 2026-05-21 19:11:53 +00:00
Mikhail Yevchenko 0b47ef80fb Remove version specifications for vastai-sdk and nltk in requirements.txt 2026-05-21 19:10:13 +00:00
Mikhail Yevchenko 3898a8a651 Update model server URL to remove port specification 2026-05-21 18:50:41 +00:00
Mikhail Yevchenko 170571714f Add log message for model load event in Ollama configuration 2026-05-21 15:27:28 +00:00
Mikhail Yevchenko 81347ab8a0 Remove placeholder log messages for model load, error, and info 2026-05-21 15:11:30 +00:00
Mikhail Yevchenko 6bb0097829 Refactor model configuration and update log messages for Ollama 2026-05-21 15:11:25 +00:00
Mikhail Yevchenko 1cea6fbd2d Update model server URL and port configuration 2026-05-20 13:34:45 +00:00
Mikhail Yevchenko 40db98915f Add devcontainer configuration for Vast.ai serverless Ollama template 2026-05-18 20:38:26 +00:00
Mikhail Yevchenko 94926b74b6 Add log message for server listening status 2026-05-18 19:42:40 +00:00
Mikhail Yevchenko d0347b0755 Update log file path and enhance load log messages 2026-05-18 18:41:14 +00:00
Lucas Armand 9bc9ba11c5 Increase TGI benchmark tokens to 500 2026-04-30 14:04:39 -07:00
LucasArmandVast 48fdc65e3d Update to vastai package (#84) 2026-04-14 10:41:31 -07:00
5 changed files with 82 additions and 44 deletions
+37
View File
@@ -0,0 +1,37 @@
// .devcontainer/devcontainer.json
// Dev container for the Vast.ai serverless Ollama template.
// Includes Docker-in-Docker so you can build and test images from inside the container.
{
"name": "vast.ai-serverless-ollama",
// Base image: the devcontainers "base" image at the trixie tag.
"image": "mcr.microsoft.com/devcontainers/base:trixie",
// Dev container features layered on top of the base image.
"features": {
// Python 3.12, with the feature's optional tooling enabled (installTools).
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
"version": "3.12"
},
// Docker-in-Docker, pinned to feature release 3.0.1. Moby is disabled, the
// engine version is left at "latest", and Buildx plus Compose v2 are installed.
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
"moby": false,
"version": "latest",
"installDockerBuildx": true,
"dockerDashComposeVersion": "v2"
}
},
// NOTE(review): presumably needed so the nested Docker daemon can run; confirm
// it is still required with this feature version.
"runArgs": ["--privileged"],
// Sets DOCKER_BUILDKIT=1 in the container environment.
"containerEnv": {
"DOCKER_BUILDKIT": "1"
},
// After the container is created: upgrade pip, then install requirements.txt
// plus pyyaml into the user site-packages (--user).
"postCreateCommand": "python3 -m pip install --user --upgrade pip && python3 -m pip install --user -r requirements.txt pyyaml",
"customizations": {
"vscode": {
// VS Code extensions installed in the container: Python and Docker.
"extensions": [
"ms-python.python",
"ms-azuretools.vscode-docker"
],
"settings": {
// NOTE(review): confirm /usr/bin/python3 is the interpreter provided by the
// python feature above (and the one postCreateCommand installs into); the
// feature may place its interpreter elsewhere.
"python.defaultInterpreterPath": "/usr/bin/python3",
"terminal.integrated.defaultProfile.linux": "bash",
"docker.showStartPage": false
}
}
}
}
+1 -2
View File
@@ -1,2 +1 @@
vastai-sdk>=0.3.0 vastai-sdk
nltk==3.9.4
+15 -15
View File
@@ -60,32 +60,32 @@ function install_vastai_sdk() {
fi fi
if [ "$FORCE_UPDATE" = true ]; then if [ "$FORCE_UPDATE" = true ]; then
uv_flags+=(--force-reinstall) uv_flags+=(--force-reinstall)
echo "Force reinstalling vastai-sdk" echo "Force reinstalling vastai"
fi fi
# If SDK_BRANCH is set, install vastai-sdk from the vast-sdk repo at that branch/tag/commit. # If SDK_BRANCH is set, install vastai from the vast-cli repo at that branch/tag/commit.
if [ -n "${SDK_BRANCH:-}" ]; then if [ -n "${SDK_BRANCH:-}" ]; then
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}" echo "WARNING: Both SDK_BRANCH and SDK_VERSION are set; using SDK_BRANCH=${SDK_BRANCH}"
fi fi
echo "Installing vastai-sdk from https://github.com/vast-ai/vast-sdk/ @ ${SDK_BRANCH}" echo "Installing vastai from https://github.com/vast-ai/vast-cli/ @ ${SDK_BRANCH}"
if ! uv pip install "${uv_flags[@]}" "vastai-sdk @ git+https://github.com/vast-ai/vast-sdk.git@${SDK_BRANCH}"; then if ! uv pip install "${uv_flags[@]}" "vastai @ git+https://github.com/vast-ai/vast-cli.git@${SDK_BRANCH}"; then
report_error_and_exit "Failed to install vastai-sdk from vast-ai/vast-sdk@${SDK_BRANCH}" report_error_and_exit "Failed to install vastai from vast-ai/vast-cli@${SDK_BRANCH}"
fi fi
return 0 return 0
fi fi
if [ -n "${SDK_VERSION:-}" ]; then if [ -n "${SDK_VERSION:-}" ]; then
echo "Installing vastai-sdk version ${SDK_VERSION}" echo "Installing vastai version ${SDK_VERSION}"
if ! uv pip install "${uv_flags[@]}" "vastai-sdk==${SDK_VERSION}"; then if ! uv pip install "${uv_flags[@]}" "vastai==${SDK_VERSION}"; then
report_error_and_exit "Failed to install vastai-sdk==${SDK_VERSION}" report_error_and_exit "Failed to install vastai==${SDK_VERSION}"
fi fi
return 0 return 0
fi fi
echo "Installing default vastai-sdk" echo "Installing default vastai"
if ! uv pip install "${uv_flags[@]}" vastai-sdk; then if ! uv pip install "${uv_flags[@]}" vastai; then
report_error_and_exit "Failed to install vastai-sdk" report_error_and_exit "Failed to install vastai"
fi fi
} }
@@ -339,19 +339,19 @@ set +e
PY_STATUS=1 PY_STATUS=1
if [ -f "$SERVER_DIR/worker.py" ]; then if [ -f "$SERVER_DIR/worker.py" ]; then
echo "trying worker.py" echo "Running worker.py"
python3 -m "worker" |& tee -a "$PYWORKER_LOG" python3 -m "worker" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/worker.py" ]; then
echo "trying workers.${BACKEND}.worker" echo "Running workers.${BACKEND}.worker"
python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG" python3 -m "workers.${BACKEND}.worker" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then if [ "${PY_STATUS}" -ne 0 ] && [ -f "$SERVER_DIR/workers/$BACKEND/server.py" ]; then
echo "trying workers.${BACKEND}.server" echo "Running workers.${BACKEND}.server"
python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG" python3 -m "workers.${BACKEND}.server" |& tee -a "$PYWORKER_LOG"
PY_STATUS=${PIPESTATUS[0]} PY_STATUS=${PIPESTATUS[0]}
fi fi
@@ -365,4 +365,4 @@ if [ "${PY_STATUS}" -ne 0 ]; then
report_error_and_exit "PyWorker exited with status ${PY_STATUS}" report_error_and_exit "PyWorker exited with status ${PY_STATUS}"
fi fi
echo "launching PyWorker server done" echo "PyWorker bootstrap complete"
+28 -26
View File
@@ -1,33 +1,19 @@
import nltk
import random import random
import os import os
import re
import logging
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
# vLLM model configuration logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors
# Ollama model configuration
MODEL_SERVER_URL = 'http://127.0.0.1' MODEL_SERVER_URL = 'http://127.0.0.1'
MODEL_SERVER_PORT = 18000 MODEL_SERVER_PORT = 11434
MODEL_LOG_FILE = '/var/log/portal/vllm.log' MODEL_LOG_FILE = '/var/log/onstart.log'
MODEL_HEALTHCHECK_ENDPOINT = "/health" MODEL_HEALTHCHECK_ENDPOINT = "/"
# vLLM-specific log messages
MODEL_LOAD_LOG_MSG = [
"Application startup complete.",
]
MODEL_ERROR_LOG_MSGS = [
"INFO exited: vllm",
"RuntimeError: Engine",
"Traceback (most recent call last):"
]
MODEL_INFO_LOG_MSGS = [
'"message":"Download'
]
nltk.download("words")
WORD_LIST = nltk.corpus.words.words()
# Ollama-specific log messages
def request_parser(request): def request_parser(request):
data = request data = request
if request.get("input") is not None: if request.get("input") is not None:
@@ -36,8 +22,24 @@ def request_parser(request):
def completions_benchmark_generator() -> dict: def completions_benchmark_generator() -> dict:
# Build the word list used to generate prompts: prefer the Perl copyright file, falling back to this worker's own source code
WORD_LIST = []
# Try to load from perl copyright file first
try:
with open("/usr/share/doc/perl/copyright", 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
except (FileNotFoundError, IOError):
# Fall back to loading words from this Python file
with open(__file__, 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
prompt = " ".join(random.choices(WORD_LIST, k=int(250))) prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
model = os.environ.get("MODEL_NAME") model = os.environ.get("MODEL_NAME")
if not model: if not model:
raise ValueError("MODEL_NAME environment variable not set") raise ValueError("MODEL_NAME environment variable not set")
@@ -77,9 +79,9 @@ worker_config = WorkerConfig(
) )
], ],
log_action_config=LogActionConfig( log_action_config=LogActionConfig(
on_load=MODEL_LOAD_LOG_MSG, on_load=["llama_server: model loaded"],
on_error=MODEL_ERROR_LOG_MSGS, on_error=["Traceback (most recent call last):","Error:"],
on_info=MODEL_INFO_LOG_MSGS #on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"]
) )
) )
+1 -1
View File
@@ -35,7 +35,7 @@ def benchmark_generator() -> dict:
benchmark_data = { benchmark_data = {
"inputs": prompt, "inputs": prompt,
"parameters": { "parameters": {
"max_new_tokens": 128, "max_new_tokens": 500,
"temperature": 0.7, "temperature": 0.7,
"return_full_text": False "return_full_text": False
} }