Compare commits

...

18 Commits

Author SHA1 Message Date
Mikhail Yevchenko 7df01c41b4 Update log action configuration to specify model loading message 2026-06-09 12:08:10 +03:00
Mikhail Yevchenko 885064fba6 Update docker-in-docker feature version to 3.0.1 in devcontainer configuration 2026-05-25 08:13:15 +00:00
Mikhail Yevchenko e8e04fe8bc Remove commented out info log actions from log configuration 2026-05-21 19:55:13 +00:00
Mikhail Yevchenko 6cb3acdd64 Add logging import and set logger level to WARNING 2026-05-21 19:50:21 +00:00
Mikhail Yevchenko 586ccbff1b Update log action configuration to specify detailed error and info messages 2026-05-21 19:47:16 +00:00
Mikhail Yevchenko bcc6b62277 Update log action configuration to enable error and info logging 2026-05-21 19:40:47 +00:00
Mikhail Yevchenko 3285d9118f Enhance completions benchmark generator to extract words from a fallback Perl copyright file 2026-05-21 19:33:41 +00:00
Mikhail Yevchenko f77d943d79 Refactor log message handling and improve word extraction in completions benchmark 2026-05-21 19:25:09 +00:00
Mikhail Yevchenko 976622a594 Remove nltk dep 2026-05-21 19:11:53 +00:00
Mikhail Yevchenko 0b47ef80fb Remove version specifications for vastai-sdk and nltk in requirements.txt 2026-05-21 19:10:13 +00:00
Mikhail Yevchenko 3898a8a651 Update model server URL to remove port specification 2026-05-21 18:50:41 +00:00
Mikhail Yevchenko 170571714f Add log message for model load event in Ollama configuration 2026-05-21 15:27:28 +00:00
Mikhail Yevchenko 81347ab8a0 Remove placeholder log messages for model load, error, and info 2026-05-21 15:11:30 +00:00
Mikhail Yevchenko 6bb0097829 Refactor model configuration and update log messages for Ollama 2026-05-21 15:11:25 +00:00
Mikhail Yevchenko 1cea6fbd2d Update model server URL and port configuration 2026-05-20 13:34:45 +00:00
Mikhail Yevchenko 40db98915f Add devcontainer configuration for Vast.ai serverless Ollama template 2026-05-18 20:38:26 +00:00
Mikhail Yevchenko 94926b74b6 Add log message for server listening status 2026-05-18 19:42:40 +00:00
Mikhail Yevchenko d0347b0755 Update log file path and enhance load log messages 2026-05-18 18:41:14 +00:00
3 changed files with 66 additions and 28 deletions
+37
View File
@@ -0,0 +1,37 @@
// .devcontainer/devcontainer.json
// Dev container for the Vast.ai serverless Ollama template.
// Includes Docker-in-Docker so you can build and test images from inside the container.
{
"name": "vast.ai-serverless-ollama",
"image": "mcr.microsoft.com/devcontainers/base:trixie",
"features": {
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
"version": "3.12"
},
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
"moby": false,
"version": "latest",
"installDockerBuildx": true,
"dockerDashComposeVersion": "v2"
}
},
"runArgs": ["--privileged"],
"containerEnv": {
"DOCKER_BUILDKIT": "1"
},
"postCreateCommand": "python3 -m pip install --user --upgrade pip && python3 -m pip install --user -r requirements.txt pyyaml",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-azuretools.vscode-docker"
],
"settings": {
"python.defaultInterpreterPath": "/usr/bin/python3",
"terminal.integrated.defaultProfile.linux": "bash",
"docker.showStartPage": false
}
}
}
}
+1 -2
View File
@@ -1,2 +1 @@
vastai-sdk>=0.3.0 vastai-sdk
nltk==3.9.4
+28 -26
View File
@@ -1,33 +1,19 @@
import nltk
import random import random
import os import os
import re
import logging
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
# vLLM model configuration logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors
# Ollama model configuration
MODEL_SERVER_URL = 'http://127.0.0.1' MODEL_SERVER_URL = 'http://127.0.0.1'
MODEL_SERVER_PORT = 18000 MODEL_SERVER_PORT = 11434
MODEL_LOG_FILE = '/var/log/portal/vllm.log' MODEL_LOG_FILE = '/var/log/onstart.log'
MODEL_HEALTHCHECK_ENDPOINT = "/health" MODEL_HEALTHCHECK_ENDPOINT = "/"
# vLLM-specific log messages
MODEL_LOAD_LOG_MSG = [
"Application startup complete.",
]
MODEL_ERROR_LOG_MSGS = [
"INFO exited: vllm",
"RuntimeError: Engine",
"Traceback (most recent call last):"
]
MODEL_INFO_LOG_MSGS = [
'"message":"Download'
]
nltk.download("words")
WORD_LIST = nltk.corpus.words.words()
# Ollama-specific log messages
def request_parser(request): def request_parser(request):
data = request data = request
if request.get("input") is not None: if request.get("input") is not None:
@@ -36,8 +22,24 @@ def request_parser(request):
def completions_benchmark_generator() -> dict: def completions_benchmark_generator() -> dict:
# Build a word list for generating benchmark prompts: prefer the Perl copyright file, falling back to this worker's own Python source
WORD_LIST = []
# Try to load from perl copyright file first
try:
with open("/usr/share/doc/perl/copyright", 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
except (FileNotFoundError, IOError):
# Fall back to loading from this Python file
with open(__file__, 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
prompt = " ".join(random.choices(WORD_LIST, k=int(250))) prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
model = os.environ.get("MODEL_NAME") model = os.environ.get("MODEL_NAME")
if not model: if not model:
raise ValueError("MODEL_NAME environment variable not set") raise ValueError("MODEL_NAME environment variable not set")
@@ -77,9 +79,9 @@ worker_config = WorkerConfig(
) )
], ],
log_action_config=LogActionConfig( log_action_config=LogActionConfig(
on_load=MODEL_LOAD_LOG_MSG, on_load=["llama_server: model loaded"],
on_error=MODEL_ERROR_LOG_MSGS, on_error=["Traceback (most recent call last):","Error:"],
on_info=MODEL_INFO_LOG_MSGS #on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"]
) )
) )