Compare commits

..

14 Commits

Author SHA1 Message Date
Mikhail Yevchenko 7df01c41b4 Update log action configuration to specify model loading message 2026-06-09 12:08:10 +03:00
Mikhail Yevchenko 885064fba6 Update docker-in-docker feature version to 3.0.1 in devcontainer configuration 2026-05-25 08:13:15 +00:00
Mikhail Yevchenko e8e04fe8bc Remove commented out info log actions from log configuration 2026-05-21 19:55:13 +00:00
Mikhail Yevchenko 6cb3acdd64 Add logging import and set logger level to WARNING 2026-05-21 19:50:21 +00:00
Mikhail Yevchenko 586ccbff1b Update log action configuration to specify detailed error and info messages 2026-05-21 19:47:16 +00:00
Mikhail Yevchenko bcc6b62277 Update log action configuration to enable error and info logging 2026-05-21 19:40:47 +00:00
Mikhail Yevchenko 3285d9118f Enhance completions benchmark generator to extract words from a fallback Perl copyright file 2026-05-21 19:33:41 +00:00
Mikhail Yevchenko f77d943d79 Refactor log message handling and improve word extraction in completions benchmark 2026-05-21 19:25:09 +00:00
Mikhail Yevchenko 976622a594 Remove nltk dep 2026-05-21 19:11:53 +00:00
Mikhail Yevchenko 0b47ef80fb Remove version specifications for vastai-sdk and nltk in requirements.txt 2026-05-21 19:10:13 +00:00
Mikhail Yevchenko 3898a8a651 Update model server URL to remove port specification 2026-05-21 18:50:41 +00:00
Mikhail Yevchenko 170571714f Add log message for model load event in Ollama configuration 2026-05-21 15:27:28 +00:00
Mikhail Yevchenko 81347ab8a0 Remove placeholder log messages for model load, error, and info 2026-05-21 15:11:30 +00:00
Mikhail Yevchenko 6bb0097829 Refactor model configuration and update log messages for Ollama 2026-05-21 15:11:25 +00:00
3 changed files with 29 additions and 31 deletions
+1 -1
View File
@@ -9,7 +9,7 @@
"installTools": true,
"version": "3.12"
},
"ghcr.io/devcontainers/features/docker-in-docker:3.0.0": {
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
"moby": false,
"version": "latest",
"installDockerBuildx": true,
+1 -2
View File
@@ -1,2 +1 @@
vastai-sdk>=0.3.0
nltk==3.9.4
vastai-sdk
+27 -28
View File
@@ -1,36 +1,19 @@
import nltk
import random
import os
import re
import logging
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
# vLLM model configuration
MODEL_SERVER_URL = 'http://127.0.0.1:11434'
logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors
# Ollama model configuration
MODEL_SERVER_URL = 'http://127.0.0.1'
MODEL_SERVER_PORT = 11434
MODEL_LOG_FILE = '/var/log/onstart.log'
MODEL_HEALTHCHECK_ENDPOINT = "/health"
# vLLM-specific log messages
MODEL_LOAD_LOG_MSG = [
"Application startup complete.",
"llama runner started in",
"Server listening on",
"msg=\"Listening on",
]
MODEL_ERROR_LOG_MSGS = [
"INFO exited: vllm",
"RuntimeError: Engine",
"Traceback (most recent call last):"
]
MODEL_INFO_LOG_MSGS = [
'"message":"Download'
]
nltk.download("words")
WORD_LIST = nltk.corpus.words.words()
MODEL_HEALTHCHECK_ENDPOINT = "/"
# Ollama-specific log messages
def request_parser(request):
data = request
if request.get("input") is not None:
@@ -39,8 +22,24 @@ def request_parser(request):
def completions_benchmark_generator() -> dict:
# Build the word list used for generating prompts: prefer the Perl copyright file, falling back to this worker's own Python source
WORD_LIST = []
# Try to load from perl copyright file first
try:
with open("/usr/share/doc/perl/copyright", 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
except (FileNotFoundError, IOError):
# Fall back to loading from this Python file
with open(__file__, 'r') as f:
source_code = f.read()
WORD_LIST = re.findall(r'\b\w+\b', source_code)
prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
model = os.environ.get("MODEL_NAME")
if not model:
raise ValueError("MODEL_NAME environment variable not set")
@@ -80,9 +79,9 @@ worker_config = WorkerConfig(
)
],
log_action_config=LogActionConfig(
on_load=MODEL_LOAD_LOG_MSG,
on_error=MODEL_ERROR_LOG_MSGS,
on_info=MODEL_INFO_LOG_MSGS
on_load=["llama_server: model loaded"],
on_error=["Traceback (most recent call last):","Error:"],
#on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"]
)
)