Compare commits
14 Commits
1cea6fbd2d
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 7df01c41b4 | |||
| 885064fba6 | |||
| e8e04fe8bc | |||
| 6cb3acdd64 | |||
| 586ccbff1b | |||
| bcc6b62277 | |||
| 3285d9118f | |||
| f77d943d79 | |||
| 976622a594 | |||
| 0b47ef80fb | |||
| 3898a8a651 | |||
| 170571714f | |||
| 81347ab8a0 | |||
| 6bb0097829 |
@@ -9,7 +9,7 @@
|
||||
"installTools": true,
|
||||
"version": "3.12"
|
||||
},
|
||||
"ghcr.io/devcontainers/features/docker-in-docker:3.0.0": {
|
||||
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
|
||||
"moby": false,
|
||||
"version": "latest",
|
||||
"installDockerBuildx": true,
|
||||
|
||||
+1
-2
@@ -1,2 +1 @@
|
||||
vastai-sdk>=0.3.0
|
||||
nltk==3.9.4
|
||||
vastai-sdk
|
||||
|
||||
+27
-28
@@ -1,36 +1,19 @@
|
||||
import nltk
|
||||
import random
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
|
||||
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
||||
|
||||
# vLLM model configuration
|
||||
MODEL_SERVER_URL = 'http://127.0.0.1:11434'
|
||||
logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors
|
||||
|
||||
# Ollama model configuration
|
||||
MODEL_SERVER_URL = 'http://127.0.0.1'
|
||||
MODEL_SERVER_PORT = 11434
|
||||
MODEL_LOG_FILE = '/var/log/onstart.log'
|
||||
MODEL_HEALTHCHECK_ENDPOINT = "/health"
|
||||
|
||||
# vLLM-specific log messages
|
||||
MODEL_LOAD_LOG_MSG = [
|
||||
"Application startup complete.",
|
||||
"llama runner started in",
|
||||
"Server listening on",
|
||||
"msg=\"Listening on",
|
||||
]
|
||||
|
||||
MODEL_ERROR_LOG_MSGS = [
|
||||
"INFO exited: vllm",
|
||||
"RuntimeError: Engine",
|
||||
"Traceback (most recent call last):"
|
||||
]
|
||||
|
||||
MODEL_INFO_LOG_MSGS = [
|
||||
'"message":"Download'
|
||||
]
|
||||
|
||||
nltk.download("words")
|
||||
WORD_LIST = nltk.corpus.words.words()
|
||||
MODEL_HEALTHCHECK_ENDPOINT = "/"
|
||||
|
||||
# Ollama-specific log messages
|
||||
def request_parser(request):
|
||||
data = request
|
||||
if request.get("input") is not None:
|
||||
@@ -39,8 +22,24 @@ def request_parser(request):
|
||||
|
||||
|
||||
def completions_benchmark_generator() -> dict:
|
||||
# Build a list of words for generating prompts: prefer the Perl copyright file, falling back to this worker's own Python source
|
||||
|
||||
WORD_LIST = []
|
||||
|
||||
# Try to load from perl copyright file first
|
||||
try:
|
||||
with open("/usr/share/doc/perl/copyright", 'r') as f:
|
||||
source_code = f.read()
|
||||
WORD_LIST = re.findall(r'\b\w+\b', source_code)
|
||||
except (FileNotFoundError, IOError):
|
||||
# Fall back to loading words from this Python file
|
||||
with open(__file__, 'r') as f:
|
||||
source_code = f.read()
|
||||
WORD_LIST = re.findall(r'\b\w+\b', source_code)
|
||||
|
||||
prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
|
||||
model = os.environ.get("MODEL_NAME")
|
||||
|
||||
if not model:
|
||||
raise ValueError("MODEL_NAME environment variable not set")
|
||||
|
||||
@@ -80,9 +79,9 @@ worker_config = WorkerConfig(
|
||||
)
|
||||
],
|
||||
log_action_config=LogActionConfig(
|
||||
on_load=MODEL_LOAD_LOG_MSG,
|
||||
on_error=MODEL_ERROR_LOG_MSGS,
|
||||
on_info=MODEL_INFO_LOG_MSGS
|
||||
on_load=["llama_server: model loaded"],
|
||||
on_error=["Traceback (most recent call last):","Error:"],
|
||||
#on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user