Compare commits
14 Commits
1cea6fbd2d
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 7df01c41b4 | |||
| 885064fba6 | |||
| e8e04fe8bc | |||
| 6cb3acdd64 | |||
| 586ccbff1b | |||
| bcc6b62277 | |||
| 3285d9118f | |||
| f77d943d79 | |||
| 976622a594 | |||
| 0b47ef80fb | |||
| 3898a8a651 | |||
| 170571714f | |||
| 81347ab8a0 | |||
| 6bb0097829 |
@@ -9,7 +9,7 @@
|
|||||||
"installTools": true,
|
"installTools": true,
|
||||||
"version": "3.12"
|
"version": "3.12"
|
||||||
},
|
},
|
||||||
"ghcr.io/devcontainers/features/docker-in-docker:3.0.0": {
|
"ghcr.io/devcontainers/features/docker-in-docker:3.0.1": {
|
||||||
"moby": false,
|
"moby": false,
|
||||||
"version": "latest",
|
"version": "latest",
|
||||||
"installDockerBuildx": true,
|
"installDockerBuildx": true,
|
||||||
|
|||||||
+1
-2
@@ -1,2 +1 @@
|
|||||||
vastai-sdk>=0.3.0
|
vastai-sdk
|
||||||
nltk==3.9.4
|
|
||||||
|
|||||||
+27
-28
@@ -1,36 +1,19 @@
|
|||||||
import nltk
|
|
||||||
import random
|
import random
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
|
||||||
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
||||||
|
|
||||||
# vLLM model configuration
|
logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors
|
||||||
MODEL_SERVER_URL = 'http://127.0.0.1:11434'
|
|
||||||
|
# Ollama model configuration
|
||||||
|
MODEL_SERVER_URL = 'http://127.0.0.1'
|
||||||
MODEL_SERVER_PORT = 11434
|
MODEL_SERVER_PORT = 11434
|
||||||
MODEL_LOG_FILE = '/var/log/onstart.log'
|
MODEL_LOG_FILE = '/var/log/onstart.log'
|
||||||
MODEL_HEALTHCHECK_ENDPOINT = "/health"
|
MODEL_HEALTHCHECK_ENDPOINT = "/"
|
||||||
|
|
||||||
# vLLM-specific log messages
|
|
||||||
MODEL_LOAD_LOG_MSG = [
|
|
||||||
"Application startup complete.",
|
|
||||||
"llama runner started in",
|
|
||||||
"Server listening on",
|
|
||||||
"msg=\"Listening on",
|
|
||||||
]
|
|
||||||
|
|
||||||
MODEL_ERROR_LOG_MSGS = [
|
|
||||||
"INFO exited: vllm",
|
|
||||||
"RuntimeError: Engine",
|
|
||||||
"Traceback (most recent call last):"
|
|
||||||
]
|
|
||||||
|
|
||||||
MODEL_INFO_LOG_MSGS = [
|
|
||||||
'"message":"Download'
|
|
||||||
]
|
|
||||||
|
|
||||||
nltk.download("words")
|
|
||||||
WORD_LIST = nltk.corpus.words.words()
|
|
||||||
|
|
||||||
|
# Ollama-specific log messages
|
||||||
def request_parser(request):
|
def request_parser(request):
|
||||||
data = request
|
data = request
|
||||||
if request.get("input") is not None:
|
if request.get("input") is not None:
|
||||||
@@ -39,8 +22,24 @@ def request_parser(request):
|
|||||||
|
|
||||||
|
|
||||||
def completions_benchmark_generator() -> dict:
|
def completions_benchmark_generator() -> dict:
|
||||||
|
# extract words from the python source code of the worker to create a list of words for generating prompts
|
||||||
|
|
||||||
|
WORD_LIST = []
|
||||||
|
|
||||||
|
# Try to load from perl copyright file first
|
||||||
|
try:
|
||||||
|
with open("/usr/share/doc/perl/copyright", 'r') as f:
|
||||||
|
source_code = f.read()
|
||||||
|
WORD_LIST = re.findall(r'\b\w+\b', source_code)
|
||||||
|
except (FileNotFoundError, IOError):
|
||||||
|
# Fallback to loading from python file
|
||||||
|
with open(__file__, 'r') as f:
|
||||||
|
source_code = f.read()
|
||||||
|
WORD_LIST = re.findall(r'\b\w+\b', source_code)
|
||||||
|
|
||||||
prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
|
prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
|
||||||
model = os.environ.get("MODEL_NAME")
|
model = os.environ.get("MODEL_NAME")
|
||||||
|
|
||||||
if not model:
|
if not model:
|
||||||
raise ValueError("MODEL_NAME environment variable not set")
|
raise ValueError("MODEL_NAME environment variable not set")
|
||||||
|
|
||||||
@@ -80,9 +79,9 @@ worker_config = WorkerConfig(
|
|||||||
)
|
)
|
||||||
],
|
],
|
||||||
log_action_config=LogActionConfig(
|
log_action_config=LogActionConfig(
|
||||||
on_load=MODEL_LOAD_LOG_MSG,
|
on_load=["llama_server: model loaded"],
|
||||||
on_error=MODEL_ERROR_LOG_MSGS,
|
on_error=["Traceback (most recent call last):","Error:"],
|
||||||
on_info=MODEL_INFO_LOG_MSGS
|
#on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user