2025-12-15 22:33:03 -05:00
|
|
|
import random
|
|
|
|
|
import os
|
2026-05-21 19:33:41 +00:00
|
|
|
import re
|
2026-05-21 19:50:21 +00:00
|
|
|
import logging
|
2025-12-15 22:33:03 -05:00
|
|
|
|
|
|
|
|
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
|
|
|
|
|
|
2026-05-21 19:50:21 +00:00
|
|
|
# Only surface warnings and errors from the root logger.
logging.getLogger().setLevel(logging.WARNING)

# Ollama model configuration
MODEL_SERVER_URL = "http://127.0.0.1"
MODEL_SERVER_PORT = 11434
MODEL_LOG_FILE = "/var/log/onstart.log"
MODEL_HEALTHCHECK_ENDPOINT = "/"
|
2025-12-15 22:33:03 -05:00
|
|
|
|
2026-05-21 15:11:25 +00:00
|
|
|
# Ollama-specific log messages
|
2025-12-15 22:58:02 -05:00
|
|
|
def request_parser(request):
    """Unwrap the model payload from an incoming request.

    If the request carries a non-``None`` value under the ``"input"`` key,
    that value is returned; otherwise the request itself is returned
    unchanged.

    Args:
        request: Mapping-like object exposing ``.get`` (the parsed request body).

    Returns:
        ``request["input"]`` when present and not ``None``, else ``request``.
    """
    payload = request.get("input")
    # Compare against None explicitly: falsy payloads (0, "", {}) are still valid.
    return request if payload is None else payload
|
|
|
|
|
|
2025-12-15 22:33:03 -05:00
|
|
|
|
|
|
|
|
def _load_benchmark_words() -> list:
    """Return the word list used to build random benchmark prompts.

    The perl copyright file is tried first; this module's own source file is
    the fallback. A candidate that cannot be read, or that yields no words,
    is skipped.

    Raises:
        RuntimeError: If no candidate file produced any words.
    """
    candidates = ("/usr/share/doc/perl/copyright", __file__)
    for path in candidates:
        try:
            # errors="ignore" keeps stray undecodable bytes from aborting the
            # read; only the extracted words matter here.
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                words = re.findall(r"\b\w+\b", f.read())
        except OSError:
            continue
        if words:
            return words
    raise RuntimeError(
        "No words available for benchmark prompt generation (tried: "
        + ", ".join(candidates)
        + ")"
    )


def completions_benchmark_generator() -> dict:
    """Build one randomized completion request for benchmarking.

    The prompt is 250 words sampled with replacement from the word list
    returned by ``_load_benchmark_words``; the model name is read from the
    ``MODEL_NAME`` environment variable.

    Returns:
        A dict with the keys ``model``, ``prompt``, ``temperature`` and
        ``max_tokens``.

    Raises:
        ValueError: If ``MODEL_NAME`` is unset or empty.
        RuntimeError: If no word source could be loaded.
    """
    # Validate configuration before doing any file I/O so misconfiguration
    # fails fast.
    model = os.environ.get("MODEL_NAME")
    if not model:
        raise ValueError("MODEL_NAME environment variable not set")

    prompt = " ".join(random.choices(_load_benchmark_words(), k=250))

    return {
        "model": model,
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 500,
    }
|
|
|
|
|
|
|
|
|
|
def _max_tokens_workload(data):
    """Return the request's ``max_tokens`` (0 when absent) as its workload."""
    return data.get("max_tokens", 0)


# Worker wiring: model server location, the exposed routes, and the log
# strings mapped to load / error / info actions.
worker_config = WorkerConfig(
    model_server_url=MODEL_SERVER_URL,
    model_server_port=MODEL_SERVER_PORT,
    model_log_file=MODEL_LOG_FILE,
    model_healthcheck_url=MODEL_HEALTHCHECK_ENDPOINT,
    handlers=[
        HandlerConfig(
            route="/v1/completions",
            workload_calculator=_max_tokens_workload,
            allow_parallel_requests=True,
            request_parser=request_parser,
            max_queue_time=600.0,
            # Only this route carries a benchmark configuration.
            benchmark_config=BenchmarkConfig(
                generator=completions_benchmark_generator,
                concurrency=10,
                runs=3,
            ),
        ),
        HandlerConfig(
            route="/v1/chat/completions",
            workload_calculator=_max_tokens_workload,
            allow_parallel_requests=True,
            request_parser=request_parser,
            max_queue_time=600.0,
        ),
    ],
    log_action_config=LogActionConfig(
        on_load=["llama runner started in "],
        on_error=["Traceback (most recent call last):", "Error:"],
        on_info=[
            "load_tensors:",
            "llama_context:",
            "print_info:",
            "llama_model_loader:",
        ],
    ),
)

Worker(worker_config).run()
|