Files
pyworker/workers/openai/worker.py
T

79 lines
2.3 KiB
Python
Raw Normal View History

2025-12-15 22:33:03 -05:00
import random
import os
from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig
# Ollama model server settings; each value is handed to WorkerConfig below.
MODEL_SERVER_URL = "http://127.0.0.1"  # loopback address: server runs on this host
MODEL_SERVER_PORT = 11434  # port the model server listens on
MODEL_LOG_FILE = "/var/log/onstart.log"  # log file passed as model_log_file
MODEL_HEALTHCHECK_ENDPOINT = "/"  # path passed as model_healthcheck_url
2025-12-15 22:33:03 -05:00
# Ollama-specific log messages are configured in the LogActionConfig passed to WorkerConfig below.
2025-12-15 22:58:02 -05:00
def request_parser(request):
    """Return the payload to use for *request*.

    If the request holds a non-``None`` value under the ``"input"`` key, that
    value is returned; otherwise the request itself is returned unchanged.
    """
    wrapped = request.get("input")
    return wrapped if wrapped is not None else request
2025-12-15 22:33:03 -05:00
# Words extracted from this source file; filled on first use so the file is
# read and tokenised only once, not on every benchmark request.
_benchmark_word_cache = None


def _benchmark_words():
    """Return the list of words found in this module's own source file."""
    global _benchmark_word_cache
    if _benchmark_word_cache is None:
        # Function-scope import: the module does not import re at file level.
        import re

        with open(__file__, "r", encoding="utf-8") as source_file:
            _benchmark_word_cache = re.findall(r"\b\w+\b", source_file.read())
    return _benchmark_word_cache


def completions_benchmark_generator() -> dict:
    """Build one request payload for benchmarking the completions route.

    The prompt is 250 words sampled at random (with replacement) from the
    words in this source file, joined by single spaces.

    Returns:
        A dict with the keys ``model``, ``prompt``, ``temperature`` and
        ``max_tokens``.

    Raises:
        ValueError: if the ``MODEL_NAME`` environment variable is unset or
            empty.
    """
    # Validate configuration first so a misconfigured worker fails before any
    # file I/O or sampling is done.
    model = os.environ.get("MODEL_NAME")
    if not model:
        raise ValueError("MODEL_NAME environment variable not set")

    prompt = " ".join(random.choices(_benchmark_words(), k=250))
    return {
        "model": model,
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 500,
    }
def _max_tokens_workload(data):
    """Return the ``max_tokens`` value of *data*, or 0 when the key is absent."""
    return data.get("max_tokens", 0)


# Handler for the text-completions route; the only route with a benchmark.
_completions_handler = HandlerConfig(
    route="/v1/completions",
    workload_calculator=_max_tokens_workload,
    allow_parallel_requests=True,
    request_parser=request_parser,
    max_queue_time=600.0,  # NOTE(review): presumably seconds; confirm against the vastai SDK
    benchmark_config=BenchmarkConfig(
        generator=completions_benchmark_generator,
        concurrency=10,
        runs=3,
    ),
)

# Handler for the chat-completions route; same settings, no benchmark.
_chat_completions_handler = HandlerConfig(
    route="/v1/chat/completions",
    workload_calculator=_max_tokens_workload,
    allow_parallel_requests=True,
    request_parser=request_parser,
    max_queue_time=600.0,
)

# Only an on_load pattern is configured; no on_error / on_info patterns are set.
_log_actions = LogActionConfig(
    on_load=["llama runner started in "],
)

worker_config = WorkerConfig(
    model_server_url=MODEL_SERVER_URL,
    model_server_port=MODEL_SERVER_PORT,
    model_log_file=MODEL_LOG_FILE,
    model_healthcheck_url=MODEL_HEALTHCHECK_ENDPOINT,
    handlers=[_completions_handler, _chat_completions_handler],
    log_action_config=_log_actions,
)

# Start the worker as soon as this module is executed.
Worker(worker_config).run()