workers/openai/worker.py

import random
import os
import re
import logging

from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig

# Raise the root logger's threshold so that only WARNING and above are emitted.
logging.getLogger().setLevel(logging.WARNING)  # Only show warnings and errors

# Ollama model configuration
# These four values are passed straight into WorkerConfig further down.
# Base URL of the model server; the port is configured separately below.
MODEL_SERVER_URL           = 'http://127.0.0.1'
# NOTE(review): presumably Ollama's default listen port - confirm against the deployment.
MODEL_SERVER_PORT          = 11434
# Log file handed to WorkerConfig as model_log_file.
# NOTE(review): the LogActionConfig patterns presumably match lines of this file - confirm.
MODEL_LOG_FILE             = '/var/log/onstart.log'
# Path handed to WorkerConfig as model_healthcheck_url.
MODEL_HEALTHCHECK_ENDPOINT = "/"

# Request parsing (the Ollama-specific log messages are configured in log_action_config below)
def request_parser(request):
    """Return the payload that should be forwarded for *request*.

    When *request* carries a non-``None`` value under the ``"input"`` key,
    that value is returned; otherwise *request* itself is returned unchanged.
    """
    wrapped = request.get("input")
    return request if wrapped is None else wrapped


def _load_benchmark_words(path: str) -> list:
    """Return every word token found in the text file at *path*.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 and replace undecodable bytes so the result
    # does not depend on the process locale and a stray byte cannot abort the
    # benchmark with UnicodeDecodeError.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        return re.findall(r'\b\w+\b', f.read())


def completions_benchmark_generator() -> dict:
    """Build a randomised completions payload used for benchmarking.

    The prompt is 250 words drawn at random (with replacement) from a local
    text file: the perl copyright file when it is readable and contains at
    least one word, otherwise this module's own source file.

    Returns:
        A dict with the keys ``model``, ``prompt``, ``temperature`` and
        ``max_tokens``.

    Raises:
        ValueError: if the ``MODEL_NAME`` environment variable is unset or empty.
        OSError: if the fallback source file cannot be read.
    """
    # Try to load from perl copyright file first
    try:
        word_list = _load_benchmark_words("/usr/share/doc/perl/copyright")
    except OSError:
        # FileNotFoundError and IOError are both OSError; a missing or
        # unreadable primary file simply triggers the fallback below.
        word_list = []

    # Fallback to this python file, also when the primary file was readable
    # but contained no words (random.choices cannot sample an empty list).
    if not word_list:
        word_list = _load_benchmark_words(__file__)

    prompt = " ".join(random.choices(word_list, k=250))
    model = os.environ.get("MODEL_NAME")

    if not model:
        raise ValueError("MODEL_NAME environment variable not set")

    return {
        "model": model,
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 500,
    }

def _max_tokens_workload(data):
    """Return the request's ``max_tokens`` value, or 0 when the key is absent."""
    return data.get("max_tokens", 0)


# Benchmark settings; only the /v1/completions handler gets a benchmark.
_completions_benchmark = BenchmarkConfig(
    generator=completions_benchmark_generator,
    concurrency=10,
    runs=3,
)

# Handler for the plain completions route.
_completions_handler = HandlerConfig(
    route="/v1/completions",
    workload_calculator=_max_tokens_workload,
    allow_parallel_requests=True,
    request_parser=request_parser,
    max_queue_time=600.0,
    benchmark_config=_completions_benchmark,
)

# Handler for the chat completions route: same settings, no benchmark.
_chat_completions_handler = HandlerConfig(
    route="/v1/chat/completions",
    workload_calculator=_max_tokens_workload,
    allow_parallel_requests=True,
    request_parser=request_parser,
    max_queue_time=600.0,
)

# Log patterns grouped by the action they are registered for; how they are
# matched is defined by the SDK's LogActionConfig.
_log_actions = LogActionConfig(
    on_load=["llama runner started in "],
    on_error=["Traceback (most recent call last):", "Error:"],
    on_info=["load_tensors:", "llama_context:", "print_info:", "llama_model_loader:"],
)

worker_config = WorkerConfig(
    model_server_url=MODEL_SERVER_URL,
    model_server_port=MODEL_SERVER_PORT,
    model_log_file=MODEL_LOG_FILE,
    model_healthcheck_url=MODEL_HEALTHCHECK_ENDPOINT,
    handlers=[_completions_handler, _chat_completions_handler],
    log_action_config=_log_actions,
)

# Start the worker as soon as the module is executed.
Worker(worker_config).run()
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`import random`
			`import os`
Enhance completions benchmark generator to extract words from a fallback Perl copyright file 2026-05-21 19:33:41 +00:00			`import re`
Add logging import and set logger level to WARNING 2026-05-21 19:50:21 +00:00			`import logging`
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00
			`from vastai import Worker, WorkerConfig, HandlerConfig, LogActionConfig, BenchmarkConfig`

Add logging import and set logger level to WARNING 2026-05-21 19:50:21 +00:00			`logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors`

Refactor model configuration and update log messages for Ollama 2026-05-21 15:11:25 +00:00			`# Ollama model configuration`
Update model server URL to remove port specification 2026-05-21 18:50:41 +00:00			`MODEL_SERVER_URL = 'http://127.0.0.1'`
Update model server URL and port configuration 2026-05-20 13:34:45 +00:00			`MODEL_SERVER_PORT = 11434`
Update log file path and enhance load log messages 2026-05-18 18:41:14 +00:00			`MODEL_LOG_FILE = '/var/log/onstart.log'`
Refactor model configuration and update log messages for Ollama 2026-05-21 15:11:25 +00:00			`MODEL_HEALTHCHECK_ENDPOINT = "/"`
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00
Refactor model configuration and update log messages for Ollama 2026-05-21 15:11:25 +00:00			`# Ollama-specific log messages`
Backwards compatible vLLM payload (#75 ) 2025-12-15 22:58:02 -05:00			`def request_parser(request):`
			`data = request`
			`if request.get("input") is not None:`
			`data = request.get("input")`
			`return data`

Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00
			`def completions_benchmark_generator() -> dict:`
Remove nltk dep 2026-05-21 19:11:53 +00:00			`# extract words from the python source code of the worker to create a list of words for generating prompts`

			`WORD_LIST = []`

Enhance completions benchmark generator to extract words from a fallback Perl copyright file 2026-05-21 19:33:41 +00:00			`# Try to load from perl copyright file first`
			`try:`
			`with open("/usr/share/doc/perl/copyright", 'r') as f:`
			`source_code = f.read()`
			`WORD_LIST = re.findall(r'\b\w+\b', source_code)`
			`except (FileNotFoundError, IOError):`
			`# Fallback to loading from python file`
			`with open(__file__, 'r') as f:`
			`source_code = f.read()`
			`WORD_LIST = re.findall(r'\b\w+\b', source_code)`
Remove nltk dep 2026-05-21 19:11:53 +00:00
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`prompt = " ".join(random.choices(WORD_LIST, k=int(250)))`
			`model = os.environ.get("MODEL_NAME")`
Refactor log message handling and improve word extraction in completions benchmark 2026-05-21 19:25:09 +00:00
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`if not model:`
			`raise ValueError("MODEL_NAME environment variable not set")`

			`benchmark_data = {`
			`"model": model,`
			`"prompt": prompt,`
			`"temperature": 0.7,`
			`"max_tokens": 500,`
			`}`

			`return benchmark_data`

			`worker_config = WorkerConfig(`
			`model_server_url=MODEL_SERVER_URL,`
			`model_server_port=MODEL_SERVER_PORT,`
			`model_log_file=MODEL_LOG_FILE,`
			`model_healthcheck_url=MODEL_HEALTHCHECK_ENDPOINT,`
			`handlers=[`
			`HandlerConfig(`
			`route="/v1/completions",`
			`workload_calculator= lambda data: data.get("max_tokens", 0),`
			`allow_parallel_requests=True,`
Backwards compatible vLLM payload (#75 ) 2025-12-15 22:58:02 -05:00			`request_parser=request_parser,`
Increase queue limits for vLLM and TGI 2025-12-17 11:38:55 -08:00			`max_queue_time=600.0,`
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`benchmark_config=BenchmarkConfig(`
			`generator=completions_benchmark_generator,`
Lowered concurrency of vLLM and TGI benchmarks 2025-12-17 11:55:33 -08:00			`concurrency=10,`
			`runs=3`
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`)`
			`),`
			`HandlerConfig(`
			`route="/v1/chat/completions",`
			`workload_calculator= lambda data: data.get("max_tokens", 0),`
			`allow_parallel_requests=True,`
add missing comma 2025-12-17 11:40:40 -08:00			`request_parser=request_parser,`
Increase queue limits for vLLM and TGI 2025-12-17 11:38:55 -08:00			`max_queue_time=600.0,`
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`)`
			`],`
			`log_action_config=LogActionConfig(`
Refactor log message handling and improve word extraction in completions benchmark 2026-05-21 19:25:09 +00:00			`on_load=["llama runner started in "],`
Update log action configuration to specify detailed error and info messages 2026-05-21 19:47:16 +00:00			`on_error=["Traceback (most recent call last):","Error:"],`
			`on_info=["load_tensors:","llama_context:","print_info:","llama_model_loader:"]`
Use PyWorker SDK (#67 ) 2025-12-15 22:33:03 -05:00			`)`
			`)`

			`Worker(worker_config).run()`