Added model type environment variable so we can actually attempt to benchmark with the right payload.

remove redis pubsub from pyworker (#53 )
Co-authored-by: Edgar Lin <edgarlin2000@gmail.com>
2025-10-29 23:25:44 -07:00 · 2025-10-29 17:07:56 -07:00 · 2025-10-28 16:07:28 -07:00 · 2025-10-28 16:03:50 -07:00 · 2025-10-28 16:01:32 -07:00 · 2025-10-28 14:02:53 -07:00
13 changed files with 726 additions and 43 deletions
@@ -26,7 +26,8 @@ from lib.data_types import (
    LogAction,
    ApiPayload_T,
    JsonDataException,
-    RequestMetrics
+    RequestMetrics,
+    BenchmarkResult
 )

 VERSION = "0.1.0"
@@ -285,7 +286,7 @@ class Backend:
        message = {
            key: value
            for (key, value) in (dataclasses.asdict(auth_data).items())
-            if key != "signature"
+            if key != "signature" and key != "__request_id"
        }
        if auth_data.reqnum < (self.reqnum - MSG_HISTORY_LEN):
            log.debug(
@@ -295,7 +296,7 @@ class Backend:
        elif message in self.msg_history:
            log.debug(f"message: {message} already in message history")
            return False
-        elif verify_signature(json.dumps(message, indent=4), auth_data.signature):
+        elif verify_signature(json.dumps(message, indent=4, sort_keys=True), auth_data.signature):
            self.reqnum = max(auth_data.reqnum, self.reqnum)
            self.msg_history.append(message)
            self.msg_history = self.msg_history[-MSG_HISTORY_LEN:]
@@ -332,18 +333,26 @@ class Backend:

            for run in range(1, self.benchmark_handler.benchmark_runs + 1):
                start = time.time()
-                tasks = []
-                total_workload = 0
+                benchmark_requests = []

-                for _ in range(concurrent_requests):
+                for i in range(concurrent_requests):
                    payload = self.benchmark_handler.make_benchmark_payload()
-                    total_workload += payload.count_workload()
-                    tasks.append(
-                        self.__call_api(handler=self.benchmark_handler, payload=payload)
+                    workload = payload.count_workload()
+                    task = self.__call_api(handler=self.benchmark_handler, payload=payload)
+                    benchmark_requests.append(
+                        BenchmarkResult(request_idx=i, workload=workload, task=task)
                    )

-                responses = await gather(*tasks)
+                responses = await gather(*[br.task for br in benchmark_requests])
+                for br, response in zip(benchmark_requests, responses):
+                    br.response = response
+
+                total_workload = sum(br.workload for br in benchmark_requests if br.is_successful)
                time_elapsed = time.time() - start
+                successful_responses = sum([1 for br in benchmark_requests if br.is_successful])
+                if successful_responses == 0:
+                    self.backend_errored("No successful responses from benchmark")
+                    log.debug(f"benchmark failed: {successful_responses}/{concurrent_requests} successful responses")

                throughput = total_workload / time_elapsed
                sum_throughput += throughput
@@ -357,7 +366,7 @@ class Backend:
                            f"Run: {run}, concurrent_requests: {concurrent_requests}",
                            f"Total workload: {total_workload}, time_elapsed: {time_elapsed}s",
                            f"Throughput: {throughput} workload/s",
-                            f"Successful responses: {len([r for r in responses if r.status == 200])}",
+                            f"Successful responses: {successful_responses}/{concurrent_requests}",
                            "#" * 60,
                        ]
                    )
@@ -3,7 +3,7 @@ import logging
 from dataclasses import dataclass, field
 from enum import Enum
 from abc import ABC, abstractmethod
-from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type
+from typing import Dict, Any, Union, Tuple, Optional, Set, TypeVar, Generic, Type, Awaitable
 from aiohttp import web, ClientResponse
 import inspect

@@ -65,12 +65,12 @@ class ApiPayload(ABC):
 class AuthData:
    """data used to authenticate requester"""

-    signature: str
    cost: str
    endpoint: str
    reqnum: int
-    url: str
    request_idx: int
+    signature: str
+    url: str

    @classmethod
    def from_json_msg(cls, json_msg: Dict[str, Any]):
@@ -190,11 +190,12 @@ class SystemMetrics:
        self.additional_disk_usage = disk_usage - self.last_disk_usage
        self.last_disk_usage = disk_usage

-    def reset(self):
+    def reset(self, expected: float | None) -> None:
        # autoscaler expects model_loading_time to be populated only once, when the instance has
        # finished benchmarking and is ready to receive requests. This applies to restarted instances
        # as well: they should send model_loading_time once when they are done loading
-        self.model_loading_time = None
+        if self.model_loading_time == expected:
+            self.model_loading_time = None


@dataclass
@@ -206,6 +207,17 @@ class RequestMetrics:
    status: str
    success: bool = False

+@dataclass
+class BenchmarkResult:
+    request_idx: int
+    workload: float
+    task: Awaitable[ClientResponse]
+    response: Optional[ClientResponse] = None
+
+    @property
+    def is_successful(self) -> bool:
+        return self.response is not None and self.response.status == 200
+
@dataclass
 class ModelMetrics:
    """Model specific metrics"""
@@ -246,7 +258,7 @@ class ModelMetrics:
    def wait_time(self) -> float:
        if (len(self.requests_working) == 0):
            return 0.0
-        return sum([request.workload for request in self.requests_working.values()]) / self.max_throughput
+        return sum([request.workload for request in self.requests_working.values()]) / max(self.max_throughput, 0.00001)
    
    @property
    def cur_load(self) -> float:
@@ -145,41 +145,72 @@ class Metrics:
    #######################################Private#######################################

    async def __send_delete_requests_and_reset(self):
-
-        async def send_data(report_addr: str, success: bool) -> bool:
+        async def post(report_addr: str, idxs: list[int], success_flag: bool) -> bool:
            data = {
                "worker_id": self.id,
-                "request_idxs": [r.request_idx for r in self.model_metrics.requests_deleting if r.success == success],
-                "success": success
+                "request_idxs": idxs,
+                "success": success_flag,
            }
+            log.debug(
+                f"Deleting requests that {'succeeded' if success_flag else 'failed'}: {data['request_idxs']}"
+            )
            full_path = report_addr.rstrip("/") + "/delete_requests/"
            for attempt in range(1, 4):
                try:
                    session = await self.http()
                    async with session.post(full_path, json=data) as res:
+                        log.debug(f"delete_requests response: {res.status}")
                        res.raise_for_status()
                    return True
                except asyncio.TimeoutError:
-                    log.debug(f"delete_requests timed out")
+                    log.debug("delete_requests timed out")
                except (ClientResponseError, Exception) as e:
                    log.debug(f"delete_requests failed with error: {e}")
                await asyncio.sleep(2)
                log.debug(f"retrying delete_request, attempt: {attempt}")
+            return False
+
+        # Take a snapshot of what we plan to send this tick.
+        # New arrivals after this snapshot will remain in the queue for the next tick.
+        snapshot = list(self.model_metrics.requests_deleting)
+        success_idxs = [r.request_idx for r in snapshot if r.success is True]
+        failed_idxs  = [r.request_idx for r in snapshot if r.success is False]
+
+        if not success_idxs and not failed_idxs:
+            return  # nothing to do

        for report_addr in self.report_addr:
-            success = await send_data(report_addr, success=True) and await send_data(report_addr, success=False)
-            if success is True:
-                self.model_metrics.requests_deleting.clear()
+            # TODO: Add a Redis subscriber queue for delete_requests
+            if report_addr == "https://cloud.vast.ai/api/v0":
+                # Patch: ignore the Redis API report_addr
+                continue
+            sent_success = True
+            sent_failed  = True
+
+            if success_idxs:
+                sent_success = await post(report_addr, success_idxs, True)
+            if failed_idxs:
+                sent_failed = await post(report_addr, failed_idxs, False)
+
+            if sent_success and sent_failed:
+                # Remove only the items we actually sent from the live queue.
+                sent_set = set(success_idxs) | set(failed_idxs)
+                self.model_metrics.requests_deleting[:] = [
+                    r for r in self.model_metrics.requests_deleting
+                    if r.request_idx not in sent_set
+                ]
                break


    async def __send_metrics_and_reset(self):

+        loadtime_snapshot = self.system_metrics.model_loading_time
+
        def compute_autoscaler_data() -> AutoScalerData:
            return AutoScalerData(
                id=self.id,
                version=self.version,
-                loadtime=(self.system_metrics.model_loading_time or 0.0),
+                loadtime=(loadtime_snapshot or 0.0), 
                new_load=self.model_metrics.workload_processing,
                cur_load=self.model_metrics.cur_load,
                rej_load=self.model_metrics.workload_rejected,
@@ -227,11 +258,15 @@ class Metrics:

        self.system_metrics.update_disk_usage()

+        sent = False
        for report_addr in self.report_addr:
-            success = await send_data(report_addr)
-            if success is True:
+            if await send_data(report_addr):
+                sent = True
                break
-        self.update_pending = False
-        self.model_metrics.reset()
-        self.system_metrics.reset()
-        self.last_metric_update = time.time()
+
+        if sent:
+            # clear the one-shot loadtime only if we actually sent *this* value
+            self.system_metrics.reset(expected=loadtime_snapshot)
+            self.update_pending = False
+            self.model_metrics.reset()
+            self.last_metric_update = time.time()
@@ -9,9 +9,10 @@ ENV_PATH="$WORKSPACE_DIR/worker-env"
 DEBUG_LOG="$WORKSPACE_DIR/debug.log"
 PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"

-REPORT_ADDR="${REPORT_ADDR:-https://cloud.vast.ai/api/v0,https://run.vast.ai}"
+REPORT_ADDR="${REPORT_ADDR:-https://run.vast.ai}"
 USE_SSL="${USE_SSL:-true}"
 WORKER_PORT="${WORKER_PORT:-3000}"
+MODEL_TYPE="${MODEL_TYPE:-image}"
 mkdir -p "$WORKSPACE_DIR"
 cd "$WORKSPACE_DIR"

@@ -12,9 +12,21 @@ A docker image is provided but you may use any if the above requirements are met

 ## Benchmarking

-A simple image generation benchmark runs when each worker initializes to validate GPU performance and identify underperforming machines.
+### Custom Benchmark Workflows

-The benchmark uses Stable Diffusion v1.5 with ComfyUI's default text-to-image workflow. Configure the benchmark complexity and duration using these variables:
+You can provide a custom ComfyUI workflow for benchmarking by creating `workers/comfyui-json/misc/benchmark_<MODEL_TYPE>.json`, where `<MODEL_TYPE>` is the value of the `MODEL_TYPE` environment variable (`image`, `audio`, or `video`; defaults to `image`). This allows you to test performance using your preferred models and workflow complexity.
+
+**Ways to provide the benchmark file:**
+- Fork this repository and add your `benchmark_<MODEL_TYPE>.json` file
+- Write the file during worker provisioning (onstart script or setup phase)
+
+An example file is provided in the repository. To ensure varied generations, use the placeholder `__RANDOM_INT__` in place of static seed values — it will be replaced with a random integer for each benchmark run.
+
+### Default Benchmark (Fallback)
+
+If no `benchmark_<MODEL_TYPE>.json` file is available (or it cannot be read or parsed), a simple image generation benchmark runs when each worker initializes. This validates GPU performance and helps identify underperforming machines.
+
+The default benchmark uses Stable Diffusion v1.5 with ComfyUI's standard text-to-image workflow. Configure it using these environment variables:

 | Environment Variable | Default Value | Description |
 | -------------------- | ------------- | ----------- |
@@ -24,7 +36,7 @@ The benchmark uses Stable Diffusion v1.5 with ComfyUI's default text-to-image wo

 Each benchmark run uses a random prompt from `misc/test_prompts.txt` and a random seed to ensure consistent GPU load patterns.

-### Calibrating Benchmark Duration
+#### Calibrating Fallback Benchmark Duration

 To screen for underperforming hardware, set `BENCHMARK_TEST_STEPS` to match your expected production workflow duration. This allows you to identify machines that won't meet performance requirements.

@@ -1,3 +1,5 @@
+import dataclasses
+from enum import Enum
 import os
 import sys
 import random
@@ -5,12 +7,19 @@ import dataclasses
 from typing import Dict, Any
 from functools import cache
 from math import ceil
+from pathlib import Path
+import json
+import logging

 from lib.data_types import ApiPayload, JsonDataException

+log = logging.getLogger(__file__)
+
+class ModelType(Enum):
+    image = "image"
+    audio = "audio"
+    video = "video"

-with open("workers/comfyui/misc/test_prompts.txt", "r") as f:
-    test_prompts = f.readlines()

 def count_workload() -> float:
    # Always 100.0 where there is a single instance of ComfyUI handling requests
@@ -20,13 +29,43 @@ def count_workload() -> float:
@dataclasses.dataclass
 class ComfyWorkflowData(ApiPayload):
    input: dict
+    model_type: ModelType = dataclasses.field(
+        default_factory=lambda: ModelType(
+            os.environ.get("MODEL_TYPE", "image").lower()
+        )
+    )

    @classmethod
    def for_test(cls):
        """
-        Use the variables available to simulate workflows of the required running time
+        If the user has provided a benchmark workflow we can use it here to properly gauge performance.
+        Otherwise, use the variables available to simulate workflows of the required running time
        Example: SD1.5, simple image gen 10000 steps, 512px x 512px will run for approximately 9 minutes @ ~18 it/s (RTX 4090)
        """
+        # Try to load benchmark.json
+        # NOTE: cross-check with Rob that the audio sample benchmark file is correct.
+        model_type = ModelType(os.environ.get("MODEL_TYPE", "image").lower())
+        benchmark_file = Path(f"workers/comfyui-json/misc/benchmark_{model_type.value}.json")
+        if benchmark_file.exists():
+            try:
+                with open(benchmark_file, "r") as f:
+                    benchmark_workflow = json.load(f)
+                log.info(f"using benchmark json file for {model_type.value}")
+                return cls(
+                    input={
+                        "request_id": f"{model_type.value}-{random.randint(1000, 99999)}",
+                        "workflow_json": benchmark_workflow
+                    }
+                )
+            except (json.JSONDecodeError, IOError):
+                # JSON is malformed or file can't be read, fall through to default
+                log.error(f"Failed to benchmark using {benchmark_file}")
+        
+        # Fallback: read prompts and construct payload
+        log.info("Using fallback method for benchmarking")
+        with open("workers/comfyui-json/misc/test_prompts.txt", "r") as f:
+            test_prompts = f.readlines()
+        
        test_prompt = random.choice(test_prompts).rstrip()
        return cls(
            input={
@@ -0,0 +1,107 @@
+{
+    "3": {
+        "inputs": {
+            "seed": "__RANDOM_INT__",
+            "steps": 20,
+            "cfg": 8,
+            "sampler_name": "euler",
+            "scheduler": "normal",
+            "denoise": 1,
+            "model": [
+            "4",
+            0
+            ],
+            "positive": [
+            "6",
+            0
+            ],
+            "negative": [
+            "7",
+            0
+            ],
+            "latent_image": [
+            "5",
+            0
+            ]
+        },
+        "class_type": "KSampler",
+        "_meta": {
+            "title": "KSampler"
+        }
+    },
+    "4": {
+        "inputs": {
+            "ckpt_name": "v1-5-pruned-emaonly-fp16.safetensors"
+        },
+        "class_type": "CheckpointLoaderSimple",
+        "_meta": {
+            "title": "Load Checkpoint"
+        }
+    },
+    "5": {
+        "inputs": {
+            "width": 512,
+            "height": 512,
+            "batch_size": 1
+        },
+        "class_type": "EmptyLatentImage",
+        "_meta": {
+            "title": "Empty Latent Image"
+        }
+    },
+    "6": {
+        "inputs": {
+            "text": "beautiful scenery nature glass bottle landscape, , purple galaxy bottle,",
+            "clip": [
+            "4",
+            1
+            ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+            "title": "CLIP Text Encode (Prompt)"
+        }
+    },
+    "7": {
+        "inputs": {
+            "text": "text, watermark",
+            "clip": [
+            "4",
+            1
+            ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+            "title": "CLIP Text Encode (Prompt)"
+        }
+    },
+    "8": {
+        "inputs": {
+            "samples": [
+            "3",
+            0
+            ],
+            "vae": [
+            "4",
+            2
+            ]
+        },
+        "class_type": "VAEDecode",
+        "_meta": {
+            "title": "VAE Decode"
+        }
+    },
+    "9": {
+        "inputs": {
+            "filename_prefix": "ComfyUI",
+            "images": [
+            "8",
+            0
+            ]
+        },
+        "class_type": "SaveImage",
+        "_meta": {
+            "title": "Save Image"
+        }
+    }
+}
@@ -0,0 +1,118 @@
+{
+        "3": {
+            "inputs": {
+            "seed": 98942092715729,
+            "steps": 50,
+            "cfg": 4.98,
+            "sampler_name": "dpmpp_3m_sde_gpu",
+            "scheduler": "exponential",
+            "denoise": 1,
+            "model": [
+                "4",
+                0
+            ],
+            "positive": [
+                "6",
+                0
+            ],
+            "negative": [
+                "7",
+                0
+            ],
+            "latent_image": [
+                "11",
+                0
+            ]
+            },
+            "class_type": "KSampler",
+            "_meta": {
+            "title": "KSampler"
+            }
+        },
+        "4": {
+            "inputs": {
+            "ckpt_name": "stable-audio-open-1.0.safetensors"
+            },
+            "class_type": "CheckpointLoaderSimple",
+            "_meta": {
+            "title": "Load Checkpoint"
+            }
+        },
+        "6": {
+            "inputs": {
+            "text": "heaven church electronic dance music",
+            "clip": [
+                "10",
+                0
+            ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+            "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "7": {
+            "inputs": {
+            "text": "",
+            "clip": [
+                "10",
+                0
+            ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+            "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "10": {
+            "inputs": {
+            "clip_name": "t5-base.safetensors",
+            "type": "stable_audio",
+            "device": "default"
+            },
+            "class_type": "CLIPLoader",
+            "_meta": {
+            "title": "Load CLIP"
+            }
+        },
+        "11": {
+            "inputs": {
+            "seconds": 47.6,
+            "batch_size": 1
+            },
+            "class_type": "EmptyLatentAudio",
+            "_meta": {
+            "title": "EmptyLatentAudio"
+            }
+        },
+        "12": {
+            "inputs": {
+            "samples": [
+                "3",
+                0
+            ],
+            "vae": [
+                "4",
+                2
+            ]
+            },
+            "class_type": "VAEDecodeAudio",
+            "_meta": {
+            "title": "VAEDecodeAudio"
+            }
+        },
+        "13": {
+            "inputs": {
+            "filename_prefix": "audio/ComfyUI",
+            "audioUI": "",
+            "audio": [
+                "12",
+                0
+            ]
+            },
+            "class_type": "SaveAudio",
+            "_meta": {
+            "title": "SaveAudio"
+            }
+        }
+    }
@@ -0,0 +1,107 @@
+{
+      "3": {
+        "inputs": {
+          "seed": 588445435278533,
+          "steps": 20,
+          "cfg": 8,
+          "sampler_name": "euler",
+          "scheduler": "normal",
+          "denoise": 1,
+          "model": [
+            "4",
+            0
+          ],
+          "positive": [
+            "6",
+            0
+          ],
+          "negative": [
+            "7",
+            0
+          ],
+          "latent_image": [
+            "5",
+            0
+          ]
+        },
+        "class_type": "KSampler",
+        "_meta": {
+          "title": "KSampler"
+        }
+      },
+      "4": {
+        "inputs": {
+          "ckpt_name": "v1-5-pruned-emaonly-fp16.safetensors"
+        },
+        "class_type": "CheckpointLoaderSimple",
+        "_meta": {
+          "title": "Load Checkpoint"
+        }
+      },
+      "5": {
+        "inputs": {
+          "width": 512,
+          "height": 512,
+          "batch_size": 1
+        },
+        "class_type": "EmptyLatentImage",
+        "_meta": {
+          "title": "Empty Latent Image"
+        }
+      },
+      "6": {
+        "inputs": {
+          "text": "beautiful scenery nature glass bottle landscape, , purple galaxy bottle,",
+          "clip": [
+            "4",
+            1
+          ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+          "title": "CLIP Text Encode (Prompt)"
+        }
+      },
+      "7": {
+        "inputs": {
+          "text": "text, watermark",
+          "clip": [
+            "4",
+            1
+          ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+          "title": "CLIP Text Encode (Prompt)"
+        }
+      },
+      "8": {
+        "inputs": {
+          "samples": [
+            "3",
+            0
+          ],
+          "vae": [
+            "4",
+            2
+          ]
+        },
+        "class_type": "VAEDecode",
+        "_meta": {
+          "title": "VAE Decode"
+        }
+      },
+      "9": {
+        "inputs": {
+          "filename_prefix": "ComfyUI",
+          "images": [
+            "8",
+            0
+          ]
+        },
+        "class_type": "SaveImage",
+        "_meta": {
+          "title": "Save Image"
+        }
+      }
+    }
@@ -0,0 +1,216 @@
+{
+    "90": {
+        "inputs": {
+            "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+            "type": "wan",
+            "device": "default"
+        },
+        "class_type": "CLIPLoader",
+        "_meta": {
+            "title": "Load CLIP"
+        }
+    },
+    "91": {
+        "inputs": {
+            "text": "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走，裸露，NSFW",
+            "clip": [
+                "90",
+                0
+            ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+            "title": "CLIP Text Encode (Negative Prompt)"
+        }
+    },
+    "92": {
+        "inputs": {
+            "vae_name": "wan_2.1_vae.safetensors"
+        },
+        "class_type": "VAELoader",
+        "_meta": {
+            "title": "Load VAE"
+        }
+    },
+    "93": {
+        "inputs": {
+            "shift": 8.000000000000002,
+            "model": [
+                "101",
+                0
+            ]
+        },
+        "class_type": "ModelSamplingSD3",
+        "_meta": {
+            "title": "ModelSamplingSD3"
+        }
+    },
+    "94": {
+        "inputs": {
+            "shift": 8,
+            "model": [
+                "102",
+                0
+            ]
+        },
+        "class_type": "ModelSamplingSD3",
+        "_meta": {
+            "title": "ModelSamplingSD3"
+        }
+    },
+    "95": {
+        "inputs": {
+            "add_noise": "disable",
+            "noise_seed": 0,
+            "steps": 20,
+            "cfg": 3.5,
+            "sampler_name": "euler",
+            "scheduler": "simple",
+            "start_at_step": 10,
+            "end_at_step": 10000,
+            "return_with_leftover_noise": "disable",
+            "model": [
+                "94",
+                0
+            ],
+            "positive": [
+                "99",
+                0
+            ],
+            "negative": [
+                "91",
+                0
+            ],
+            "latent_image": [
+                "96",
+                0
+            ]
+        },
+        "class_type": "KSamplerAdvanced",
+        "_meta": {
+            "title": "KSampler (Advanced)"
+        }
+    },
+    "96": {
+        "inputs": {
+            "add_noise": "enable",
+            "noise_seed": "__RANDOM_INT__",
+            "steps": 20,
+            "cfg": 3.5,
+            "sampler_name": "euler",
+            "scheduler": "simple",
+            "start_at_step": 0,
+            "end_at_step": 10,
+            "return_with_leftover_noise": "enable",
+            "model": [
+                "93",
+                0
+            ],
+            "positive": [
+                "99",
+                0
+            ],
+            "negative": [
+                "91",
+                0
+            ],
+            "latent_image": [
+                "104",
+                0
+            ]
+        },
+        "class_type": "KSamplerAdvanced",
+        "_meta": {
+            "title": "KSampler (Advanced)"
+        }
+    },
+    "97": {
+        "inputs": {
+            "samples": [
+                "95",
+                0
+            ],
+            "vae": [
+                "92",
+                0
+            ]
+        },
+        "class_type": "VAEDecode",
+        "_meta": {
+            "title": "VAE Decode"
+        }
+    },
+    "98": {
+        "inputs": {
+            "filename_prefix": "video/ComfyUI",
+            "format": "auto",
+            "codec": "auto",
+            "video": [
+                "100",
+                0
+            ]
+        },
+        "class_type": "SaveVideo",
+        "_meta": {
+            "title": "Save Video"
+        }
+    },
+    "99": {
+        "inputs": {
+            "text": "Beautiful young European woman with honey blonde hair gracefully turning her head back over shoulder, gentle smile, bright eyes looking at camera. Hair flowing in slow motion as she turns. Soft natural lighting, clean background, cinematic portrait.",
+            "clip": [
+                "90",
+                0
+            ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+            "title": "CLIP Text Encode (Positive Prompt)"
+        }
+    },
+    "100": {
+        "inputs": {
+            "fps": 16,
+            "images": [
+                "97",
+                0
+            ]
+        },
+        "class_type": "CreateVideo",
+        "_meta": {
+            "title": "Create Video"
+        }
+    },
+    "101": {
+        "inputs": {
+            "unet_name": "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors",
+            "weight_dtype": "default"
+        },
+        "class_type": "UNETLoader",
+        "_meta": {
+            "title": "Load Diffusion Model"
+        }
+    },
+    "102": {
+        "inputs": {
+            "unet_name": "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors",
+            "weight_dtype": "default"
+        },
+        "class_type": "UNETLoader",
+        "_meta": {
+            "title": "Load Diffusion Model"
+        }
+    },
+    "104": {
+        "inputs": {
+            "width": 640,
+            "height": 640,
+            "length": 81,
+            "batch_size": 1
+        },
+        "class_type": "EmptyHunyuanLatentVideo",
+        "_meta": {
+            "title": "EmptyHunyuanLatentVideo"
+        }
+    }
+}
@@ -19,6 +19,7 @@ MODEL_SERVER_START_LOG_MSG = "To see the GUI go to: "
 MODEL_SERVER_ERROR_LOG_MSGS = [
    "MetadataIncompleteBuffer",  # This error is emitted when the downloaded model is corrupted
    "Value not in list: ",  # This error is emitted when the model file is not there at all
+    "[ERROR] Provisioning Script failed", # Error inserted by provisioning script if models/nodes fail to download
 ]


@@ -119,14 +119,25 @@ class GenericHandler(EndpointHandler[GenericData], ABC):
 class CompletionsData(GenericData):
    @classmethod
    def for_test(cls) -> "CompletionsData":
-        prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
+        system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base:
+    
+        Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines 
+        with distinctive black-and-white striped coats. There are three living species: Grévy's zebra 
+        (Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the 
+        genus Equus with horses and asses, the three groups being the only living members of the family 
+        Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern 
+        and southern Africa and can be found in a variety of habitats such as savannahs, grasslands, 
+        woodlands, shrublands, and mountainous areas.
+        
+        Please answer the following question based on the above context."""
+        unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
        model = os.environ.get("MODEL_NAME")
        if not model:
            raise ValueError("MODEL_NAME environment variable not set")

        test_input = {
            "model": model,
-            "prompt": prompt,
+            "prompt": f"{system_prompt}\n\n{unique_question}",
            "temperature": 0.7,
            "max_tokens": 500,
        }
@@ -153,7 +164,18 @@ class ChatCompletionsData(GenericData):

    @classmethod
    def for_test(cls) -> "ChatCompletionsData":
-        prompt = " ".join(random.choices(WORD_LIST, k=int(250)))
+        system_prompt = """You are a helpful AI assistant. You have access to the following knowledge base:
+    
+        Zebras (US: /ˈziːbrəz/, UK: /ˈzɛbrəz, ˈziː-/)[2] (subgenus Hippotigris) are African equines 
+        with distinctive black-and-white striped coats. There are three living species: Grévy's zebra 
+        (Equus grevyi), the plains zebra (E. quagga), and the mountain zebra (E. zebra). Zebras share the 
+        genus Equus with horses and asses, the three groups being the only living members of the family 
+        Equidae. Zebra stripes come in different patterns, unique to each individual. Zebras inhabit eastern 
+        and southern Africa and can be found in a variety of habitats such as savannahs, grasslands, 
+        woodlands, shrublands, and mountainous areas.
+        
+        Please answer the following question based on the above context."""
+        unique_question = " ".join(random.choices(WORD_LIST, k=int(100)))
        model = os.environ.get("MODEL_NAME")
        if not model:
            raise ValueError("MODEL_NAME environment variable not set")
@@ -161,7 +183,10 @@ class ChatCompletionsData(GenericData):
        # Chat completions use messages format instead of prompt
        test_input = {
            "model": model,
-            "messages": [{"role": "user", "content": prompt}],
+            "messages": [
+                {"role": "system", "content": system_prompt},  # Shared prefix
+                {"role": "user", "content": unique_question}   # Unique per request
+            ],
            "temperature": 0.7,
            "max_tokens": 500,
        }
@@ -82,6 +82,7 @@ def do_one(endpoint_name: str,
        # 1) Check if we got a worker back from route
        worker_url = msg.get("url", "")
        if not worker_url:
+            status = msg.get("status", "")
            m = re.search(r"total workers:\s*(\d+).*loading workers:\s*(\d+).*standby workers:\s*(\d+).*error workers:\s*(\d+)", status, re.I | re.S)
            if m:
                tot, loading, standby, err = map(int, m.groups())
Author	SHA1	Message	Date
Abiola Akinnubi	b03645d145	Added model type environment variable so we can actually attempt to benchmark with the right payload.	2025-10-29 23:25:44 -07:00
edgaratvast	02c8307af7	remove redis pubsub from pyworker (#53) Co-authored-by: Edgar Lin <edgarlin2000@gmail.com>	2025-10-29 17:07:56 -07:00
LucasArmandVast	9f5a432513	Merge pull request #51 from vast-ai/delete-reqs-hotfix Redis subscriber queue patch	2025-10-28 16:07:28 -07:00
Lucas Armand	e09f1fa953	patch for redis queue	2025-10-28 16:03:50 -07:00
edgaratvast	ba6f1c2e4b	Fix signature (#50) * change order of fields in auth_data to match autoscaler for signature verification * also ignore __request_id * Revert "change order of fields in auth_data to match autoscaler for signature verification" so that it's alphabetical again This reverts commit `b8223879c9`. * enforce alphabetical json dumping of message for signature verification --------- Co-authored-by: Edgar Lin <edgarlin2000@gmail.com>	2025-10-28 16:01:32 -07:00
edgaratvast	298590fb88	Merge pull request #45 from vast-ai/new-pyworker New PyWorker	2025-10-28 14:02:53 -07:00
Lucas Armand	814c3acd4c	remove unused code	2025-10-28 13:43:57 -07:00
Lucas Armand	22bca74087	Prevent load time race	2025-10-27 18:25:21 -07:00
Lucas Armand	9c795e2a01	removed bad code	2025-10-27 17:03:13 -07:00
Lucas Armand	830b532781	Trying unified delete	2025-10-27 16:57:52 -07:00
LucasArmandVast	d6a6e34c6b	Merge branch 'main' into new-pyworker	2025-10-27 12:43:49 -07:00
Colter-Downing	ac1e109c48	Merge pull request #47 from vast-ai/new-pyworker-vllm-prefix-cache vLLM Prefix caching, benchmark bug fix, test load script	2025-10-27 12:30:34 -07:00
Colter Downing	d6eb498ee4	catch the case where all benchmarks fail (sets error)	2025-10-27 12:01:55 -07:00
Colter Downing	bcecd6df40	Suppress matplot debug logs	2025-10-25 16:18:02 -07:00
Lucas Armand	4d9bf2048c	Fix	2025-10-24 15:44:38 -07:00
Lucas Armand	7788bc4a62	Added some debug logs	2025-10-24 15:41:00 -07:00
Rob Ballantyne	70d51bafe1	Merge pull request #36 from robballantyne/feat/comfyui-json-benchmark-workflow-from-file	2025-10-23 17:05:48 +01:00
Rob Ballantyne	63909736bb	Merge pull request #4 from robballantyne/feat/comfyui-json-benchmark-workflow-from-file-no-silent-fail Feat/comfyui json benchmark workflow from file no silent fail	2025-10-23 17:02:12 +01:00
Rob Ballantyne	f4f7080df1	Re-add comment	2025-10-23 17:00:28 +01:00
Rob Ballantyne	d51a338e8f	log when benchmark file not used	2025-10-23 16:41:02 +01:00
Rob Ballantyne	92a04bd7af	No silent fail if benchmark file is missing	2025-10-23 13:41:03 +01:00
LucasArmandVast	c98d661513	Merge pull request #39 from vast-ai/remove-time-divide PyWorker fixes for cur_load and acks bug	2025-10-13 10:06:22 -07:00
Lucas Armand	f6fd1c6ac1	merge	2025-10-09 18:15:55 -07:00
Lucas Armand	055e346c8c	Send metrics on request start	2025-10-09 10:13:50 -07:00
Lucas Armand	1cedb28acf	Removed division by elapsed time, since autoscaler cur_load in units of workload	2025-10-08 16:54:18 -07:00
Rob Ballantyne	ec25dda3ad	Merge branch 'vast-ai:main' into feat/comfyui-json-benchmark-workflow-from-file	2025-10-08 14:49:32 +01:00
Colter-Downing	0397af719d	Merge pull request #37 from robballantyne/bugfix/healthcheck-endpoint Fix healthcheck endpoint URL Tested and merged by Colter	2025-10-06 15:11:27 -07:00
Rob Ballantyne	3786cf978d	Add awareness of errors thrown by the provisioning script	2025-10-05 23:14:59 +01:00
Rob Ballantyne	a86d4bcf9c	Import json	2025-10-05 23:05:33 +01:00
Rob Ballantyne	e9b6a14a5e	Import Path	2025-10-05 22:59:19 +01:00
Rob Ballantyne	cadac033e1	Enables use of custom workflow for benchmarking. Retains existing method if misc/benchmark.json is not present	2025-10-05 22:53:22 +01:00