Added model type environment variable so we can actually attempt to benchmark with the right payload.

remove redis pubsub from pyworker (#53 )
Co-authored-by: Edgar Lin <edgarlin2000@gmail.com>
2025-10-29 23:25:44 -07:00 · 2025-10-29 17:07:56 -07:00 · 2025-10-28 16:07:28 -07:00 · 2025-10-28 16:03:50 -07:00 · 2025-10-28 16:01:32 -07:00 · 2025-10-28 14:02:53 -07:00
8 changed files with 490 additions and 84 deletions
@@ -5,7 +5,7 @@ import base64
 import subprocess
 import dataclasses
 import logging
-from asyncio import wait, sleep, gather, Semaphore, FIRST_COMPLETED, create_task, get_running_loop
+from asyncio import wait, sleep, gather, Semaphore, FIRST_COMPLETED, create_task
 from typing import Tuple, Awaitable, NoReturn, List, Union, Callable, Optional
 from functools import cached_property
 from distutils.util import strtobool
@@ -47,7 +47,7 @@ class Backend:
    This class is responsible for:
    1. Tailing logs and updating load time metrics
    2. Taking an EndpointHandler alongside incoming payload, preparing a json to be sent to the model, and
-       sending the request. It also updates metrics as it makes those requests.
+    sending the request. It also updates metrics as it makes those requests.
    3. Running a benchmark from an EndpointHandler
    """

@@ -74,11 +74,6 @@ class Backend:
        self._pubkey = self._fetch_pubkey()
        self.__start_healthcheck: bool = False

-        # NEW: FIFO queue + worker count
-        self.request_queue: "asyncio.Queue[tuple[EndpointHandler[ApiPayload_T], web.Request, asyncio.Future]]" = asyncio.Queue()
-        # If parallel allowed, let multiple workers drain the queue (order preserved by FIFO per worker; overall start order is FIFO).
-        self._num_workers: int = 1 if not self.allow_parallel_requests else int(os.environ.get("WORKERS", "4"))
-
    @property
    def pubkey(self) -> Optional[RSA.RsaKey]:
        if self._pubkey is None:
@@ -96,22 +91,6 @@ class Backend:
        timeout = ClientTimeout(total=None)
        return ClientSession(self.model_server_url, timeout=timeout, connector=connector)

-    async def _worker(self):
-        while True:
-            handler, request, fut = await self.request_queue.get()
-            try:
-                # Skip if already cancelled while waiting in the queue
-                if fut.cancelled():
-                    continue
-                res = await self.__process_enqueued_request(handler, request)
-                if not fut.cancelled():
-                    fut.set_result(res)
-            except Exception as e:
-                if not fut.cancelled():
-                    fut.set_exception(e)
-            finally:
-                self.request_queue.task_done()
-
    def create_handler(
        self,
        handler: EndpointHandler[ApiPayload_T],
@@ -148,36 +127,7 @@ class Backend:
        handler: EndpointHandler[ApiPayload_T],
        request: web.Request,
    ) -> Union[web.Response, web.StreamResponse]:
-        """use this function to enqueue requests for FIFO processing"""
-        loop = get_running_loop()
-        fut: asyncio.Future = loop.create_future()
-
-        # If the client disconnects while waiting in the FIFO, cancel the future so the worker skips it
-        cancel_watch = create_task(request.wait_for_disconnection())
-        def _cancel_if_disconnected(_):
-            if not fut.done():
-                fut.cancel()
-        cancel_watch.add_done_callback(_cancel_if_disconnected)
-
-        try:
-            await self.request_queue.put((handler, request, fut))
-            return await fut
-        except asyncio.CancelledError:
-            # Propagate cancellation to ensure aiohttp doesn't expect a response body
-            raise
-        finally:
-            # Best-effort cleanup of the watcher
-            cancel_watch.cancel()
-
-    async def __process_enqueued_request(
-        self,
-        handler: EndpointHandler[ApiPayload_T],
-        request: web.Request,
-    ) -> Union[web.Response, web.StreamResponse]:
-        """
-        This contains the original __handle_request logic and is invoked by workers,
-        ensuring FIFO execution via asyncio.Queue.
-        """
+        """use this function to forward requests to the model endpoint"""
        try:
            data = await request.json()
            auth_data, payload = handler.get_data_from_request(data)
@@ -185,11 +135,8 @@ class Backend:
            return web.json_response(data=e.message, status=422)
        except json.JSONDecodeError:
            return web.json_response(dict(error="invalid JSON"), status=422)
-
        workload = payload.count_workload()
-        request_metrics: RequestMetrics = RequestMetrics(
-            request_idx=auth_data.request_idx, reqnum=auth_data.reqnum, workload=workload, status="Created"
-        )
+        request_metrics: RequestMetrics = RequestMetrics(request_idx=auth_data.request_idx, reqnum=auth_data.reqnum, workload=workload, status="Created")

        async def cancel_api_call_if_disconnected() -> web.Response:
            await request.wait_for_disconnection()
@@ -230,8 +177,6 @@ class Backend:
        acquired = False
        try:
            self.metrics._request_start(request_metrics)
-
-            # Preserve existing semaphore behavior for serializing requests when requested
            if self.allow_parallel_requests is False:
                log.debug(f"Waiting to aquire Sem for reqnum:{request_metrics.reqnum}")
                await self.sem.acquire()
@@ -241,7 +186,6 @@ class Backend:
                )
            else:
                log.debug(f"Starting request for reqnum:{request_metrics.reqnum}")
-
            done, pending = await wait(
                [
                    create_task(make_request()),
@@ -309,14 +253,8 @@ class Backend:
                self.backend_errored(str(e))

    async def _start_tracking(self) -> None:
-        # Start the FIFO workers alongside existing loops
-        worker_tasks = tuple(self._worker() for _ in range(self._num_workers))
        await gather(
-            self.__read_logs(),
-            self.metrics._send_metrics_loop(),
-            self.__healthcheck(),
-            self.metrics._send_delete_requests_loop(),
-            *worker_tasks,
+            self.__read_logs(), self.metrics._send_metrics_loop(), self.__healthcheck(), self.metrics._send_delete_requests_loop()
        )

    def backend_errored(self, msg: str) -> None:
@@ -348,7 +286,7 @@ class Backend:
        message = {
            key: value
            for (key, value) in (dataclasses.asdict(auth_data).items())
-            if key != "signature"
+            if key != "signature" and key != "__request_id"
        }
        if auth_data.reqnum < (self.reqnum - MSG_HISTORY_LEN):
            log.debug(
@@ -358,7 +296,7 @@ class Backend:
        elif message in self.msg_history:
            log.debug(f"message: {message} already in message history")
            return False
-        elif verify_signature(json.dumps(message, indent=4), auth_data.signature):
+        elif verify_signature(json.dumps(message, indent=4, sort_keys=True), auth_data.signature):
            self.reqnum = max(auth_data.reqnum, self.reqnum)
            self.msg_history.append(message)
            self.msg_history = self.msg_history[-MSG_HISTORY_LEN:]
@@ -65,12 +65,12 @@ class ApiPayload(ABC):
 class AuthData:
    """data used to authenticate requester"""

-    signature: str
    cost: str
    endpoint: str
    reqnum: int
-    url: str
    request_idx: int
+    signature: str
+    url: str

    @classmethod
    def from_json_msg(cls, json_msg: Dict[str, Any]):
@@ -190,11 +190,12 @@ class SystemMetrics:
        self.additional_disk_usage = disk_usage - self.last_disk_usage
        self.last_disk_usage = disk_usage

-    def reset(self):
+    def reset(self, expected: float | None) -> None:
        # autoscaler expects model_loading_time to be populated only once, when the instance has
        # finished benchmarking and is ready to receive requests. This applies to restarted instances
        # as well: they should send model_loading_time once when they are done loading
-        self.model_loading_time = None
+        if self.model_loading_time == expected:
+            self.model_loading_time = None


@dataclass
@@ -180,6 +180,10 @@ class Metrics:
            return  # nothing to do

        for report_addr in self.report_addr:
+            # TODO: Add a Redis subscriber queue for delete_requests
+            if report_addr == "https://cloud.vast.ai/api/v0":
+                # Patch: ignore the Redis API report_addr
+                continue
            sent_success = True
            sent_failed  = True

@@ -200,11 +204,13 @@ class Metrics:

    async def __send_metrics_and_reset(self):

+        loadtime_snapshot = self.system_metrics.model_loading_time
+
        def compute_autoscaler_data() -> AutoScalerData:
            return AutoScalerData(
                id=self.id,
                version=self.version,
-                loadtime=(self.system_metrics.model_loading_time or 0.0),
+                loadtime=(loadtime_snapshot or 0.0), 
                new_load=self.model_metrics.workload_processing,
                cur_load=self.model_metrics.cur_load,
                rej_load=self.model_metrics.workload_rejected,
@@ -252,11 +258,15 @@ class Metrics:

        self.system_metrics.update_disk_usage()

+        sent = False
        for report_addr in self.report_addr:
-            success = await send_data(report_addr)
-            if success is True:
+            if await send_data(report_addr):
+                sent = True
                break
-        self.update_pending = False
-        self.model_metrics.reset()
-        self.system_metrics.reset()
-        self.last_metric_update = time.time()
+
+        if sent:
+            # clear the one-shot loadtime only if we actually sent *this* value
+            self.system_metrics.reset(expected=loadtime_snapshot)
+            self.update_pending = False
+            self.model_metrics.reset()
+            self.last_metric_update = time.time()
@@ -9,9 +9,10 @@ ENV_PATH="$WORKSPACE_DIR/worker-env"
 DEBUG_LOG="$WORKSPACE_DIR/debug.log"
 PYWORKER_LOG="$WORKSPACE_DIR/pyworker.log"

-REPORT_ADDR="${REPORT_ADDR:-https://cloud.vast.ai/api/v0,https://run.vast.ai}"
+REPORT_ADDR="${REPORT_ADDR:-https://run.vast.ai}"
 USE_SSL="${USE_SSL:-true}"
 WORKER_PORT="${WORKER_PORT:-3000}"
+MODEL_TYPE="${MODEL_TYPE:-image}"
 mkdir -p "$WORKSPACE_DIR"
 cd "$WORKSPACE_DIR"

@@ -1,3 +1,5 @@
+import dataclasses
+from enum import Enum
 import os
 import sys
 import random
@@ -13,6 +15,12 @@ from lib.data_types import ApiPayload, JsonDataException

 log = logging.getLogger(__file__)

+class ModelType(Enum):
+    image = "image"
+    audio = "audio"
+    video = "video"
+
+
 def count_workload() -> float:
    # Always 100.0 where there is a single instance of ComfyUI handling requests
    # Results will indicate % of a job completed per second.  Avoids sub 0.1 sec performance indication
@@ -21,6 +29,11 @@ def count_workload() -> float:
@dataclasses.dataclass
 class ComfyWorkflowData(ApiPayload):
    input: dict
+    model_type: ModelType = dataclasses.field(
+        default_factory=lambda: ModelType(
+            os.environ.get("MODEL_TYPE", "image").lower()
+        )
+    )

    @classmethod
    def for_test(cls):
@@ -30,15 +43,17 @@ class ComfyWorkflowData(ApiPayload):
        Example: SD1.5, simple image gen 10000 steps, 512px x 512px will run for approximately 9 minutes @ ~18 it/s (RTX 4090)
        """
        # Try to load benchmark.json
-        benchmark_file = Path("workers/comfyui-json/misc/benchmark.json")
-        
+        # NOTE: We should cross-check with Rob whether the audio sample benchmark file is correct
+        model_type = ModelType(os.environ.get("MODEL_TYPE", "image").lower())
+        benchmark_file = Path(f"workers/comfyui-json/misc/benchmark_{model_type.value}.json")
        if benchmark_file.exists():
            try:
                with open(benchmark_file, "r") as f:
                    benchmark_workflow = json.load(f)
+                log.info(f"using benchmark json file for {model_type.value}")
                return cls(
                    input={
-                        "request_id": f"test-{random.randint(1000, 99999)}",
+                        "request_id": f"{model_type.value}-{random.randint(1000, 99999)}",
                        "workflow_json": benchmark_workflow
                    }
                )
@@ -0,0 +1,118 @@
+{
+        "3": {
+            "inputs": {
+            "seed": 98942092715729,
+            "steps": 50,
+            "cfg": 4.98,
+            "sampler_name": "dpmpp_3m_sde_gpu",
+            "scheduler": "exponential",
+            "denoise": 1,
+            "model": [
+                "4",
+                0
+            ],
+            "positive": [
+                "6",
+                0
+            ],
+            "negative": [
+                "7",
+                0
+            ],
+            "latent_image": [
+                "11",
+                0
+            ]
+            },
+            "class_type": "KSampler",
+            "_meta": {
+            "title": "KSampler"
+            }
+        },
+        "4": {
+            "inputs": {
+            "ckpt_name": "stable-audio-open-1.0.safetensors"
+            },
+            "class_type": "CheckpointLoaderSimple",
+            "_meta": {
+            "title": "Load Checkpoint"
+            }
+        },
+        "6": {
+            "inputs": {
+            "text": "heaven church electronic dance music",
+            "clip": [
+                "10",
+                0
+            ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+            "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "7": {
+            "inputs": {
+            "text": "",
+            "clip": [
+                "10",
+                0
+            ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+            "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "10": {
+            "inputs": {
+            "clip_name": "t5-base.safetensors",
+            "type": "stable_audio",
+            "device": "default"
+            },
+            "class_type": "CLIPLoader",
+            "_meta": {
+            "title": "Load CLIP"
+            }
+        },
+        "11": {
+            "inputs": {
+            "seconds": 47.6,
+            "batch_size": 1
+            },
+            "class_type": "EmptyLatentAudio",
+            "_meta": {
+            "title": "EmptyLatentAudio"
+            }
+        },
+        "12": {
+            "inputs": {
+            "samples": [
+                "3",
+                0
+            ],
+            "vae": [
+                "4",
+                2
+            ]
+            },
+            "class_type": "VAEDecodeAudio",
+            "_meta": {
+            "title": "VAEDecodeAudio"
+            }
+        },
+        "13": {
+            "inputs": {
+            "filename_prefix": "audio/ComfyUI",
+            "audioUI": "",
+            "audio": [
+                "12",
+                0
+            ]
+            },
+            "class_type": "SaveAudio",
+            "_meta": {
+            "title": "SaveAudio"
+            }
+        }
+    }
@@ -0,0 +1,107 @@
+{
+      "3": {
+        "inputs": {
+          "seed": 588445435278533,
+          "steps": 20,
+          "cfg": 8,
+          "sampler_name": "euler",
+          "scheduler": "normal",
+          "denoise": 1,
+          "model": [
+            "4",
+            0
+          ],
+          "positive": [
+            "6",
+            0
+          ],
+          "negative": [
+            "7",
+            0
+          ],
+          "latent_image": [
+            "5",
+            0
+          ]
+        },
+        "class_type": "KSampler",
+        "_meta": {
+          "title": "KSampler"
+        }
+      },
+      "4": {
+        "inputs": {
+          "ckpt_name": "v1-5-pruned-emaonly-fp16.safetensors"
+        },
+        "class_type": "CheckpointLoaderSimple",
+        "_meta": {
+          "title": "Load Checkpoint"
+        }
+      },
+      "5": {
+        "inputs": {
+          "width": 512,
+          "height": 512,
+          "batch_size": 1
+        },
+        "class_type": "EmptyLatentImage",
+        "_meta": {
+          "title": "Empty Latent Image"
+        }
+      },
+      "6": {
+        "inputs": {
+          "text": "beautiful scenery nature glass bottle landscape, , purple galaxy bottle,",
+          "clip": [
+            "4",
+            1
+          ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+          "title": "CLIP Text Encode (Prompt)"
+        }
+      },
+      "7": {
+        "inputs": {
+          "text": "text, watermark",
+          "clip": [
+            "4",
+            1
+          ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+          "title": "CLIP Text Encode (Prompt)"
+        }
+      },
+      "8": {
+        "inputs": {
+          "samples": [
+            "3",
+            0
+          ],
+          "vae": [
+            "4",
+            2
+          ]
+        },
+        "class_type": "VAEDecode",
+        "_meta": {
+          "title": "VAE Decode"
+        }
+      },
+      "9": {
+        "inputs": {
+          "filename_prefix": "ComfyUI",
+          "images": [
+            "8",
+            0
+          ]
+        },
+        "class_type": "SaveImage",
+        "_meta": {
+          "title": "Save Image"
+        }
+      }
+    }
@@ -0,0 +1,216 @@
+{
+    "90": {
+        "inputs": {
+            "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+            "type": "wan",
+            "device": "default"
+        },
+        "class_type": "CLIPLoader",
+        "_meta": {
+            "title": "Load CLIP"
+        }
+    },
+    "91": {
+        "inputs": {
+            "text": "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走，裸露，NSFW",
+            "clip": [
+                "90",
+                0
+            ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+            "title": "CLIP Text Encode (Negative Prompt)"
+        }
+    },
+    "92": {
+        "inputs": {
+            "vae_name": "wan_2.1_vae.safetensors"
+        },
+        "class_type": "VAELoader",
+        "_meta": {
+            "title": "Load VAE"
+        }
+    },
+    "93": {
+        "inputs": {
+            "shift": 8.000000000000002,
+            "model": [
+                "101",
+                0
+            ]
+        },
+        "class_type": "ModelSamplingSD3",
+        "_meta": {
+            "title": "ModelSamplingSD3"
+        }
+    },
+    "94": {
+        "inputs": {
+            "shift": 8,
+            "model": [
+                "102",
+                0
+            ]
+        },
+        "class_type": "ModelSamplingSD3",
+        "_meta": {
+            "title": "ModelSamplingSD3"
+        }
+    },
+    "95": {
+        "inputs": {
+            "add_noise": "disable",
+            "noise_seed": 0,
+            "steps": 20,
+            "cfg": 3.5,
+            "sampler_name": "euler",
+            "scheduler": "simple",
+            "start_at_step": 10,
+            "end_at_step": 10000,
+            "return_with_leftover_noise": "disable",
+            "model": [
+                "94",
+                0
+            ],
+            "positive": [
+                "99",
+                0
+            ],
+            "negative": [
+                "91",
+                0
+            ],
+            "latent_image": [
+                "96",
+                0
+            ]
+        },
+        "class_type": "KSamplerAdvanced",
+        "_meta": {
+            "title": "KSampler (Advanced)"
+        }
+    },
+    "96": {
+        "inputs": {
+            "add_noise": "enable",
+            "noise_seed": "__RANDOM_INT__",
+            "steps": 20,
+            "cfg": 3.5,
+            "sampler_name": "euler",
+            "scheduler": "simple",
+            "start_at_step": 0,
+            "end_at_step": 10,
+            "return_with_leftover_noise": "enable",
+            "model": [
+                "93",
+                0
+            ],
+            "positive": [
+                "99",
+                0
+            ],
+            "negative": [
+                "91",
+                0
+            ],
+            "latent_image": [
+                "104",
+                0
+            ]
+        },
+        "class_type": "KSamplerAdvanced",
+        "_meta": {
+            "title": "KSampler (Advanced)"
+        }
+    },
+    "97": {
+        "inputs": {
+            "samples": [
+                "95",
+                0
+            ],
+            "vae": [
+                "92",
+                0
+            ]
+        },
+        "class_type": "VAEDecode",
+        "_meta": {
+            "title": "VAE Decode"
+        }
+    },
+    "98": {
+        "inputs": {
+            "filename_prefix": "video/ComfyUI",
+            "format": "auto",
+            "codec": "auto",
+            "video": [
+                "100",
+                0
+            ]
+        },
+        "class_type": "SaveVideo",
+        "_meta": {
+            "title": "Save Video"
+        }
+    },
+    "99": {
+        "inputs": {
+            "text": "Beautiful young European woman with honey blonde hair gracefully turning her head back over shoulder, gentle smile, bright eyes looking at camera. Hair flowing in slow motion as she turns. Soft natural lighting, clean background, cinematic portrait.",
+            "clip": [
+                "90",
+                0
+            ]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+            "title": "CLIP Text Encode (Positive Prompt)"
+        }
+    },
+    "100": {
+        "inputs": {
+            "fps": 16,
+            "images": [
+                "97",
+                0
+            ]
+        },
+        "class_type": "CreateVideo",
+        "_meta": {
+            "title": "Create Video"
+        }
+    },
+    "101": {
+        "inputs": {
+            "unet_name": "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors",
+            "weight_dtype": "default"
+        },
+        "class_type": "UNETLoader",
+        "_meta": {
+            "title": "Load Diffusion Model"
+        }
+    },
+    "102": {
+        "inputs": {
+            "unet_name": "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors",
+            "weight_dtype": "default"
+        },
+        "class_type": "UNETLoader",
+        "_meta": {
+            "title": "Load Diffusion Model"
+        }
+    },
+    "104": {
+        "inputs": {
+            "width": 640,
+            "height": 640,
+            "length": 81,
+            "batch_size": 1
+        },
+        "class_type": "EmptyHunyuanLatentVideo",
+        "_meta": {
+            "title": "EmptyHunyuanLatentVideo"
+        }
+    }
+}
Author	SHA1	Message	Date
Abiola Akinnubi	b03645d145	Added model type environment variable so we can actually attempt to benchmark with the right payload.	2025-10-29 23:25:44 -07:00
edgaratvast	02c8307af7	remove redis pubsub from pyworker (#53 ) Co-authored-by: Edgar Lin <edgarlin2000@gmail.com>	2025-10-29 17:07:56 -07:00
LucasArmandVast	9f5a432513	Merge pull request #51 from vast-ai/delete-reqs-hotfix Redis subscriber queue patch	2025-10-28 16:07:28 -07:00
Lucas Armand	e09f1fa953	patch for redis queue	2025-10-28 16:03:50 -07:00
edgaratvast	ba6f1c2e4b	Fix signature (#50 ) * change order of fields in auth_data to match autoscaler for signature verification * also ignore __request_id * Revert "change order of fields in auth_data to match autoscaler for signature verification" so that it's alphabetical again This reverts commit `b8223879c9`. * enforce alphabetical json dumping of message for signature verification --------- Co-authored-by: Edgar Lin <edgarlin2000@gmail.com>	2025-10-28 16:01:32 -07:00
edgaratvast	298590fb88	Merge pull request #45 from vast-ai/new-pyworker New PyWorker	2025-10-28 14:02:53 -07:00
Lucas Armand	814c3acd4c	remove unused code	2025-10-28 13:43:57 -07:00
Lucas Armand	22bca74087	Prevent load time race	2025-10-27 18:25:21 -07:00