diff --git a/workers/comfyui-json/README.md b/workers/comfyui-json/README.md index 472b1d1..8d8097a 100644 --- a/workers/comfyui-json/README.md +++ b/workers/comfyui-json/README.md @@ -2,6 +2,28 @@ This is the base PyWorker for ComfyUI. It provides a unified interface for running any ComfyUI workflow through a proxy-based architecture. +The cost for each request has a static value of `1`. ComfyUI does not handle concurrent workloads and there is no current provision to load multiple instances of ComfyUI per worker node. + +## Requirements + +This worker requires both [ComfyUI](https://github.com/comfyanonymous/ComfyUI) and [ComfyUI API Wrapper](https://github.com/ai-dock/comfyui-api-wrapper). + +A docker image is provided but you may use any if the above requirements are met. + +## Benchmarking + +A simple image generation benchmark is run when the worker is first loaded. + +This benchmark uses SD v1.5 in the default text to image workflow provided by ComfyUI. The following variables can be used to alter the complexity and running time of the benchmark: + +| Environment Variable | Default Value | +| -------------------- | ------------- | +| BENCHMARK_TEST_WIDTH | 512 | +| BENCHMARK_TEST_HEIGHT | 512 | +| BENCHMARK_TEST_STEPS | 20 | + +The prompt will be randomly selected from the file in misc/test_prompts.txt and a random seed used for every run of the benchmark. + ## Endpoint The worker provides a single endpoint: @@ -27,8 +49,7 @@ The worker accepts requests in the following format. Choose either modifier mode }, "s3": { ... }, // optional "webhook": { ... } // optional - }, - "expected_time": 30.0 + } } ``` @@ -42,8 +63,7 @@ The worker accepts requests in the following format. Choose either modifier mode }, "s3": { ... }, // optional "webhook": { ... } // optional - }, - "expected_time": 30.0 + } } ``` @@ -53,7 +73,6 @@ The worker accepts requests in the following format. Choose either modifier mode - **`input`**: Contains the main workflow data - **`input.request_id`**: Unique identifier for the request -- **`expected_time`**: Expected runtime in seconds on RTX4090 (defaults to 46.0 if not provided) ### Workflow Mode (Choose One) @@ -128,8 +147,7 @@ WEBHOOK_TIMEOUT=30 # Webhook timeout in seconds "steps": 20, "seed": 42 } - }, - "expected_time": 25.0 + } } ``` @@ -156,20 +174,10 @@ WEBHOOK_TIMEOUT=30 # Webhook timeout in seconds "class_type": "KSampler" } } - }, - "expected_time": 45.0 + } } ``` -## Expected Time Guidelines - -The `expected_time` field helps with resource planning and should reflect expected runtime on RTX4090: - -- **Simple text-to-image**: 15-30 seconds -- **Complex workflows with upscaling**: 60+ seconds -- **Video generation**: 180+ seconds -- **Default**: 46 seconds (if not specified) - ## Client Libraries See the test client examples for implementation details on how to integrate with the ComfyUI worker. diff --git a/workers/comfyui-json/data_types.py b/workers/comfyui-json/data_types.py index 5140d14..e73163d 100644 --- a/workers/comfyui-json/data_types.py +++ b/workers/comfyui-json/data_types.py @@ -1,8 +1,7 @@ +import os import sys -import json import random import dataclasses -import inspect from typing import Dict, Any from functools import cache from math import ceil @@ -13,27 +12,33 @@ from lib.data_types import ApiPayload, JsonDataException with open("workers/comfyui/misc/test_prompts.txt", "r") as f: test_prompts = f.readlines() +def count_workload() -> float: + # Always 1.0 where there is a single instance of ComfyUI handling requests + return 1.0 + @dataclasses.dataclass class ComfyWorkflowData(ApiPayload): input: dict - expected_time: float = 46.0 # Default: 2x baseline (23s * 2) for RTX4090 @classmethod def for_test(cls): + """ + Use the variables available to simulate workflows of the required running time + Example: SD1.5, simple image gen 10000 steps, 512px x 512px will run for approximately 9 minutes @ ~18 it/s (RTX 4090) + """ test_prompt = random.choice(test_prompts).rstrip() return cls( input={ - "request_id": f"test-{random.randint(1000, 9999)}", + "request_id": f"test-{random.randint(1000, 99999)}", "modifier": "Text2Image", "modifications": { "prompt": test_prompt, - "width": 1024, - "height": 1024, - "steps": 28, + "width": os.getenv('BENCHMARK_TEST_WIDTH', 512), + "height": os.getenv('BENCHMARK_TEST_HEIGHT', 512), + "steps": os.getenv('BENCHMARK_TEST_STEPS', 20), "seed": random.randint(0, sys.maxsize), } - }, - expected_time=25.0 # Test data: expect 25 seconds on RTX4090 (slightly above baseline) + } ) def generate_payload_json(self) -> Dict[str, Any]: @@ -41,31 +46,7 @@ class ComfyWorkflowData(ApiPayload): return {"input": self.input} def count_workload(self) -> float: - """ - This needs review. We cannot reasonably predict the workload based on the inputs. We may be processing: - - Images - - Videos - - Audio... There may also be complex loops in the workflow. - - User will provide an expected time to complete and we will calculate equivalent cost - - Convert user-provided expected_time (RTX4090 seconds) to the old scoring system. - - The old system normalized to: 1024x1024, 28 steps = 200 tokens on RTX4090 - The old formula was: REQUEST_TIME_FOR_STANDARD_IMAGE * (time_ratio * 200) - - Now the user provides the expected request time directly. - Default expected_time is 46s (2x baseline) if not specified. - """ - # Baseline: standard image (1024x1024, 28 steps) = 23s = 200 tokens on RTX4090 - RTX4090_BASELINE_TIME = 23.0 # seconds for standard image on RTX4090 - BASELINE_TOKENS = 200 # tokens for standard image - - # Calculate time ratio compared to baseline - time_ratio = self.expected_time / RTX4090_BASELINE_TIME - - # Return workload score: time_ratio * baseline tokens - return time_ratio * BASELINE_TOKENS + return count_workload() @classmethod def from_json_msg(cls, json_msg: Dict[str, Any]) -> "ComfyWorkflowData": @@ -73,10 +54,6 @@ class ComfyWorkflowData(ApiPayload): if "input" not in json_msg: raise JsonDataException({"input": "missing parameter"}) - # expected_time is optional, uses default if not provided - expected_time = json_msg.get("expected_time", 46.0) # Default: 2x baseline - return cls( - input=json_msg["input"], - expected_time=float(expected_time) + input=json_msg["input"] ) \ No newline at end of file