This commit is contained in:
Lucas Armand
2025-12-12 12:12:57 -08:00
parent ccd29ed8b6
commit 375633cb18
+4 -4
View File
@@ -108,7 +108,7 @@ async def call_completions(client: Serverless, *, model: str, prompt: str, **kwa
"temperature": kwargs.get("temperature", DEFAULT_TEMPERATURE), "temperature": kwargs.get("temperature", DEFAULT_TEMPERATURE),
} }
log.debug("POST /v1/completions %s", json.dumps(payload)[:500]) log.debug("POST /v1/completions %s", json.dumps(payload)[:500])
resp = await endpoint.request("/v1/completions", payload, cost=payload["input"]["max_tokens"]) resp = await endpoint.request("/v1/completions", payload, cost=payload["max_tokens"])
return resp["response"] return resp["response"]
async def call_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]: async def call_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
@@ -124,7 +124,7 @@ async def call_chat_completions(client: Serverless, *, model: str, messages: Lis
**({"tool_choice": kwargs["tool_choice"]} if "tool_choice" in kwargs else {}), **({"tool_choice": kwargs["tool_choice"]} if "tool_choice" in kwargs else {}),
} }
log.debug("POST /v1/chat/completions %s", json.dumps(payload)[:500]) log.debug("POST /v1/chat/completions %s", json.dumps(payload)[:500])
resp = await endpoint.request("/v1/chat/completions", payload, cost=payload["input"]["max_tokens"]) resp = await endpoint.request("/v1/chat/completions", payload, cost=payload["max_tokens"])
return resp["response"] return resp["response"]
# ---- Streaming variants ---- # ---- Streaming variants ----
@@ -141,7 +141,7 @@ async def stream_completions(client: Serverless, *, model: str, prompt: str, **k
**({"stop": kwargs["stop"]} if "stop" in kwargs else {}), **({"stop": kwargs["stop"]} if "stop" in kwargs else {}),
} }
log.debug("STREAM /v1/completions %s", json.dumps(payload)[:500]) log.debug("STREAM /v1/completions %s", json.dumps(payload)[:500])
resp = await endpoint.request("/v1/completions", payload, cost=payload["input"]["max_tokens"], stream=True) resp = await endpoint.request("/v1/completions", payload, cost=payload["max_tokens"], stream=True)
return resp["response"] # async generator return resp["response"] # async generator
async def stream_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], **kwargs): async def stream_chat_completions(client: Serverless, *, model: str, messages: List[Dict[str, Any]], **kwargs):
@@ -158,7 +158,7 @@ async def stream_chat_completions(client: Serverless, *, model: str, messages: L
**({"tool_choice": kwargs["tool_choice"]} if "tool_choice" in kwargs else {}), **({"tool_choice": kwargs["tool_choice"]} if "tool_choice" in kwargs else {}),
} }
log.debug("STREAM /v1/chat/completions %s", json.dumps(payload)[:500]) log.debug("STREAM /v1/chat/completions %s", json.dumps(payload)[:500])
resp = await endpoint.request("/v1/chat/completions", payload, cost=payload["input"]["max_tokens"], stream=True) resp = await endpoint.request("/v1/chat/completions", payload, cost=payload["max_tokens"], stream=True)
return resp["response"] # async generator return resp["response"] # async generator