import os

import httpx

import mm_agents.uipath.utils as utils


class GrounderClient(object):
    """Async HTTP client that forwards grounding requests to a remote service.

    The service receives a base64 screenshot plus an action description and
    replies with JSON containing a ``description`` and a ``position``.
    """

    def __init__(self, url: str = ""):
        """Create a client.

        Args:
            url: Endpoint that ``predict`` posts to. Defaults to the empty
                string (the original placeholder); it can also be set later
                through the ``url`` attribute.
        """
        # Proxy for hosting UI-TARS + UiElementPredictor
        # Could be replaced with a VLLM server and grounder (UI-TARS) specific processing
        # Or any other grounder
        self.url = url

    async def predict(
        self, image_base64: str, action_description: str, action: str | None = None
    ) -> utils.GroundingOutput:
        """Post a grounding request and return the parsed prediction.

        Args:
            image_base64: Image payload, sent as the ``image_base64`` field.
            action_description: Text sent as the ``action_description`` field.
            action: Optional action type, sent as the ``action`` field.

        Returns:
            A ``utils.GroundingOutput`` built from the ``description`` and
            ``position`` keys of the JSON response.

        Raises:
            ValueError: If the service answers with a status other than 200.
        """
        request = utils.GroundingRequest(
            description=action_description,
            image_base64=image_base64,
            action_type=action,
        )

        # Only attach the API key header when SERVICE_KEY is actually set:
        # httpx rejects a None header value with a TypeError before the
        # request is even sent.
        headers: dict[str, str] = {}
        api_key = os.getenv("SERVICE_KEY")
        if api_key is not None:
            headers["X-API-KEY"] = api_key

        async with httpx.AsyncClient() as client:
            response = await client.post(
                self.url,
                json={
                    "image_base64": request.image_base64,
                    "action_description": request.description,
                    "action": request.action_type,
                },
                headers=headers,
                timeout=100.0,
            )

        if response.status_code != 200:
            raise ValueError(f"Prediction failed: {response.text}")

        data = response.json()
        return utils.GroundingOutput(
            description=data["description"],
            position=tuple(data["position"]),
        )