# Repository page header (not code): mirrored 2 minutes ago; 0
# Last commit f59cf00 by alexandruilie7: "Add ui agent (#343)" - add uipath agent; readme update
import httpx
import mm_agents.uipath.utils as utils
import os

class GrounderClient:
    """Async HTTP client for a remote UI-grounding service.

    The endpoint is meant to be a proxy hosting UI-TARS + UiElementPredictor.
    It could be replaced with a vLLM server plus grounder (UI-TARS) specific
    processing, or with any other grounder.

    The API key is read from the ``SERVICE_KEY`` environment variable on
    every call to :meth:`predict` and sent in the ``X-API-KEY`` header.
    """

    def __init__(self, url: str = "", timeout: float = 100.0) -> None:
        """Configure the client.

        Args:
            url: Endpoint that receives the grounding POST request. The
                default is empty (as before), so it must be set, here or via
                ``self.url``, before :meth:`predict` is called.
            timeout: Request timeout in seconds passed to httpx.
        """
        self.url = url
        self.timeout = timeout

    async def predict(
        self, image_base64: str, action_description: str, action: str | None = None
    ) -> utils.GroundingOutput:
        """Ask the grounding service where an action should be performed.

        Args:
            image_base64: Base64-encoded image sent as ``image_base64``.
            action_description: Description of the action, sent as
                ``action_description``.
            action: Optional action type, sent as ``action``.

        Returns:
            A ``utils.GroundingOutput`` built from the ``description`` and
            ``position`` fields of the JSON response; ``position`` is
            converted to a tuple.

        Raises:
            ValueError: If the service answers with a status other than 200.
            KeyError: If the response JSON lacks ``description`` or
                ``position``.
        """
        request = utils.GroundingRequest(
            description=action_description,
            image_base64=image_base64,
            action_type=action,
        )
        payload = {
            "image_base64": request.image_base64,
            "action_description": request.description,
            "action": request.action_type,
        }

        # httpx only accepts str/bytes header values, so a missing SERVICE_KEY
        # (None) must not be forwarded. Without the header the request is sent
        # unauthenticated and any rejection surfaces via the status check below.
        api_key = os.getenv("SERVICE_KEY")
        headers = {"X-API-KEY": api_key} if api_key is not None else {}

        async with httpx.AsyncClient() as client:
            response = await client.post(
                self.url,
                json=payload,
                headers=headers,
                timeout=self.timeout,
            )

        if response.status_code != 200:
            raise ValueError(f"Prediction failed: {response.text}")

        data = response.json()
        return utils.GroundingOutput(
            description=data["description"],
            position=tuple(data["position"]),
        )