Skip to main content

Overview

Custom tools are Python functions that extend agent capabilities beyond built-in atomic actions (click, type, swipe). Use cases:
  • External API calls (webhooks, REST services)
  • Data processing and calculations
  • Database operations
  • Domain-specific logic

Quick Start

Basic Example

Simple custom tool without device access:
import asyncio
from droidrun import DroidAgent, DroidrunConfig

def calculate_tax(amount: float, rate: float, **kwargs) -> str:
    """Return a formatted summary of the tax on ``amount`` and the resulting total.

    Args:
        amount: Pre-tax amount.
        rate: Tax rate as a fraction of ``amount`` (0.08 means 8%).
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        A string of the form "Tax: $<tax>, Total: $<total>", both with two decimals.
    """
    tax_due = amount * rate
    grand_total = amount + tax_due
    return f"Tax: ${tax_due:.2f}, Total: ${grand_total:.2f}"

# Custom tool registry passed to DroidAgent, keyed by tool name.
custom_tools = {
    "calculate_tax": {
        # User-supplied arguments only; both are required numbers.
        "parameters": {
            "amount": {"type": "number", "required": True},
            "rate": {"type": "number", "required": True},
        },
        # Text shown to the LLM so it knows when to call the tool.
        "description": "Calculate tax for a given amount and rate",
        # Python callable that implements the tool.
        "function": calculate_tax
    }
}

async def main():
    """Create a DroidAgent with the custom tools registered, run it, and print the outcome."""
    agent = DroidAgent(
        goal="Calculate tax for $100 at 8% rate",
        config=DroidrunConfig(),
        custom_tools=custom_tools,
    )

    outcome = await agent.run()
    print(outcome.success, outcome.reason)

asyncio.run(main())

Tool Structure

All custom tools follow this format:
# Template: one entry per tool, keyed by the tool's name.
custom_tools = {
    "tool_name": {
        "parameters": {                          # Parameter definitions
            "arg1": {"type": "string", "required": True},
            "arg2": {"type": "string", "required": True},
        },
        "description": "Tool description...",    # For LLM prompt
        "function": callable_function            # Python function
    }
}
Function signature:
async def tool_name(arg1: type, arg2: type, *, ctx: ActionContext, **kwargs) -> str:
    """
    Template for a custom tool; replace the body with your own logic.

    Args:
        arg1: Your parameter
        arg2: Another parameter
        ctx: ActionContext (injected automatically by the registry)
        **kwargs: Extra keyword arguments, accepted for forward compatibility

    Returns:
        The tool result as a string
    """
    # Implementation
    return "result"
Key points:
  • List only user parameters in "parameters" (not ctx)
  • ctx (an ActionContext instance) is injected automatically as a keyword argument
  • Access device via ctx.driver, shared state via ctx.shared_state, credentials via ctx.credential_manager
  • Use **kwargs for forward compatibility
  • Return type should be str

Using ActionContext

Access the device and state via the ctx parameter (an ActionContext instance injected automatically):
async def screenshot_and_count(*, ctx, **kwargs) -> str:
    """Capture a screenshot, then report how many elements the UI state holds.

    Args:
        ctx: Injected context; ``ctx.driver`` and ``ctx.state_provider`` are used.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Screenshot taken. Found <n> UI elements", where n is 0 when the
        state provider returns a falsy state.
    """
    # The capture is awaited for its effect; the returned image is not used here.
    await ctx.driver.screenshot()

    # Ask the state provider for the current UI state.
    state = await ctx.state_provider.get_state()
    if state:
        found = len(state.elements)
    else:
        found = 0

    return f"Screenshot taken. Found {found} UI elements"

# Registry entry for the screenshot tool.
custom_tools = {
    "screenshot_and_count": {
        # No user parameters; ctx is injected automatically and is not listed here.
        "parameters": {},
        "description": "Take screenshot and count UI elements on screen",
        "function": screenshot_and_count
    }
}
Available via ctx:
  • ctx.driver - DeviceDriver for raw device I/O (screenshot, tap, swipe, etc.)
  • ctx.state_provider - StateProvider to fetch/parse UI state
  • ctx.ui - Current UIState (refreshed each step)
  • ctx.shared_state - DroidAgentState for agent coordination
  • ctx.credential_manager - CredentialManager for secrets

Accessing Shared State

Access agent state via ctx.shared_state:
async def check_action_history(action_name: str, *, ctx, **kwargs) -> str:
    """Report whether ``action_name`` appears among the five most recent actions.

    Checks run in this order and the first match wins: recent-action match,
    step-count warning, then the ``skip_validation`` memory flag.

    Args:
        action_name: Name compared against each history entry's "action" value.
        ctx: Injected context; only ``ctx.shared_state`` is read.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        A human-readable status message.
    """
    state = ctx.shared_state

    # Only the last five history entries are considered "recent".
    for entry in state.action_history[-5:]:
        if entry.get("action") == action_name:
            return f"Action '{action_name}' was already performed recently"

    # More than ten steps is treated as a runaway task.
    if state.step_number > 10:
        return "Warning: Task taking too many steps"

    # A memory flag can short-circuit validation.
    if "skip_validation" in state.memory:
        return "Validation skipped per memory"

    return f"Action '{action_name}' not yet performed"

# Registry entry for the history-check tool.
custom_tools = {
    "check_action_history": {
        # Only the user-facing argument is listed; ctx is injected automatically.
        "parameters": {
            "action_name": {"type": "string", "required": True},
        },
        "description": "Check if a specific action was recently performed in agent history",
        "function": check_action_history
    }
}
DroidAgentState fields:
  • step_number - Current execution step
  • action_history - List of executed actions
  • action_outcomes - Success/failure per action
  • memory - Agent memory dict
  • custom_variables - User-provided variables
  • visited_packages - Apps visited
  • current_package_name - Current app package
  • plan - Current Manager plan
  • More in droidrun/agent/droid/state.py

Common Patterns

API Integration

import requests

def fetch_weather(city: str, **kwargs) -> str:
    """Fetch the current weather for ``city`` from the OpenWeatherMap API.

    Args:
        city: City name, sent as the ``q`` query parameter.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Weather in <city>: <description>, <temp>°C" on success, or
        "Error: <message>" if the request or the response parsing fails.
    """
    try:
        # Using OpenWeatherMap API example
        api_key = "your_api_key"
        url = "https://api.openweathermap.org/data/2.5/weather"

        # Pass the query through `params` so requests URL-encodes it.
        # Interpolating `city` into the URL breaks on spaces and lets
        # "&" or "#" inject or truncate query parameters.
        response = requests.get(
            url,
            params={"q": city, "appid": api_key},
            timeout=10,
        )
        response.raise_for_status()

        data = response.json()
        temp = data["main"]["temp"] - 273.15  # Convert to Celsius
        weather = data["weather"][0]["description"]

        return f"Weather in {city}: {weather}, {temp:.1f}°C"
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"Error: {str(e)}"

# Registry entry for the weather tool.
custom_tools = {
    "fetch_weather": {
        # Single required string argument: the city to look up.
        "parameters": {
            "city": {"type": "string", "required": True},
        },
        "description": "Fetch current weather data for a given city",
        "function": fetch_weather
    }
}

Database Query

import sqlite3

def query_database(query: str, **kwargs) -> str:
    """Run ``query`` against the local ``app.db`` SQLite database and report the row count.

    Args:
        query: SQL statement to execute.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Found <n> results" on success, or "Database error: <message>" if
        connecting or executing fails.
    """
    # NOTE(review): `query` is executed verbatim, so the caller can run any
    # SQL statement against app.db. Validate or restrict it if the tool is
    # meant to be read-only.
    try:
        conn = sqlite3.connect("app.db")
        try:
            results = conn.execute(query).fetchall()
        finally:
            # Close even when execute()/fetchall() raises, so a failing
            # query does not leak the connection.
            conn.close()

        return f"Found {len(results)} results"
    except Exception as e:
        return f"Database error: {str(e)}"

# Registry entry for the database tool.
custom_tools = {
    "query_database": {
        "parameters": {
            "query": {"type": "string", "required": True},
        },
        # The tool returns only a row count ("Found N results"), so the
        # description must not promise the rows themselves to the LLM.
        "description": "Execute SQL query on local database and return the number of rows found",
        "function": query_database
    }
}

Async Operations

import aiohttp

async def fetch_async(url: str, **kwargs) -> str:
    """Fetch ``url`` asynchronously and report the size of the response body.

    Args:
        url: Address to request with HTTP GET.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Fetched <n> bytes from <url>" on success, or "Error: <message>" if
        the request fails, times out, or returns an HTTP error status.
    """
    try:
        # ClientTimeout is aiohttp's timeout type; total=10 caps the whole
        # request at 10 seconds.
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=timeout) as response:
                # Treat HTTP 4xx/5xx as failures instead of reporting an
                # error page as a successful fetch.
                response.raise_for_status()
                # read() returns raw bytes, so len() matches the "bytes" in
                # the message; text() would count decoded characters.
                body = await response.read()
                return f"Fetched {len(body)} bytes from {url}"
    except Exception as e:
        return f"Error: {str(e)}"

# Registry entry for the async fetch tool; the registered function is a coroutine function.
custom_tools = {
    "fetch_async": {
        # Single required string argument: the URL to fetch.
        "parameters": {
            "url": {"type": "string", "required": True},
        },
        "description": "Asynchronously fetch data from a URL",
        "function": fetch_async
    }
}

Best Practices

1. Clear Descriptions

Write specific, detailed descriptions that say what the tool does and what input it takes:
# Good
"description": "Send POST request to webhook URL with JSON data payload"

# Bad
"description": "Send webhook"

2. Error Handling

Always catch exceptions:
def robust_tool(url: str, **kwargs) -> str:
    """GET ``url`` and describe the outcome as text instead of raising.

    Handlers run from most to least specific: timeout, any other requests
    error, then anything else.

    Args:
        url: Address to request.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Success: <status>" or an error message.
    """
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        outcome = f"Success: {reply.status_code}"
    except requests.Timeout:
        outcome = "Error: Request timed out"
    except requests.RequestException as err:
        outcome = f"Error: {str(err)}"
    except Exception as err:
        outcome = f"Unexpected error: {str(err)}"
    return outcome

3. Argument Validation

Validate inputs before processing:
def validated_tool(count: int, **kwargs) -> str:
    """Validate ``count`` and report how many items were processed.

    Args:
        count: Number of items; must be an integer from 0 to 100 inclusive.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Processed <count> items", or an "Error: ..." message when ``count``
        is not an integer or is out of range.
    """
    # bool is a subclass of int, so isinstance(True, int) is True; reject
    # booleans explicitly rather than treating True/False as 1/0.
    if isinstance(count, bool) or not isinstance(count, int):
        return "Error: count must be integer"
    if not 0 <= count <= 100:
        return "Error: count must be 0-100"

    return f"Processed {count} items"

4. Logging

Use Python logging for debugging:
import logging
logger = logging.getLogger("droidrun")

def logged_tool(data: str, **kwargs) -> str:
    """Log a truncated preview of ``data``, then a completion message.

    Args:
        data: Input to process; only its first 50 characters are logged.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        Always "Success".
    """
    # Lazy %-style arguments: the message is only formatted if a handler
    # actually emits the record.
    logger.info("Processing: %s...", data[:50])
    # Process data
    logger.info("Complete")
    return "Success"

Advanced Example

Combining ActionContext, shared state, and credentials:
import requests

async def send_authenticated_request(
    url: str,
    data: str,
    *,
    ctx,
    **kwargs
) -> str:
    """Send authenticated API request with credentials.

    POSTs ``{"data": data}`` as JSON to ``url`` with a bearer token resolved
    from the credential manager under the key "API_KEY".

    Args:
        url: Endpoint to POST to.
        data: Payload placed under the "data" key of the JSON body.
        ctx: Injected context; ``ctx.credential_manager`` and
            ``ctx.shared_state`` are used.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        "Request successful: <status>" on success, otherwise an
        "Error: ..." message.
    """
    # Imported locally so the snippet does not depend on file-level imports.
    import asyncio
    import functools

    try:
        # Access credentials via ActionContext
        if not ctx.credential_manager:
            return "Error: Credential manager not available"

        api_key = await ctx.credential_manager.resolve_key("API_KEY")

        # The agent's step counter is used as a rough cap on API usage.
        if ctx.shared_state.step_number > 15:
            return "Error: Too many API calls"

        # Send authenticated request
        headers = {"Authorization": f"Bearer {api_key}"}
        # requests is blocking; run it in the default executor so the event
        # loop is not stalled for up to the 10 s timeout.
        post = functools.partial(
            requests.post, url, json={"data": data}, headers=headers, timeout=10
        )
        response = await asyncio.get_running_loop().run_in_executor(None, post)
        response.raise_for_status()

        return f"Request successful: {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"

# Registry entry for the authenticated-request tool.
custom_tools = {
    "send_authenticated_request": {
        # Only user-facing arguments are listed; ctx is injected automatically.
        "parameters": {
            "url": {"type": "string", "required": True},
            "data": {"type": "string", "required": True},
        },
        "description": "Send authenticated API request using stored credentials",
        "function": send_authenticated_request
    }
}

# Usage with credentials
# The "API_KEY" name presumably must match the key the tool passes to
# resolve_key("API_KEY"). The value shown is a placeholder.
credentials = {"API_KEY": "sk-1234567890"}

# NOTE: `config` is not defined in this snippet; it is assumed to be a
# DroidrunConfig instance created beforehand (e.g. config = DroidrunConfig()).
agent = DroidAgent(
    goal="Send data to API",
    config=config,
    custom_tools=custom_tools,
    credentials=credentials
)

See Agent Architecture to understand how shared state and custom tools are integrated.