Skip to main content

Quick Start

import asyncio
from pydantic import BaseModel, Field
from droidrun import DroidAgent
from droidrun.config_manager import DroidrunConfig

# 1. Define output structure
class ContactInfo(BaseModel):
    """Contact information from device."""
    # name and phone declare no default, so a value must be supplied for each.
    name: str = Field(description="Full name of the contact")
    phone: str = Field(description="Phone number")
    # email falls back to the "Not provided" placeholder when no value is supplied.
    email: str = Field(description="Email address", default="Not provided")

# 2. Create agent with output_model
async def main():
    """Run the agent once and print the extracted contact, if there is one."""
    agent = DroidAgent(
        goal="Find John Smith's contact information",
        config=DroidrunConfig(),
        output_model=ContactInfo,
    )

    # 3. Run and access structured output
    result = await agent.run()

    # Nothing to print unless the task succeeded and a model was extracted.
    if not (result.success and result.structured_output):
        return

    contact: ContactInfo = result.structured_output
    print(f"Name: {contact.name}")
    print(f"Phone: {contact.phone}")
    print(f"Email: {contact.email}")

asyncio.run(main())

How It Works

Two-Stage Process

Stage 1: Task Execution
  • DroidAgent performs device actions while collecting the required information
  • Your Pydantic schema is automatically injected into the system prompt
  • The agent completes with a natural-language answer containing the data
Stage 2: Extraction (Post-Completion)
  • StructuredOutputAgent receives the final answer text
  • Uses the LLM’s astructured_predict() to extract the data into your model
  • Validates the result against the schema and returns a typed object, or None if extraction fails

Example: Invoice Extraction

from pydantic import BaseModel, Field
from typing import List

class Invoice(BaseModel):
    """Invoice information."""
    # All three fields declare no default, so each must be present in the
    # extracted data.
    invoice_number: str = Field(description="Invoice ID")
    vendor_name: str = Field(description="Vendor name")
    # Numeric amount; the description tells the LLM the value is in dollars.
    total_due: float = Field(description="Total amount in dollars")

agent = DroidAgent(
    goal="Open Gmail and extract invoice from Acme Corp email",
    config=DroidrunConfig(),
    output_model=Invoice,
)

result = await agent.run()
invoice = result.structured_output
print(f"Invoice {invoice.invoice_number}: ${invoice.total_due}")

Working with Results

Accessing Data

result = await agent.run()

if result.success:
    if result.structured_output:
        data = result.structured_output  # Typed Pydantic object
        print(f"Extracted: {data}")
    else:
        print(f"Extraction failed, text answer: {result.reason}")
else:
    print(f"Task failed: {result.reason}")

Exporting to JSON

result = await agent.run()

if result.structured_output:
    # Convert to JSON and save
    json_str = result.structured_output.model_dump_json(indent=2)
    with open("output.json", "w") as f:
        f.write(json_str)

Configuration

Custom Extraction LLM

By default, extraction uses the codeact LLM. To use a different model for extraction, define a dedicated structured_output profile in config.yaml:
llm_profiles:
  # Profile for the codeact LLM; extraction uses this one by default.
  codeact:
    provider: GoogleGenAI
    model: models/gemini-2.0-flash
    temperature: 0.3

  # Dedicated profile for the structured-output extraction step.
  structured_output:
    provider: OpenAI
    model: gpt-4o-mini
    temperature: 0.0  # Low temp for consistent extraction
Programmatically:
from droidrun import load_llm

config = DroidrunConfig()

# Keys use the same names as the llm_profiles entries in config.yaml.
# NOTE(review): load_llm arguments appear to be (provider, model), matching
# the provider/model values of the YAML profiles — confirm against the API.
llms = {
    "codeact": load_llm("GoogleGenAI", "models/gemini-2.0-flash"),
    "structured_output": load_llm("OpenAI", "gpt-4o-mini"),
}

# The llms mapping is passed alongside config and the output model.
agent = DroidAgent(
    goal="Extract contact info for Alice",
    llms=llms,
    config=config,
    output_model=ContactInfo,
)

Reasoning Mode

Structured output works in both direct and reasoning modes:
# Direct mode
# NOTE: WeatherInfo is not defined in this page; it is assumed to be a
# user-defined Pydantic model like the other output models shown here.
config = DroidrunConfig()
config.agent.reasoning = False

agent = DroidAgent(
    goal="Find weather for SF",
    config=config,
    output_model=WeatherInfo,
)

# Reasoning mode
# NOTE(review): this mutates the same config object passed to the agent above;
# if both agents are kept, confirm whether DroidAgent copies its config.
config.agent.reasoning = True

agent = DroidAgent(
    goal="Find weather for SF",
    config=config,
    output_model=WeatherInfo,
)

Best Practices

1. Add clear field descriptions - The LLM uses these to understand what to extract:
name: str = Field(description="Full name of customer who placed order")
2. Provide defaults for optional fields - Prevents extraction failures:
rating: Optional[float] = Field(description="Customer rating (1-5)", default=None)
3. Guide data collection in your goal:
# The goal names each piece of data the ContactInfo model has a field for
# (full name, phone number, email).
agent = DroidAgent(
    goal="Find contact and get their phone number, email, and full name",
    config=config,
    output_model=ContactInfo,
)

Troubleshooting

Extraction returns None:
  • Verify output_model is passed to DroidAgent
  • Check if task succeeded: result.success
  • Enable debug logging: config.logging.debug = True
Partial or incorrect data:
  • Add more specific field descriptions
  • Mention required fields explicitly in the goal
Validation errors:
  • Add Optional and defaults for uncertain fields

Advanced

Multiple Items

Extract lists of data using a model with List fields:
class ContactList(BaseModel):
    """Multiple contacts."""
    # Wraps the per-contact model in a list so one run can return several items.
    contacts: List[ContactInfo] = Field(description="List of contacts")

# The wrapper model, not ContactInfo itself, is passed as output_model.
agent = DroidAgent(
    goal="Find contacts for John Smith and Jane Doe",
    config=config,
    output_model=ContactList,
)

Workflow Integration

Extraction happens automatically in DroidAgent.finalize():
@step
async def finalize(self, ctx: Context, ev: FinalizeEvent) -> ResultEvent:
    """Build the final result and attach structured output when requested.

    Args:
        ctx: Workflow context (not used by this step).
        ev: Finalize event carrying the success flag and the final answer
            text in ``reason``.

    Returns:
        A ResultEvent whose ``structured_output`` is the extracted model, or
        None when no output model was configured, ``ev.reason`` is empty, or
        extraction did not report success.
    """
    result = ResultEvent(
        success=ev.success,
        reason=ev.reason,
        steps=self.shared_state.step_number,
        structured_output=None,
    )

    # Extract if model was provided
    if self.output_model is not None and ev.reason:
        structured_agent = StructuredOutputAgent(
            llm=self.structured_output_llm,
            pydantic_model=self.output_model,
            answer_text=ev.reason,
        )
        # Await the run exactly once: its result is the dict read below, and
        # awaiting that dict a second time would raise TypeError.
        extraction_result = await structured_agent.run()
        if extraction_result["success"]:
            result.structured_output = extraction_result["structured_output"]

    return result

I