import asyncio
from calibrate.llm import simulations
# Tool definitions handed to `simulations.run` through its `tools` argument.
# Each tool is a plain dict with a type, a name, a human-readable description
# and a list of parameter specs (id / type / description / required).
tools = [
    {
        "type": "client",
        "name": "plan_next_question",
        "description": "Plan the next question to ask",
        "parameters": [
            # Integer parameter, marked as required.
            {
                "id": "next_unanswered_question_index",
                "type": "integer",
                "description": "Index of next question",
                "required": True,
            },
            # Array-of-integers parameter, marked as required.
            {
                "id": "questions_answered",
                "type": "array",
                "description": "List of answered question indices",
                "items": {"type": "integer"},
                "required": True,
            },
        ],
    },
]
# Simulated-user personas handed to `simulations.run` through its `personas`
# argument. Each persona is a dict with free-text characteristics plus a
# gender and a language.
personas = [
    {
        "characteristics": (
            "A shy mother named Geeta, 39 years old, gives short answers"
        ),
        "gender": "female",
        "language": "english",
    },
]
# Conversation scenarios handed to `simulations.run` through its `scenarios`
# argument. Every scenario is a dict holding a single "description" string,
# so the list is built from the descriptions alone.
scenarios = [
    {"description": text}
    for text in (
        "User completes the form without any issues",
        "User hesitates and wants to skip some questions",
    )
]
# Evaluation criteria handed to `simulations.run` through its
# `evaluation_criteria` argument. Each criterion is a dict with a short
# "name" and a "description" of what is being judged.
evaluation_criteria = [
    {"name": name, "description": description}
    for name, description in (
        (
            "question_completeness",
            "Whether all the questions in the form were covered",
        ),
        (
            "assistant_behavior",
            "Whether the assistant asks one concise question per turn",
        ),
    )
]
# Run simulations: `asyncio.run` drives the awaitable returned by
# `simulations.run` to completion, and whatever it returns is kept in `result`.
result = asyncio.run(
    simulations.run(
        system_prompt="You are a helpful nurse filling out a form...",
        # Configuration lists defined earlier in this script.
        tools=tools,
        personas=personas,
        scenarios=scenarios,
        evaluation_criteria=evaluation_criteria,
        # Directory path given to the library for its output.
        output_dir="./out",
        # Model identifier and the provider it is requested through.
        model="openai/gpt-4.1",
        provider="openrouter",
        # NOTE(review): presumably the number of simulations run
        # concurrently; confirm against the library docs.
        parallel=1,
        agent_speaks_first=True,
        # NOTE(review): presumably an upper bound on conversation turns per
        # simulation; confirm against the library docs.
        max_turns=50,
    )
)