diff --git a/fern/assets/styles.css b/fern/assets/styles.css
index ebec09075..8f1df1b39 100644
--- a/fern/assets/styles.css
+++ b/fern/assets/styles.css
@@ -38,6 +38,14 @@
border: 1px solid #C7D2FE;
}
+/* Alpha badge - purple to match dashboard */
+.vapi-badge-alpha {
+ background-color: rgba(168, 85, 247, 0.2) !important;
+ color: #A78BFA !important;
+ border: 1px solid rgba(168, 85, 247, 0.4) !important;
+ border-color: rgba(168, 85, 247, 0.4) !important;
+}
+
/* Dark mode adjustments */
:is(.dark) .vapi-badge-assistant {
background-color: #134E4A;
@@ -57,6 +65,45 @@
border: 1px solid #6366F1;
}
+:is(.dark) .vapi-badge-alpha {
+ background-color: rgba(168, 85, 247, 0.2) !important;
+ color: #C4B5FD !important;
+ border: 1px solid rgba(168, 85, 247, 0.4) !important;
+ border-color: rgba(168, 85, 247, 0.4) !important;
+}
+
+/* Override Fern's pre-release availability badge to show "Alpha" with purple styling */
+.fern-docs-badge[title="Pre-release"] {
+ background-color: rgba(168, 85, 247, 0.2) !important;
+ border-color: rgba(168, 85, 247, 0.4) !important;
+ font-size: 0 !important;
+ position: relative !important;
+ min-width: 52px !important;
+ min-height: 24px !important;
+ display: inline-flex !important;
+}
+
+.fern-docs-badge[title="Pre-release"]::after {
+ content: "Alpha";
+ position: absolute;
+ top: 50%;
+ left: 50%;
+ transform: translate(-50%, -50%);
+ color: #A78BFA;
+ font-size: 0.75rem;
+ line-height: 1;
+ white-space: nowrap;
+}
+
+:is(.dark) .fern-docs-badge[title="Pre-release"] {
+ background-color: rgba(168, 85, 247, 0.2) !important;
+ border-color: rgba(168, 85, 247, 0.4) !important;
+}
+
+/* The ::after sets its own color, so override it directly in dark mode */
+:is(.dark) .fern-docs-badge[title="Pre-release"]::after {
+ color: #C4B5FD;
+}
+
/* for a grid of videos */
.video-grid {
@@ -233,4 +280,9 @@ html.dark button[data-highlighted] .fern-api-property-meta {
.light .fern-theme-default.fern-container {
background-color: #fff !important;
+}
+
+/* Fix: Make subtitle white on Simulations pages in dark mode */
+:is(.dark) [id*="simulations"] .prose-p\:text-\(color\:--grayscale-a11\) :where(p):not(:where([class~=not-prose],[class~=not-prose] *)) {
+ color: var(--grayscale-12) !important;
}
\ No newline at end of file
diff --git a/fern/docs.yml b/fern/docs.yml
index 458e41963..000aa06cc 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -290,6 +290,16 @@ navigation:
- page: Advanced
path: observability/evals-advanced.mdx
icon: fa-light fa-clipboard-check
+ - section: Simulations
+ icon: fa-light fa-flask-vial
+ availability: pre-release
+ contents:
+ - page: Quickstart
+ path: observability/simulations-quickstart.mdx
+ icon: fa-light fa-rocket
+ - page: Advanced
+ path: observability/simulations-advanced.mdx
+ icon: fa-light fa-flask-vial
- page: Boards
path: observability/boards-quickstart.mdx
icon: fa-light fa-chart-line
diff --git a/fern/observability/simulations-advanced.mdx b/fern/observability/simulations-advanced.mdx
new file mode 100644
index 000000000..cdafbf808
--- /dev/null
+++ b/fern/observability/simulations-advanced.mdx
@@ -0,0 +1,822 @@
+---
+title: Simulations advanced
+subtitle: Master testing strategies and best practices for AI voice agent simulations
+slug: observability/simulations-advanced
+availability: pre-release
+---
+
+## Overview
+
+This guide covers advanced simulation strategies, testing patterns, and best practices for building robust test suites that ensure your AI voice agents work reliably in production.
+
+**You'll learn:**
+
+- Advanced scenario configuration (tool mocks, hooks)
+- Strategic testing approaches (smoke, regression, edge cases)
+- Performance optimization techniques
+- CI/CD integration strategies
+- Maintenance and troubleshooting methods
+
+## Advanced scenario configuration
+
+### Tool mocks
+
+Mock tool call responses at the scenario level to test specific paths without calling real APIs. This is useful for:
+
+- Testing error handling paths
+- Simulating unavailable services
+- Deterministic test results
+- Faster test execution (no real API calls)
+
+
+
+
+
+ 1. Go to **Simulations** → **Scenarios**
+ 2. Open the scenario you want to configure
+
+
+
+ 1. Scroll to **Tool Mocks** section
+ 2. Click **Add Tool Mock**
+ 3. **Tool Name**: Enter the exact function name (e.g., `bookAppointment`)
+ 4. **Result**: Enter the JSON response to return:
+ ```json
+ {"status": "success", "confirmationId": "MOCK-12345"}
+ ```
+ 5. **Enabled**: Toggle on/off to control when mock is active
+ 6. Click **Save**
+
+
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/scenario" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Book Appointment - API Error Path",
+ "instructions": "Try to book an appointment and handle the error gracefully when the system is unavailable.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "handled_error_gracefully",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the assistant apologized and offered alternatives"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ],
+ "toolMocks": [
+ {
+ "toolName": "bookAppointment",
+ "result": "{\"error\": \"Service temporarily unavailable\", \"code\": \"503\"}",
+ "enabled": true
+ }
+ ]
+ }'
+```
+
+
+
+**Common tool mock patterns:**
+
+
+
+ ```json
+ {
+ "toolName": "bookAppointment",
+ "result": "{\"status\": \"success\", \"confirmationId\": \"APT-12345\", \"datetime\": \"2024-01-20T14:00:00Z\"}",
+ "enabled": true
+ }
+ ```
+
+
+
+ ```json
+ {
+ "toolName": "bookAppointment",
+ "result": "{\"error\": \"Time slot no longer available\", \"availableSlots\": [\"14:30\", \"15:00\", \"15:30\"]}",
+ "enabled": true
+ }
+ ```
+
+
+
+ ```json
+ {
+ "toolName": "checkInventory",
+ "result": "{\"error\": \"Request timeout\", \"code\": \"ETIMEDOUT\"}",
+ "enabled": true
+ }
+ ```
+
+
+
+ ```json
+ {
+ "toolName": "processOrder",
+ "result": "{\"status\": \"partial\", \"itemsProcessed\": 2, \"itemsFailed\": 1, \"failedReason\": \"Item out of stock\"}",
+ "enabled": true
+ }
+ ```
+
+
+
+
+ **Tool mock tips:**
+ - Mock tool names must exactly match the function name configured in your assistant's tools
+ - Use realistic error responses that match your actual API error formats
+ - Create separate scenarios for success paths and error paths
+ - Disable mocks (`enabled: false`) to test against real APIs
+
+
+### Simulation hooks
+
+Trigger actions on simulation lifecycle events. Hooks are useful for:
+
+- Notifying external systems when tests start/end
+- Logging test execution to your own systems
+- Triggering follow-up workflows
+- Custom analytics and reporting
+
+
+ **Hooks are only supported in voice mode.** They require the `vapi.websocket` transport and will not trigger with `vapi.webchat` (chat mode).
+
+
+
+
+
+
+ 1. Go to **Simulations** → **Scenarios**
+ 2. Open your scenario
+ 3. Scroll to **Hooks** section
+ 4. Click **Add Hook**
+
+
+
+ 1. **Event**: Select when to trigger:
+ - `simulation.run.started` - When simulation run begins
+ - `simulation.run.ended` - When simulation run ends
+ 2. **Action Type**: Select `webhook`
+ 3. **Server URL**: Enter your webhook endpoint
+ 4. Click **Save**
+
+
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/scenario" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Test with Lifecycle Hooks",
+ "instructions": "Complete the booking flow as a standard customer.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "booking_completed",
+ "schema": { "type": "boolean" }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ],
+ "hooks": [
+ {
+ "on": "simulation.run.started",
+ "do": [
+ {
+ "type": "webhook",
+ "server": {
+ "url": "https://your-server.com/webhooks/simulation-started"
+ }
+ }
+ ]
+ },
+ {
+ "on": "simulation.run.ended",
+ "do": [
+ {
+ "type": "webhook",
+ "server": {
+ "url": "https://your-server.com/webhooks/simulation-ended"
+ },
+ "include": {
+ "transcript": true,
+ "messages": true,
+ "recordingUrl": true
+ }
+ }
+ ]
+ }
+ ]
+ }'
+```
+
+
+
+**Webhook payload examples:**
+
+```json
+// simulation.run.started webhook payload
+{
+ "event": "simulation.run.started",
+ "simulationId": "550e8400-e29b-41d4-a716-446655440003",
+ "runId": "550e8400-e29b-41d4-a716-446655440007",
+ "timestamp": "2024-01-15T09:50:05Z"
+}
+
+// simulation.run.ended webhook payload
+{
+ "event": "simulation.run.ended",
+ "simulationId": "550e8400-e29b-41d4-a716-446655440003",
+ "runId": "550e8400-e29b-41d4-a716-446655440007",
+ "timestamp": "2024-01-15T09:52:30Z",
+ "duration": 145,
+ "status": "passed",
+ "transcript": "...", // if include.transcript = true
+ "messages": [...], // if include.messages = true
+ "recordingUrl": "https://..." // if include.recordingUrl = true
+}
+```
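+
+To sanity-check a webhook handler before wiring it into a scenario, you can replay the documented `simulation.run.ended` payload against it yourself. A minimal sketch, assuming a hypothetical local handler at `http://localhost:3000/webhooks/simulation-ended`:
+
+```bash
+# Replay a sample simulation.run.ended payload against a local handler.
+# The endpoint URL is hypothetical - substitute your own webhook receiver.
+curl -X POST "http://localhost:3000/webhooks/simulation-ended" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "event": "simulation.run.ended",
+ "simulationId": "550e8400-e29b-41d4-a716-446655440003",
+ "runId": "550e8400-e29b-41d4-a716-446655440007",
+ "timestamp": "2024-01-15T09:52:30Z",
+ "duration": 145,
+ "status": "passed"
+ }'
+```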
+
+### Using existing structured outputs
+
+Instead of defining inline structured outputs in each scenario, you can reference structured outputs you've already created. This provides:
+
+- Reusability across multiple scenarios
+- Centralized management of evaluation criteria
+- Consistency in how data is extracted
+
+
+
+ 1. Go to **Structured Outputs** in the sidebar
+ 2. Create a new structured output or find an existing one
+ 3. Copy the **ID**
+ 4. In your scenario, select **Use Existing** when adding an evaluation
+ 5. Paste the structured output ID
+
+
+
+```bash
+# First, create a reusable structured output
+curl -X POST "https://api.vapi.ai/structured-output" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "appointment_booked",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether an appointment was successfully booked during the call"
+ }
+ }'
+
+# Response includes the ID
+# { "id": "so-abc123", ... }
+
+# Then reference it in your scenario
+curl -X POST "https://api.vapi.ai/eval/simulation/scenario" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Book Appointment",
+ "instructions": "Call to book an appointment for next Monday.",
+ "evaluations": [
+ {
+ "structuredOutputId": "so-abc123",
+ "comparator": "=",
+ "value": true
+ }
+ ]
+ }'
+```
+
+
+
+
+ **When to use existing vs inline:**
+ - **Existing (by ID)**: When the same evaluation criteria are used across multiple scenarios
+ - **Inline**: For scenario-specific evaluations that won't be reused
+
+
+## Testing strategies
+
+### Smoke tests
+
+Quick validation that core functionality works. Run these first to catch obvious issues.
+
+**Purpose:** Verify your assistant responds and basic conversation flow works before running comprehensive tests.
+
+```json
+{
+ "name": "Smoke Test - Basic Response",
+ "instructions": "Say hello and ask if the assistant can hear you.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "assistant_responded",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the assistant provided any response"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ]
+}
+```
+
+**Characteristics:**
+- Minimal evaluation criteria (just check for any response)
+- Fast execution (simple instructions)
+- Run before detailed tests
+- Use chat mode for speed
+
+**When to use:**
+- Before running expensive voice test suites
+- After deploying configuration changes
+- As health checks in monitoring
+- Quick validation during development
+
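+Since smoke tests favor chat mode, a quick way to exercise this scenario is a chat-mode run against the simulation that wraps it. A sketch, where `$SMOKE_SIMULATION_ID` and `$ASSISTANT_ID` are placeholders for IDs you created earlier:
+
+```bash
+# Kick off the smoke test in chat mode (vapi.webchat) for fast, cheap feedback.
+# $SMOKE_SIMULATION_ID and $ASSISTANT_ID are placeholders for your own IDs.
+curl -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [{"type": "simulation", "simulationId": "'"$SMOKE_SIMULATION_ID"'"}],
+ "target": {"type": "assistant", "assistantId": "'"$ASSISTANT_ID"'"},
+ "transport": {"provider": "vapi.webchat"}
+ }'
+```
+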
+### Regression tests
+
+Ensure fixes and updates don't break existing functionality.
+
+**Purpose:** Validate that known issues stay fixed and features keep working.
+
+
+
+ 1. Name scenarios with a "Regression: " prefix
+ 2. Include the issue ticket number in the name
+ 3. Add the exact scenario that previously failed
+ 4. Document what was fixed
+
+ Example:
+ - Name: "Regression: Appointment Parsing Bug #1234"
+ - Instructions: Scenario that triggered the bug
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/scenario" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Regression: Date Parsing Bug #1234",
+ "instructions": "Request an appointment for 3/15. The assistant should correctly parse this as March 15th, not fail or misinterpret the date.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "date_parsed_correctly",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the date 3/15 was correctly understood as March 15th"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ]
+ }'
+```
+
+
+
+**Best practices:**
+- Name tests after bugs they prevent
+- Include ticket/issue numbers
+- Add regression tests when fixing bugs
+- Run full regression suite before major releases
+
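+To run the full regression suite before a release, a single run request in voice mode with a few iterations is enough. A sketch, assuming `$REGRESSION_SUITE_ID` and `$STAGING_ASSISTANT_ID` hold your own IDs:
+
+```bash
+# Run the whole regression suite in voice mode, three iterations per simulation.
+# Both variables are placeholders for your own suite and assistant IDs.
+curl -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [{"type": "simulationSuite", "simulationSuiteId": "'"$REGRESSION_SUITE_ID"'"}],
+ "target": {"type": "assistant", "assistantId": "'"$STAGING_ASSISTANT_ID"'"},
+ "transport": {"provider": "vapi.websocket"},
+ "iterations": 3
+ }'
+```
+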
+### Edge case testing
+
+Test boundary conditions and unusual inputs your assistant might encounter.
+
+**Common edge cases to test:**
+
+
+
+ ```json
+ {
+ "name": "Edge Case - Ambiguous Request",
+ "instructions": "Make a vague, unclear request like 'I need something done' without specifying what you want.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "asked_for_clarification",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the assistant asked for more details"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ]
+ }
+ ```
+
+
+
+ ```json
+ {
+ "name": "Edge Case - Topic Switch",
+ "instructions": "Start asking about booking an appointment, then suddenly switch to asking about cancellation policies mid-conversation.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "handled_topic_switch",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the assistant smoothly transitioned to the new topic"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ]
+ }
+ ```
+
+
+
+ ```json
+ {
+ "name": "Edge Case - Interruption Handling",
+ "instructions": "Interrupt the assistant mid-sentence with a new question. See if it handles the interruption gracefully.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "handled_interruption",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the assistant stopped and addressed the interruption"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ]
+ }
+ ```
+
+ This edge case requires voice mode (`vapi.websocket`) to test actual audio interruptions.
+
+
+
+ ```json
+ {
+ "name": "Edge Case - Invalid Date",
+ "instructions": "Try to book an appointment for 'the 45th of Octember' - an obviously invalid date.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "handled_invalid_date",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether the assistant politely asked for a valid date"
+ }
+ },
+ "comparator": "=",
+ "value": true
+ }
+ ]
+ }
+ ```
+
+
+
+**Edge case categories to cover:**
+- **Input boundaries:** Empty, maximum length, special characters
+- **Data formats:** Invalid dates, malformed phone numbers, unusual names
+- **Conversation patterns:** Interruptions, topic changes, contradictions
+- **Emotional scenarios:** Frustrated caller, confused caller, impatient caller
+
+## Best practices
+
+### Evaluation design principles
+
+
+
+ Each evaluation should test one specific outcome.
+
+ ✅ **Good:** "Was the appointment booked?"
+
+ ❌ **Bad:** "Was the appointment booked, confirmed, and email sent?"
+
+
+
+ Use descriptive names that explain what's being tested.
+
+ ✅ **Good:** "Booking - Handles Unavailable Slot"
+
+ ❌ **Bad:** "Test 1" or "Scenario ABC"
+
+
+
+ Model test personalities after actual customer types.
+
+ Consider decisive, confused, impatient, detail-oriented, and non-native speakers.
+
+
+
+ Use boolean or numeric structured outputs that produce clear pass/fail results.
+
+ Avoid subjective criteria that are hard to evaluate consistently.
+
+
+
+### Choosing voice vs chat mode
+
+| Scenario | Recommended Mode | Reason |
+| --- | --- | --- |
+| Rapid iteration during development | Chat (`vapi.webchat`) | Faster, cheaper |
+| Testing speech recognition accuracy | Voice (`vapi.websocket`) | Tests actual STT |
+| Testing voice/TTS quality | Voice (`vapi.websocket`) | Tests actual TTS |
+| Testing interruption handling | Voice (`vapi.websocket`) | Requires audio |
+| CI/CD pipeline tests | Chat (`vapi.webchat`) | Speed and cost |
+| Pre-production validation | Voice (`vapi.websocket`) | Full end-to-end |
+| Testing hooks/webhooks | Voice (`vapi.websocket`) | Hooks require voice |
+
+## CI/CD integration
+
+Automate simulation runs in your deployment pipeline.
+
+### Basic workflow
+
+```yaml
+# .github/workflows/test-assistant.yml
+name: Test Assistant Changes
+
+on:
+ pull_request:
+ paths:
+ - 'assistants/**'
+ - 'prompts/**'
+
+jobs:
+ run-simulations:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Run smoke tests (chat mode)
+ run: |
+ # Create a simulation run
+ RUN_ID=$(curl -s -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer ${{ secrets.VAPI_API_KEY }}" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [{"type": "simulationSuite", "simulationSuiteId": "${{ vars.SMOKE_TEST_SUITE_ID }}"}],
+ "target": {"type": "assistant", "assistantId": "${{ vars.STAGING_ASSISTANT_ID }}"},
+ "transport": {"provider": "vapi.webchat"}
+ }' | jq -r '.id')
+
+ echo "Run ID: $RUN_ID"
+
+ # Poll for completion (bounded so the job cannot hang if the run stalls)
+ for _ in $(seq 1 90); do
+ STATUS=$(curl -s "https://api.vapi.ai/eval/simulation/run/$RUN_ID" \
+ -H "Authorization: Bearer ${{ secrets.VAPI_API_KEY }}" | jq -r '.status')
+
+ if [ "$STATUS" = "ended" ]; then
+ break
+ fi
+
+ sleep 10
+ done
+
+ # Check results
+ RESULT=$(curl -s "https://api.vapi.ai/eval/simulation/run/$RUN_ID" \
+ -H "Authorization: Bearer ${{ secrets.VAPI_API_KEY }}")
+
+ PASSED=$(echo "$RESULT" | jq '.itemCounts.passed')
+ FAILED=$(echo "$RESULT" | jq '.itemCounts.failed')
+
+ if [ "$FAILED" -gt 0 ]; then
+ echo "Simulations failed: $FAILED"
+ exit 1
+ fi
+
+ echo "All simulations passed: $PASSED"
+```
+
+### Advanced patterns
+
+
+
+ Run full simulation suite against staging before promoting to production:
+
+ ```bash
+ # Run comprehensive tests against staging
+ ./scripts/run-simulation-suite.sh \
+ --suite-id "$REGRESSION_SUITE_ID" \
+ --target-assistant "$STAGING_ASSISTANT_ID" \
+ --transport "vapi.websocket" \
+ --iterations 3
+
+ # Only deploy to production if all pass
+ if [ $? -eq 0 ]; then
+ ./scripts/deploy-to-production.sh
+ fi
+ ```
+
+
+
+ Run full regression suite nightly:
+
+ ```yaml
+ # .github/workflows/nightly-regression.yml
+ on:
+ schedule:
+ - cron: '0 2 * * *' # 2 AM daily
+
+ jobs:
+ regression-suite:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Run full regression (voice mode)
+ run: ./scripts/run-simulation-suite.sh --full-regression
+
+ - name: Notify on failures
+ if: failure()
+ run: |
+ # Send Slack notification
+ curl -X POST "$SLACK_WEBHOOK_URL" \
+ -d '{"text": "Nightly simulation regression failed!"}'
+ ```
+
+
+
+ Block deployment if pass rate falls below threshold:
+
+ ```bash
+ RESULT=$(curl -s "https://api.vapi.ai/eval/simulation/run/$RUN_ID" \
+ -H "Authorization: Bearer $VAPI_API_KEY")
+
+ TOTAL=$(echo "$RESULT" | jq '.itemCounts.total')
+ PASSED=$(echo "$RESULT" | jq '.itemCounts.passed')
+
+ PASS_RATE=$((PASSED * 100 / TOTAL))
+
+ if [ $PASS_RATE -lt 95 ]; then
+ echo "Pass rate $PASS_RATE% below threshold 95%"
+ exit 1
+ fi
+ ```
+
+
+
+## Maintenance strategies
+
+### Regular review cycle
+
+
+
+ Investigate all failures. Update tests if requirements changed, or fix the assistant if its behavior regressed.
+
+
+
+ Review simulation suite completeness:
+ - All critical user flows covered?
+ - New features have tests?
+ - Deprecated features removed?
+
+
+
+ - Remove duplicate simulations
+ - Update outdated scenarios
+ - Optimize personalities for cost
+ - Document test rationale
+
+
+
+### When to update simulations
+
+| Trigger | Action |
+| --- | --- |
+| Assistant prompt changes | Review affected simulations |
+| New feature added | Create simulations for new feature |
+| Bug fixed | Add regression test |
+| User feedback reveals edge case | Add edge case simulation |
+| Business requirements change | Update evaluation criteria |
+
+## Troubleshooting
+
+### Common issues
+
+| Issue | Cause | Solution |
+| --- | --- | --- |
+| Simulation always fails | Evaluation criteria too strict | Review structured output schema and expected values |
+| Run stuck in "running" | Assistant not responding | Check assistant configuration, verify credentials |
+| Inconsistent results | Non-deterministic behavior | Increase iterations, use more specific instructions |
+| No audio in recording | Using chat mode | Switch to `vapi.websocket` transport |
+| Hooks not triggering | Using chat mode | Hooks require `vapi.websocket` transport |
+| Tool mocks not working | Wrong tool name | Verify tool name matches exactly |
+
+### Debugging tips
+
+
+
+ ```bash
+ curl "https://api.vapi.ai/eval/simulation/run/$RUN_ID" \
+ -H "Authorization: Bearer $VAPI_API_KEY" | jq '.status, .endedReason'
+ ```
+
+
+
+ ```bash
+ curl "https://api.vapi.ai/eval/simulation/run/$RUN_ID/item" \
+ -H "Authorization: Bearer $VAPI_API_KEY" | jq '.[].status'
+ ```
+
+
+
+ In the Dashboard, click on a failed run item to see the full conversation transcript and evaluation results.
+
+
+
+ If simulations consistently fail, test your assistant manually in the Dashboard to verify it's working correctly.
+
+
+
+### Getting help
+
+**Include these details when reporting issues:**
+- Simulation run ID
+- Scenario and personality IDs
+- Transport mode used (voice/chat)
+- Expected vs actual behavior
+- Assistant configuration
+
+**Resources:**
+- [Simulations Quickstart](/observability/simulations-quickstart)
+- [Discord Community](https://discord.gg/pUFNcf2WmH)
+- [Support](mailto:support@vapi.ai)
+
+## Next steps
+
+
+
+ Return to quickstart guide for basic setup
+
+
+
+ Learn about chat-based testing with mock conversations
+
+
+
+ Learn how to define structured outputs for evaluations
+
+
+
+ Create and configure assistants to test
+
+
+
+## Summary
+
+
+ **Key takeaways for advanced simulation testing:**
+
+ **Configuration:**
+ - Use tool mocks to test error paths without real API calls
+ - Use hooks for external notifications (voice mode only)
+ - Reference existing structured outputs for consistency
+
+ **Testing strategy:**
+ - Start with smoke tests, then regression, then edge cases
+ - Use chat mode for speed, voice mode for final validation
+ - Create personalities based on real customer types
+
+ **CI/CD:**
+ - Automate smoke tests in PR pipelines
+ - Run full regression before production deploys
+ - Set quality gate thresholds
+
+ **Maintenance:**
+ - Review failures weekly
+ - Audit coverage monthly
+ - Add regression tests when fixing bugs
+
diff --git a/fern/observability/simulations-quickstart.mdx b/fern/observability/simulations-quickstart.mdx
new file mode 100644
index 000000000..6416004d1
--- /dev/null
+++ b/fern/observability/simulations-quickstart.mdx
@@ -0,0 +1,757 @@
+---
+title: Simulations quickstart
+subtitle: Test your AI assistants with realistic AI-powered callers
+slug: observability/simulations-quickstart
+availability: pre-release
+---
+
+## Overview
+
+This quickstart guide will help you test your AI assistants and squads using realistic, AI-powered callers. In just a few minutes, you'll create test scenarios, define evaluation criteria, and validate your agents work correctly under different conditions.
+
+### What are Simulations?
+
+Simulations is Vapi's voice agent testing framework: AI-powered callers follow defined instructions, hold real conversations with your assistants and squads, and evaluate the outcomes using structured outputs. Instead of relying on manual testing or rigid scripts, simulations recreate realistic conversations and measure whether your assistant behaves correctly. Test your agents by:
+
+1. **Creating personalities** - Define a full assistant configuration for the AI tester (voice, model, system prompt)
+2. **Defining scenarios** - Specify instructions for the tester and evaluations using structured outputs
+3. **Creating simulations** - Pair scenarios with personalities
+4. **Running simulations** - Execute tests against your assistant or squad in voice or chat mode
+5. **Reviewing results** - Analyze pass/fail outcomes based on structured output evaluations
+
+### When are Simulations useful?
+
+Simulations help you maintain quality and catch issues early:
+
+- **Pre-deployment testing** - Validate new assistant configurations before going live
+- **Regression testing** - Ensure prompt or tool changes don't break existing behaviors
+- **Conversation flow validation** - Test multi-turn interactions and complex scenarios
+- **Personality-based testing** - Verify your agent handles different caller types appropriately
+- **Squad handoff testing** - Ensure smooth transitions between squad members
+- **Performance monitoring** - Track success rates over time and identify regressions
+
+### Voice vs Chat mode
+
+Simulations support two transport modes:
+
+
+
+ - Full voice simulation with audio
+ - Realistic end-to-end testing
+ - Tests speech recognition and synthesis
+ - Produces call recordings
+
+
+ - Text-based chat simulation
+ - Faster execution
+ - Lower cost (no audio processing)
+ - Ideal for rapid iteration
+
+
+
+
+ Use **chat mode** during development for quick iteration, then switch to **voice mode** for final validation before deployment.
+
+
+### What you'll build
+
+A simulation suite for an appointment booking assistant that tests:
+
+- Different caller personalities (confused user, impatient customer)
+- Evaluation criteria using structured outputs with comparators
+- Real-time monitoring of test runs
+- Both voice and chat mode execution
+
+## Prerequisites
+
+
+
+ Sign up at [dashboard.vapi.ai](https://dashboard.vapi.ai)
+
+
+ Get your API key from **API Keys** in the sidebar
+
+
+
+
+ You'll also need an existing assistant or squad to test. You can create one in
+ the Dashboard or use the API.
+
+
+## Step 1: Create a personality
+
+Personalities define how the AI tester behaves during a simulation. A personality is a full assistant configuration that controls the tester's voice, model, and behavior through its system prompt.
+
+
+
+
+
+ 1. Log in to [dashboard.vapi.ai](https://dashboard.vapi.ai)
+ 2. Click on **Simulations** in the left sidebar
+ 3. Click the **Personalities** tab
+
+
+
+ 1. Click **Create Personality**
+ 2. **Name**: Enter "Impatient Customer"
+ 3. **Assistant Configuration**: Configure the tester assistant:
+ - **Model**: Select your preferred LLM (e.g., GPT-4o)
+ - **System Prompt**: Define the personality behavior:
+ ```
+ You are an impatient customer who wants quick answers.
+ You speak directly and may interrupt if responses are too long.
+ You expect immediate solutions to your problems.
+ ```
+ - **Voice**: Select a voice for the tester (optional for chat mode)
+ 4. Click **Save**
+
+
+
+
+ Start with the built-in default personalities to get familiar with the system before creating custom ones.
+
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/personality" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Impatient Customer",
+ "assistant": {
+ "model": {
+ "provider": "openai",
+ "model": "gpt-4o",
+ "messages": [
+ {
+ "role": "system",
+ "content": "You are an impatient customer who wants quick answers. Speak directly and may interrupt if responses are too long. You expect immediate solutions to your problems."
+ }
+ ]
+ },
+ "voice": {
+ "provider": "cartesia",
+ "voiceId": "sonic-english"
+ }
+ }
+ }'
+```
+
+**Response:**
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440001",
+ "orgId": "org-456",
+ "name": "Impatient Customer",
+ "assistant": {
+ "model": {
+ "provider": "openai",
+ "model": "gpt-4o",
+ "messages": [...]
+ },
+ "voice": {
+ "provider": "cartesia",
+ "voiceId": "sonic-english"
+ }
+ },
+ "createdAt": "2024-01-15T09:30:00Z",
+ "updatedAt": "2024-01-15T09:30:00Z"
+}
+```
+
+Save the returned `id` - you'll need it when creating simulations.
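+
+If you're scripting the setup, you can capture the ID directly instead of copying it by hand. A small sketch (the `personality.json` file is a placeholder for the request body above; requires `jq`):
+
+```bash
+# Create the personality and capture its ID for later steps.
+PERSONALITY_ID=$(curl -s -X POST "https://api.vapi.ai/eval/simulation/personality" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d @personality.json | jq -r '.id')
+
+echo "Personality ID: $PERSONALITY_ID"
+```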
+
+
+
+
+ **Personality types:** Consider creating personalities for different customer types you encounter: decisive buyers, confused users, detail-oriented customers, or frustrated callers.
+
+
+## Step 2: Create a scenario
+
+Scenarios define what the test is evaluating. A scenario contains:
+- **Instructions**: What the tester should do during the call
+- **Evaluations**: Structured outputs with expected values to validate outcomes
+
+
+
+
+
+ 1. In **Simulations**, click the **Scenarios** tab
+ 2. Click **Create Scenario**
+
+
+
+ 1. **Name**: Enter "Book Appointment"
+ 2. **Instructions**: Define what the tester should do:
+ ```
+ You are calling to book an appointment for next Monday at 2pm.
+ Confirm your identity when asked and provide any required information.
+ End the call once you receive a confirmation number.
+ ```
+
+
+
+ Evaluations use structured outputs to extract data from the conversation and compare against expected values.
+
+ 1. Click **Add Evaluation**
+ 2. Create or select a structured output:
+ - **Name**: "appointment_booked"
+ - **Schema Type**: boolean
+ 3. Set the **Comparator**: `=`
+ 4. Set the **Expected Value**: `true`
+ 5. Mark as **Required**: Yes
+ 6. Add another evaluation for confirmation number:
+ - **Name**: "confirmation_provided"
+ - **Schema Type**: boolean
+ - **Comparator**: `=`
+ - **Expected Value**: `true`
+ 7. Click **Save Scenario**
+
+
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/scenario" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Book Appointment",
+ "instructions": "You are calling to book an appointment for next Monday at 2pm. Confirm your identity when asked and provide any required information. End the call once you receive a confirmation number.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "appointment_booked",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether an appointment was successfully booked"
+ }
+ },
+ "comparator": "=",
+ "value": true,
+ "required": true
+ },
+ {
+ "structuredOutput": {
+ "name": "confirmation_provided",
+ "schema": {
+ "type": "boolean",
+ "description": "Whether a confirmation number was provided"
+ }
+ },
+ "comparator": "=",
+ "value": true,
+ "required": true
+ }
+ ]
+ }'
+```
+
+**Response:**
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440002",
+ "orgId": "org-456",
+ "name": "Book Appointment",
+ "instructions": "You are calling to book an appointment for next Monday at 2pm...",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "appointment_booked",
+ "schema": { "type": "boolean", "description": "..." }
+ },
+ "comparator": "=",
+ "value": true,
+ "required": true
+ },
+ {
+ "structuredOutput": {
+ "name": "confirmation_provided",
+ "schema": { "type": "boolean", "description": "..." }
+ },
+ "comparator": "=",
+ "value": true,
+ "required": true
+ }
+ ],
+ "createdAt": "2024-01-15T09:35:00Z",
+ "updatedAt": "2024-01-15T09:35:00Z"
+}
+```
+
+Save the returned `id` - you'll need it when creating simulations.
+
+
+
+### Evaluation structure
+
+Each evaluation consists of:
+
+| Field | Description |
+| --- | --- |
+| `structuredOutputId` | Reference to an existing structured output (mutually exclusive with `structuredOutput`) |
+| `structuredOutput` | Inline structured output definition (mutually exclusive with `structuredOutputId`) |
+| `comparator` | Comparison operator: `=`, `!=`, `>`, `<`, `>=`, `<=` |
+| `value` | Expected value (string, number, or boolean) |
+| `required` | Whether this evaluation must pass for the simulation to pass (default: `true`) |
+
+
+ **Schema type restrictions:** Evaluations only support primitive schema types: `string`, `number`, `integer`, `boolean`. Objects and arrays are not supported.
+
+
+### Comparator options
+
+| Comparator | Description | Supported Types |
+| --- | --- | --- |
+| `=` | Equals | string, number, integer, boolean |
+| `!=` | Not equals | string, number, integer, boolean |
+| `>` | Greater than | number, integer |
+| `<` | Less than | number, integer |
+| `>=` | Greater than or equal | number, integer |
+| `<=` | Less than or equal | number, integer |
+
+
+ **Evaluation tips:** Use boolean structured outputs for pass/fail checks like "appointment_booked" or "issue_resolved". Use numeric outputs with comparators for metrics like "satisfaction_score >= 4".
+
+
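+As a concrete example of the numeric pattern from the tip above, here is a sketch of a scenario whose evaluation requires a satisfaction score of at least 4. The `satisfaction_score` output and the scenario itself are illustrative, not built in:
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/scenario" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Booking - Satisfaction Check",
+ "instructions": "Book an appointment, then answer honestly when asked to rate the experience from 1 to 5.",
+ "evaluations": [
+ {
+ "structuredOutput": {
+ "name": "satisfaction_score",
+ "schema": {
+ "type": "integer",
+ "description": "Caller satisfaction rating from 1 to 5"
+ }
+ },
+ "comparator": ">=",
+ "value": 4
+ }
+ ]
+ }'
+```
+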
+## Step 3: Create a simulation
+
+Simulations pair a scenario with a personality. The target assistant or squad is specified when you run the simulation.
+
+
+
+
+
+ 1. In **Simulations**, click the **Simulations** tab
+ 2. Click **Create Simulation**
+
+
+
+ 1. **Name**: Enter "Appointment Booking - Impatient Customer" (optional)
+ 2. **Scenario**: Select "Book Appointment" from the dropdown
+ 3. **Personality**: Select "Impatient Customer" from the dropdown
+ 4. Click **Save Simulation**
+
+
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Appointment Booking - Impatient Customer",
+ "scenarioId": "550e8400-e29b-41d4-a716-446655440002",
+ "personalityId": "550e8400-e29b-41d4-a716-446655440001"
+ }'
+```
+
+**Response:**
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440003",
+ "orgId": "org-456",
+ "name": "Appointment Booking - Impatient Customer",
+ "scenarioId": "550e8400-e29b-41d4-a716-446655440002",
+ "personalityId": "550e8400-e29b-41d4-a716-446655440001",
+ "createdAt": "2024-01-15T09:40:00Z",
+ "updatedAt": "2024-01-15T09:40:00Z"
+}
+```
+
+Save the returned `id` - you'll need it when running simulations.
+
+
+
+
+ **Multiple simulations:** Create several simulations with different personality and scenario combinations to thoroughly test your assistant across various conditions.
+
+
+## Step 4: Create a simulation suite (optional)
+
+Simulation suites group multiple simulations into a single batch that runs together.
+
+
+
+
+
+ 1. In **Simulations**, click the **Suites** tab
+ 2. Click **Create Suite**
+
+
+
+ 1. **Name**: Enter "Appointment Booking Regression Suite"
+ 2. Click **Add Simulations**
+ 3. Select the simulations you want to include:
+ - "Appointment Booking - Impatient Customer"
+ - "Appointment Booking - Confused User"
+ - "Appointment Booking - Decisive Customer"
+ 4. Click **Save Suite**
+
+
+
+
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/suite" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Appointment Booking Regression Suite",
+ "simulationIds": [
+ "550e8400-e29b-41d4-a716-446655440003",
+ "550e8400-e29b-41d4-a716-446655440004",
+ "550e8400-e29b-41d4-a716-446655440005"
+ ]
+ }'
+```
+
+**Response:**
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440006",
+ "orgId": "org-456",
+ "name": "Appointment Booking Regression Suite",
+ "simulationIds": [
+ "550e8400-e29b-41d4-a716-446655440003",
+ "550e8400-e29b-41d4-a716-446655440004",
+ "550e8400-e29b-41d4-a716-446655440005"
+ ],
+ "createdAt": "2024-01-15T09:45:00Z",
+ "updatedAt": "2024-01-15T09:45:00Z"
+}
+```
+
+Save the returned `id` - you'll need it to run the suite.
+
+
+
+
+ **Suite organization:** Group related simulations together. For example, create separate suites for "Booking Tests", "Cancellation Tests", and "Rescheduling Tests".
+
+
+## Step 5: Run a simulation
+
+Execute simulations against your assistant or squad. You can run individual simulations or entire suites.
+
+
+
+
+
+ 1. Navigate to your simulation or suite
+ 2. Click **Run**
+ 3. Select the **Target**:
+ - Choose **Assistant** or **Squad**
+ - Select from the dropdown
+ 4. Configure **Transport** (optional):
+ - **Voice**: `vapi.websocket` (default)
+ - **Chat**: `vapi.webchat` (faster, no audio)
+ 5. Set **Iterations** (optional): Number of times to run each simulation
+ 6. Click **Start Run**
+
+
+
+ 1. Click the **Runs** tab to see live status updates
+ 2. Watch as each simulation progresses:
+ - **Queued** - Waiting to start
+ - **Running** - Test in progress
+ - **Ended** - Test finished
+ 3. For voice mode, click **Listen** on any running test to hear the call live
+
+
+
+
+
+**Run a single simulation in voice mode:**
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [
+ {
+ "type": "simulation",
+ "simulationId": "550e8400-e29b-41d4-a716-446655440003"
+ }
+ ],
+ "target": {
+ "type": "assistant",
+ "assistantId": "your-assistant-id"
+ },
+ "transport": {
+ "provider": "vapi.websocket"
+ }
+ }'
+```
+
+**Run a simulation in chat mode (faster, no audio):**
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [
+ {
+ "type": "simulation",
+ "simulationId": "550e8400-e29b-41d4-a716-446655440003"
+ }
+ ],
+ "target": {
+ "type": "assistant",
+ "assistantId": "your-assistant-id"
+ },
+ "transport": {
+ "provider": "vapi.webchat"
+ }
+ }'
+```
+
+**Run a suite with multiple iterations:**
+
+```bash
+curl -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [
+ {
+ "type": "simulationSuite",
+ "simulationSuiteId": "550e8400-e29b-41d4-a716-446655440006"
+ }
+ ],
+ "target": {
+ "type": "assistant",
+ "assistantId": "your-assistant-id"
+ },
+ "iterations": 3
+ }'
+```
+
+**Response:**
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440007",
+ "orgId": "org-456",
+ "status": "queued",
+ "simulations": [
+ {
+ "type": "simulation",
+ "simulationId": "550e8400-e29b-41d4-a716-446655440003"
+ }
+ ],
+ "target": {
+ "type": "assistant",
+ "assistantId": "your-assistant-id"
+ },
+ "transport": {
+ "provider": "vapi.websocket"
+ },
+ "queuedAt": "2024-01-15T09:50:00Z",
+ "createdAt": "2024-01-15T09:50:00Z",
+ "updatedAt": "2024-01-15T09:50:00Z"
+}
+```
+
+**Check run status:**
+
+```bash
+curl -X GET "https://api.vapi.ai/eval/simulation/run/550e8400-e29b-41d4-a716-446655440007" \
+ -H "Authorization: Bearer $VAPI_API_KEY"
+```
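+
+To wait for completion from a script, poll that endpoint until `status` becomes `ended`. A minimal sketch, bounded so it gives up after roughly ten minutes (requires `jq`):
+
+```bash
+# Poll every 10s until the run ends.
+for _ in $(seq 1 60); do
+ STATUS=$(curl -s "https://api.vapi.ai/eval/simulation/run/550e8400-e29b-41d4-a716-446655440007" \
+ -H "Authorization: Bearer $VAPI_API_KEY" | jq -r '.status')
+
+ if [ "$STATUS" = "ended" ]; then
+ break
+ fi
+
+ sleep 10
+done
+
+echo "Final status: $STATUS"
+```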
+
+
+
+## Step 6: Review results
+
+Analyze the results of your simulation runs to understand how your assistant performed.
+
+### Successful run
+
+When all evaluations pass, you'll see:
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440007",
+ "status": "ended",
+ "itemCounts": {
+ "total": 3,
+ "passed": 3,
+ "failed": 0,
+ "running": 0,
+ "queued": 0,
+ "canceled": 0
+ },
+ "startedAt": "2024-01-15T09:50:05Z",
+ "endedAt": "2024-01-15T09:52:30Z"
+}
+```
+
+**Pass criteria:**
+
+- `status` is "ended"
+- `itemCounts.passed` equals `itemCounts.total`
+- All required evaluations show `passed: true`
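+
+You can check the first two criteria programmatically: `jq -e` exits non-zero when the expression is false, which makes it easy to gate a script. A sketch, assuming `$RUN_ID` holds the run's ID:
+
+```bash
+# Exits 0 only if the run ended and every item passed.
+curl -s "https://api.vapi.ai/eval/simulation/run/$RUN_ID" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ | jq -e '.status == "ended" and .itemCounts.passed == .itemCounts.total'
+```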
+
+### Failed run
+
+When an evaluation fails, you'll see details about what went wrong:
+
+```json
+{
+ "id": "550e8400-e29b-41d4-a716-446655440008",
+ "status": "ended",
+ "itemCounts": {
+ "total": 3,
+ "passed": 2,
+ "failed": 1,
+ "running": 0,
+ "queued": 0,
+ "canceled": 0
+ }
+}
+```
+
+**Failure indicators:**
+
+- `itemCounts.failed` > 0
+- Individual run items show which evaluations failed and why
+
+
+
+
+
+ 1. Navigate to the **Runs** tab
+ 2. Click on a completed run to see details
+ 3. View the summary showing pass/fail counts
+
+
+
+ 1. Click on any failed simulation
+ 2. Review the **Conversation** to see the full transcript
+ 3. Check which evaluations failed and their actual vs expected values
+ 4. For voice mode, click **Listen to Recording** to hear the full call
+
+
+
+ 1. Go to the main **Simulations** page
+ 2. View historical runs and their pass rates
+ 3. Monitor trends to identify regressions
+
+
+
+
+
+**List all runs:**
+
+```bash
+curl -X GET "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY"
+```
+
+**Get detailed results for a specific run:**
+
+```bash
+curl -X GET "https://api.vapi.ai/eval/simulation/run/550e8400-e29b-41d4-a716-446655440007" \
+ -H "Authorization: Bearer $VAPI_API_KEY"
+```
+
+**Filter runs by status:**
+
+```bash
+curl -X GET "https://api.vapi.ai/eval/simulation/run?status=ended" \
+ -H "Authorization: Bearer $VAPI_API_KEY"
+```
+
+
+
+
+ Full conversation transcripts are available for all simulation runs, making it easy to understand exactly what happened during each test.
+
+
+## Next steps
+
+
+
+ Learn about tool mocks, hooks, CI/CD integration, and testing strategies
+
+
+
+ Create and configure assistants to test
+
+
+
+ Learn about chat-based testing with mock conversations
+
+
+
+ Learn how to define structured outputs for evaluations
+
+
+
+## Tips for success
+
+
+ **Best practices for effective simulation testing:**
+
+ - **Start with chat mode** - Use `vapi.webchat` for rapid iteration, then validate with voice
+ - **Use realistic personalities** - Model your test callers after actual customer types
+ - **Define clear evaluations** - Use specific, measurable structured outputs
+ - **Group related tests** - Organize suites by feature or user flow
+ - **Monitor trends** - Track pass rates over time to catch regressions early
+ - **Test after changes** - Run your simulation suites after updating prompts or tools
+ - **Listen to recordings** - Audio recordings reveal issues that metrics alone miss
+ - **Iterate on failures** - Use failed tests to improve both your assistant and test design
+
+
+## Frequently asked questions
+
+
+
+ Simulation concurrency follows your organization's call concurrency limits. Each voice simulation uses 2 concurrent call slots (one for the AI tester, one for your assistant being tested). Chat mode simulations are more efficient since they don't require audio processing. If you need higher concurrency limits, contact support.
+
+
+
+ Simulations use AI-powered testers that have actual conversations with your assistant, producing real call recordings and transcripts. Evals use mock conversations with predefined messages and judge the responses. Use Simulations for realistic end-to-end testing; use Evals for faster, more controlled validation.
+
+
+
+ Yes! You can either define inline structured outputs in your scenario evaluations, or reference existing structured outputs by ID using the `structuredOutputId` field.
+
+
+
+ Create a simulation that targets a squad instead of an assistant. Use the `target.type: "squad"` and `target.squadId` fields when creating a run.
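+
+ A sketch of that request, where `$SQUAD_ID` is a placeholder for the squad you want to test:
+
+ ```bash
+ curl -X POST "https://api.vapi.ai/eval/simulation/run" \
+ -H "Authorization: Bearer $VAPI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "simulations": [{"type": "simulation", "simulationId": "550e8400-e29b-41d4-a716-446655440003"}],
+ "target": {"type": "squad", "squadId": "'"$SQUAD_ID"'"}
+ }'
+ ```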
+
+
+
+## Get help
+
+Need assistance? We're here to help:
+
+- [Discord Community](https://discord.gg/pUFNcf2WmH)
+- [Support](mailto:support@vapi.ai)