diff --git a/.assets/plan.png b/.assets/plan.png new file mode 100644 index 00000000..4a1da307 Binary files /dev/null and b/.assets/plan.png differ diff --git a/.env.template b/.env.template new file mode 100644 index 00000000..97495493 --- /dev/null +++ b/.env.template @@ -0,0 +1,221 @@ +# ======================================== +# FRIEND-LITE MASTER CONFIGURATION +# ======================================== +# This is the master configuration template for the entire Friend-Lite project. +# Copy this file to .env and customize values, then run 'make config' to generate +# all service-specific configuration files. + +# ======================================== +# DEPLOYMENT SETTINGS +# ======================================== + +# Infrastructure namespaces +INFRASTRUCTURE_NAMESPACE=infrastructure +APPLICATION_NAMESPACE=friend-lite + +# Deployment mode: docker-compose, kubernetes, or distributed +DEPLOYMENT_MODE=docker-compose + +# Container registry (for kubernetes/skaffold) +CONTAINER_REGISTRY=localhost:32000 + +# ======================================== +# NETWORK CONFIGURATION +# ======================================== + +# Primary domain/IP for all services +# Examples: localhost, 192.168.1.100, friend-lite.example.com, 100.x.x.x (Tailscale) +DOMAIN=localhost + +# Service ports (Docker Compose mode) +BACKEND_PORT=8000 +WEBUI_PORT=5173 +SPEAKER_PORT=8085 +MONGODB_PORT=27017 +QDRANT_PORT=6333 +NGROK_PORT=4040 + +# Kubernetes node ports (for LoadBalancer services) +BACKEND_NODEPORT=30270 +WEBUI_NODEPORT=31011 +SPEAKER_NODEPORT=30285 + +# Generated service URLs (computed from above) +BACKEND_HOST=${DOMAIN} +WEBUI_HOST=${DOMAIN} +SPEAKER_HOST=speaker.${DOMAIN} +BACKEND_URL=http://${DOMAIN}:${BACKEND_PORT} +WEBUI_URL=http://${DOMAIN}:${WEBUI_PORT} +SPEAKER_SERVICE_URL=http://${DOMAIN}:${SPEAKER_PORT} + +# ======================================== +# AUTHENTICATION & SECURITY +# ======================================== + +# JWT secret key - make this random and long 
+AUTH_SECRET_KEY=your-super-secret-jwt-key-here-make-it-random-and-long + +# Admin account +ADMIN_EMAIL=admin@example.com +ADMIN_PASSWORD=secure-admin-password + +# CORS origins (auto-generated based on DOMAIN and ports) +CORS_ORIGINS=http://${DOMAIN}:${WEBUI_PORT},http://${DOMAIN}:3000,http://localhost:${WEBUI_PORT},http://localhost:3000 + +# ======================================== +# LLM CONFIGURATION +# ======================================== + +# LLM Provider: openai, ollama, or groq +LLM_PROVIDER=openai + +# OpenAI configuration +OPENAI_API_KEY=your-openai-key-here +OPENAI_BASE_URL=https://api.openai.com/v1 +OPENAI_MODEL=gpt-4o-mini + +# Ollama configuration (when LLM_PROVIDER=ollama) +OLLAMA_BASE_URL=http://ollama:11434 +OLLAMA_MODEL=llama3.1:latest + +# Chat-specific settings +CHAT_LLM_MODEL=${OPENAI_MODEL} +CHAT_TEMPERATURE=0.7 + +# ======================================== +# SPEECH-TO-TEXT CONFIGURATION +# ======================================== + +# Primary transcription provider: deepgram, mistral, or parakeet +TRANSCRIPTION_PROVIDER=deepgram + +# Deepgram configuration +DEEPGRAM_API_KEY=your-deepgram-key-here + +# Mistral configuration (when TRANSCRIPTION_PROVIDER=mistral) +MISTRAL_API_KEY=your-mistral-key-here +MISTRAL_MODEL=voxtral-mini-2507 + +# Parakeet ASR configuration (when TRANSCRIPTION_PROVIDER=parakeet) +PARAKEET_ASR_URL=http://host.docker.internal:8767 + +# ======================================== +# DATABASE CONFIGURATION +# ======================================== + +# MongoDB configuration +MONGODB_URI=mongodb://mongo:${MONGODB_PORT} +MONGODB_K8S_URI=mongodb://mongodb.${INFRASTRUCTURE_NAMESPACE}.svc.cluster.local:27017/friend-lite + +# Qdrant configuration +QDRANT_BASE_URL=qdrant +QDRANT_K8S_URL=qdrant.${INFRASTRUCTURE_NAMESPACE}.svc.cluster.local + +# Neo4j configuration (optional) +NEO4J_HOST=neo4j-mem0 +NEO4J_USER=neo4j +NEO4J_PASSWORD=neo4j-password + +# ======================================== +# MEMORY PROVIDER CONFIGURATION +# 
======================================== + +# Memory Provider: friend_lite or openmemory_mcp +MEMORY_PROVIDER=friend_lite + +# OpenMemory MCP configuration (when MEMORY_PROVIDER=openmemory_mcp) +OPENMEMORY_MCP_URL=http://host.docker.internal:8765 +OPENMEMORY_CLIENT_NAME=friend_lite +OPENMEMORY_USER_ID=openmemory +OPENMEMORY_TIMEOUT=30 + +# ======================================== +# SPEAKER RECOGNITION CONFIGURATION +# ======================================== + +# Hugging Face token for speaker recognition models +HF_TOKEN=your-huggingface-token-here + +# Speaker recognition settings +SIMILARITY_THRESHOLD=0.15 +COMPUTE_MODE=cpu + +# Speaker service configuration +SPEAKER_SERVICE_HOST=speaker-service +SPEAKER_SERVICE_PORT=${SPEAKER_PORT} + +# React UI settings for speaker recognition +REACT_UI_HOST=0.0.0.0 +REACT_UI_PORT=5174 +REACT_UI_HTTPS=false +WEBUI_CORS_ORIGIN=* + +# Vite configuration +VITE_ALLOWED_HOSTS=localhost 127.0.0.1 ${DOMAIN} +# ======================================== +# OPTIONAL SERVICES +# ======================================== + +# Groq API (alternative LLM provider) +GROQ_API_KEY=your-groq-api-key-here + +# Langfuse telemetry +LANGFUSE_PUBLIC_KEY= +LANGFUSE_SECRET_KEY= +LANGFUSE_HOST=http://x.x.x.x:3002 +LANGFUSE_ENABLE_TELEMETRY=false + +# Ngrok for external access +NGROK_AUTHTOKEN=your-ngrok-token-here + +# ======================================== +# AUDIO PROCESSING SETTINGS +# ======================================== + +NEW_CONVERSATION_TIMEOUT_MINUTES=1.5 +AUDIO_CROPPING_ENABLED=true +MIN_SPEECH_SEGMENT_DURATION=1.0 +CROPPING_CONTEXT_PADDING=0.1 + +# ======================================== +# DEVELOPMENT & DEBUG SETTINGS +# ======================================== + +# Environment +ENVIRONMENT=dev +NODE_ENV=production + +# Debug settings +DEBUG_DIR=./data/debug_dir +MEM0_TELEMETRY=false + +# Host IP for public access (Docker host IP) +HOST_IP=${DOMAIN} +BACKEND_PUBLIC_PORT=${BACKEND_PORT} + +# Storage settings +PERSISTENCE_SIZE=10Gi 
+STORAGE_CLASS=openebs-hostpath + +# ======================================== +# KUBERNETES-SPECIFIC SETTINGS +# ======================================== + +# Image pull policy +IMAGE_PULL_POLICY=Always + +# Resource limits +BACKEND_CPU_LIMIT=1000m +BACKEND_MEMORY_LIMIT=2Gi +BACKEND_CPU_REQUEST=200m +BACKEND_MEMORY_REQUEST=1Gi + +WEBUI_CPU_LIMIT=500m +WEBUI_MEMORY_LIMIT=512Mi +WEBUI_CPU_REQUEST=100m +WEBUI_MEMORY_REQUEST=128Mi + +SPEAKER_CPU_LIMIT=2000m +SPEAKER_MEMORY_LIMIT=4Gi +SPEAKER_CPU_REQUEST=500m +SPEAKER_MEMORY_REQUEST=2Gi \ No newline at end of file diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 00000000..3b645800 --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,91 @@ +# GitHub Actions CI/CD Setup for Friend Lite + +This sets up **automatic GitHub releases** with APK/IPA files whenever you push code. + +## 🚀 How This Works + +1. You push code to GitHub +2. GitHub automatically builds **both Android APK and iOS IPA** +3. **Creates GitHub Releases** with both files attached +4. You download directly from the **Releases** tab! + +## 🎯 Quick Setup (2 Steps) + +### Step 1: Get Expo Token +1. Go to [expo.dev](https://expo.dev) and sign in/create account +2. Go to [Access Tokens](https://expo.dev/accounts/[account]/settings/access-tokens) +3. Create a new token and copy it + +### Step 2: Add GitHub Secret +1. In your GitHub repo: **Settings** → **Secrets and variables** → **Actions** +2. Click **New repository secret** +3. Name: `EXPO_TOKEN` +4. Value: Paste your token from Step 1 +5. Click **Add secret** + +## ⚡ That's It! 
+# GitHub Actions Workflows + +## Integration Tests + +### Automatic Integration Tests (`integration-tests.yml`) +- **Triggers**: Push/PR to `main` or `develop` branches affecting backend code +- **Timeout**: 15 minutes +- **Mode**: Cached mode (better for CI environment) +- **Dependencies**: Requires `DEEPGRAM_API_KEY` and `OPENAI_API_KEY` secrets + +## Required Secrets + +Add these secrets in your GitHub repository settings: + +``` +DEEPGRAM_API_KEY=your-deepgram-api-key +OPENAI_API_KEY=your-openai-api-key +``` + +## Test Environment + +- **Runtime**: Ubuntu latest with Docker support +- **Python**: 3.12 with uv package manager +- **Services**: MongoDB (port 27018), Qdrant (ports 6335/6336), Backend (port 8001) +- **Test Data**: Isolated test directories and databases +- **Audio**: 4-minute glass blowing tutorial for end-to-end validation + +## Modes + +### Cached Mode (Recommended for CI) +- Reuses containers and data between test runs +- Faster startup time +- Better for containerized CI environments +- Used by default in automatic workflows + +### Fresh Mode (Recommended for Local Development) +- Completely clean environment each run +- Removes all test data and containers +- Slower but more reliable for debugging +- Can be selected in manual workflow + +## Troubleshooting + +1. **Test Timeout**: Increase `timeout_minutes` in manual workflow +2. **Memory Issues**: Check container logs in failed run artifacts +3. **API Key Issues**: Verify secrets are set correctly in repository settings +4. 
**Fresh Mode Fails**: Try cached mode for comparison + +## Local Testing + +To run the same tests locally: + +```bash +cd backends/advanced-backend + +# Install dependencies +uv sync --dev + +# Set up environment (copy from .env.template) +cp .env.template .env.test +# Add your API keys to .env.test + +# Run test (modify CACHED_MODE in test_integration.py if needed) +uv run pytest test_integration.py::test_full_pipeline_integration -v -s +``` \ No newline at end of file diff --git a/.github/workflows/android-apk-build.yml b/.github/workflows/android-apk-build.yml new file mode 100644 index 00000000..4434eb13 --- /dev/null +++ b/.github/workflows/android-apk-build.yml @@ -0,0 +1,93 @@ +name: Android APK Build + +permissions: + contents: write + +on: + push: + branches: [main, develop] + paths: ['app/**'] + pull_request: + branches: [main] + paths: ['app/**'] + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./app + + steps: + - name: Setup repo + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4.0.2 + with: + node-version: 20.x + cache: 'npm' + cache-dependency-path: ./app/package-lock.json + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + + - name: Setup Expo + uses: expo/expo-github-action@v8 + with: + expo-version: latest + eas-version: latest + token: ${{ secrets.EXPO_TOKEN }} + + - name: Install dependencies + run: npm ci + + - name: Initialize EAS + run: eas init --force --non-interactive + + - name: Build Android APK + run: eas build --platform android --profile local --local --output ${{ github.workspace }}/app-release.apk --non-interactive + + - name: Generate release tag + id: tag + run: | + echo "RELEASE_TAG=android-v1.0.0-$(date +'%Y%m%d-%H%M%S')" >> $GITHUB_OUTPUT + echo "RELEASE_NAME=Friend Lite Android $(date +'%Y-%m-%d %H:%M:%S')" >> 
$GITHUB_OUTPUT + echo "BUILD_TIME=$(date +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_OUTPUT + + - name: Create Release + id: create_release + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.RELEASE_TAG }} + release_name: ${{ steps.tag.outputs.RELEASE_NAME }} + body: | + ## 📱 Android APK Build + + **Built from commit:** ${{ github.sha }} + **Branch:** ${{ github.ref_name }} + **Build time:** ${{ steps.tag.outputs.BUILD_TIME }} + + Ready to install on Android devices! + draft: false + prerelease: true + + - name: Upload Android APK to Release + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ github.workspace }}/app-release.apk + asset_name: friend-lite-android.apk + asset_content_type: application/vnd.android.package-archive \ No newline at end of file diff --git a/.github/workflows/build-all-platforms.yml b/.github/workflows/build-all-platforms.yml new file mode 100644 index 00000000..e73e6147 --- /dev/null +++ b/.github/workflows/build-all-platforms.yml @@ -0,0 +1,194 @@ +name: Build All Platforms + +on: + push: + branches: [main] + paths: ['app/**'] + workflow_dispatch: + inputs: + build_android: + description: 'Build Android APK' + required: false + default: true + type: boolean + build_ios: + description: 'Build iOS IPA' + required: false + default: true + type: boolean + +jobs: + build-android: + if: ${{ github.event_name != 'workflow_dispatch' || inputs.build_android }} + runs-on: ubuntu-latest + outputs: + upload_url: ${{ steps.create_release.outputs.upload_url }} + defaults: + run: + working-directory: ./app + + steps: + - name: Setup repo + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4.0.2 + with: + node-version: 20.x + cache: 'npm' + cache-dependency-path: 
./app/package-lock.json + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + + - name: Setup Expo + uses: expo/expo-github-action@v8 + with: + expo-version: latest + eas-version: latest + token: ${{ secrets.EXPO_TOKEN }} + + - name: Install dependencies + run: npm ci + + - name: Initialize EAS + run: eas init --force --non-interactive + + - name: Build Android APK + run: eas build --platform android --profile local --local --output ${{ github.workspace }}/friend-lite-android.apk --non-interactive + + - name: Generate release tag + id: tag + run: | + echo "RELEASE_TAG=v1.0.0-$(date +'%Y%m%d-%H%M%S')" >> $GITHUB_OUTPUT + echo "RELEASE_NAME=Friend Lite Build $(date +'%Y-%m-%d %H:%M:%S')" >> $GITHUB_OUTPUT + echo "BUILD_TIME=$(date +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_OUTPUT + + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.RELEASE_TAG }} + release_name: ${{ steps.tag.outputs.RELEASE_NAME }} + body: | + ## 🚀 Automated Build + + **Built from commit:** ${{ github.sha }} + **Branch:** ${{ github.ref_name }} + **Build time:** ${{ steps.tag.outputs.BUILD_TIME }} + + ### 📱 Downloads + - **Android APK**: Ready for installation on Android devices + + ### 🔧 Build Info + - Built with GitHub Actions + - Debug build (unsigned) + - Safe for testing and development + draft: false + prerelease: true + + - name: Upload Android APK to Release + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ github.workspace }}/friend-lite-android.apk + asset_name: friend-lite-android.apk + asset_content_type: application/vnd.android.package-archive + + build-ios: + if: ${{ github.event_name != 'workflow_dispatch' || 
inputs.build_ios }} + needs: build-android + runs-on: macos-14 + defaults: + run: + working-directory: ./app + + steps: + - name: Setup repo + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4.0.2 + with: + node-version: 20.x + cache: 'npm' + cache-dependency-path: ./app/package-lock.json + + - name: Select Xcode version + run: sudo xcode-select -s /Applications/Xcode_16.1.app/Contents/Developer + + - name: Setup Expo + uses: expo/expo-github-action@v8 + with: + expo-version: latest + eas-version: latest + token: ${{ secrets.EXPO_TOKEN }} + + - name: Install dependencies + run: npm ci + + - name: Initialize EAS + run: eas init --force --non-interactive + + - name: Build iOS IPA + run: eas build --platform ios --profile local --local --non-interactive --output ${{ github.workspace }}/friend-lite-ios.ipa + + - name: Upload iOS IPA to Existing Release + if: needs.build-android.result == 'success' + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build-android.outputs.upload_url }} + asset_path: ${{ github.workspace }}/friend-lite-ios.ipa + asset_name: friend-lite-ios.ipa + asset_content_type: application/octet-stream + + - name: Generate iOS release info + if: needs.build-android.result != 'success' + id: ios_tag + run: | + echo "IOS_RELEASE_TAG=ios-v1.0.0-$(date +'%Y%m%d-%H%M%S')" >> $GITHUB_OUTPUT + echo "IOS_RELEASE_NAME=Friend Lite iOS $(date +'%Y-%m-%d %H:%M:%S')" >> $GITHUB_OUTPUT + echo "IOS_BUILD_TIME=$(date +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_OUTPUT + + - name: Create iOS-only Release + if: needs.build-android.result != 'success' + id: create_ios_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.ios_tag.outputs.IOS_RELEASE_TAG }} + release_name: ${{ steps.ios_tag.outputs.IOS_RELEASE_NAME }} + body: | + ## 🍎 iOS IPA Build + + **Built from commit:** ${{ github.sha }} + **Branch:** ${{ 
github.ref_name }} + **Build time:** ${{ steps.ios_tag.outputs.IOS_BUILD_TIME }} + + For iOS Simulator testing! + draft: false + prerelease: true + + - name: Upload iOS IPA to New Release + if: needs.build-android.result != 'success' + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_ios_release.outputs.upload_url }} + asset_path: ${{ github.workspace }}/friend-lite-ios.ipa + asset_name: friend-lite-ios.ipa + asset_content_type: application/octet-stream \ No newline at end of file diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 00000000..ae36c007 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. 
If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://docs.anthropic.com/en/docs/claude-code/sdk#command-line for available options + # claude_args: '--model claude-opus-4-1-20250805 --allowed-tools Bash(gh pr:*)' + diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 00000000..38f1eccb --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,65 @@ +name: Integration Tests + +on: + push: + branches: [ main, develop ] + paths: + - 'backends/advanced/src/**' + - 'backends/advanced/run-test.sh' + - '.github/workflows/integration-tests.yml' + pull_request: + branches: [ main, develop ] + paths: + - 'backends/advanced/src/**' + - 'backends/advanced/run-test.sh' + - '.github/workflows/integration-tests.yml' + +jobs: + integration-tests: + runs-on: ubuntu-latest + timeout-minutes: 20 + + services: + docker: + image: docker:dind + options: --privileged + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install PortAudio dependencies + run: sudo apt-get update && sudo apt-get install -y portaudio19-dev + + - name: Run Advanced Backend Integration Tests + env: + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd backends/advanced + chmod +x run-test.sh + ./run-test.sh + + - name: Upload test logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: 
integration-test-logs + path: | + backends/advanced/test_integration.log + backends/advanced/docker-compose-test.yml + backends/advanced/.env.test + retention-days: 7 \ No newline at end of file diff --git a/.github/workflows/ios-ipa-build.yml b/.github/workflows/ios-ipa-build.yml new file mode 100644 index 00000000..dbd0c5bb --- /dev/null +++ b/.github/workflows/ios-ipa-build.yml @@ -0,0 +1,86 @@ +name: iOS IPA Build + +permissions: + contents: write + +on: + push: + branches: [main, develop] + paths: ['app/**'] + pull_request: + branches: [main] + paths: ['app/**'] + workflow_dispatch: + +jobs: + build: + runs-on: macos-14 + defaults: + run: + working-directory: ./app + + steps: + - name: Setup repo + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4.0.2 + with: + node-version: 20.x + cache: 'npm' + cache-dependency-path: ./app/package-lock.json + + - name: Select Xcode version + run: sudo xcode-select -s /Applications/Xcode_16.1.app/Contents/Developer + + - name: Setup Expo + uses: expo/expo-github-action@v8 + with: + expo-version: latest + eas-version: latest + token: ${{ secrets.EXPO_TOKEN }} + + - name: Install dependencies + run: npm ci + + - name: Initialize EAS + run: eas init --force --non-interactive + + - name: Build iOS IPA + run: eas build --platform ios --profile local --local --non-interactive --output ${{ github.workspace }}/app-release.ipa + + - name: Generate release tag + id: tag + run: | + echo "RELEASE_TAG=ios-v1.0.0-$(date +'%Y%m%d-%H%M%S')" >> $GITHUB_OUTPUT + echo "RELEASE_NAME=Friend Lite iOS $(date +'%Y-%m-%d %H:%M:%S')" >> $GITHUB_OUTPUT + echo "BUILD_TIME=$(date +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_OUTPUT + + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.RELEASE_TAG }} + release_name: ${{ steps.tag.outputs.RELEASE_NAME }} + body: | + ## 🍎 iOS IPA Build + + **Built from commit:** ${{ 
github.sha }} + **Branch:** ${{ github.ref_name }} + **Build time:** ${{ steps.tag.outputs.BUILD_TIME }} + + For iOS Simulator testing! + draft: false + prerelease: true + + - name: Upload iOS IPA to Release + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ github.workspace }}/app-release.ipa + asset_name: friend-lite-ios.ipa + asset_content_type: application/octet-stream \ No newline at end of file diff --git a/.github/workflows/speaker-recognition-tests.yml b/.github/workflows/speaker-recognition-tests.yml new file mode 100644 index 00000000..f7342848 --- /dev/null +++ b/.github/workflows/speaker-recognition-tests.yml @@ -0,0 +1,78 @@ +name: Speaker Recognition Tests + +on: + push: + branches: [ main, develop ] + paths: + - 'extras/speaker-recognition/src/**' + - 'extras/speaker-recognition/tests/**' + - 'extras/speaker-recognition/pyproject.toml' + - 'extras/speaker-recognition/docker-compose.yml' + - 'extras/speaker-recognition/docker-compose-test.yml' + - 'extras/speaker-recognition/Dockerfile' + - 'extras/speaker-recognition/run-test.sh' + - '.github/workflows/speaker-recognition-tests.yml' + pull_request: + branches: [ main, develop ] + paths: + - 'extras/speaker-recognition/src/**' + - 'extras/speaker-recognition/tests/**' + - 'extras/speaker-recognition/pyproject.toml' + - 'extras/speaker-recognition/docker-compose.yml' + - 'extras/speaker-recognition/docker-compose-test.yml' + - 'extras/speaker-recognition/Dockerfile' + - 'extras/speaker-recognition/run-test.sh' + - '.github/workflows/speaker-recognition-tests.yml' + +jobs: + speaker-recognition-tests: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + + - name: Set up 
Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Run Speaker Recognition Integration Tests + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + run: | + cd extras/speaker-recognition + ./run-test.sh + + - name: Debug Docker build failure + if: failure() + run: | + cd extras/speaker-recognition + echo "=== Docker images ===" + docker images + echo "=== Docker containers ===" + docker ps -a + echo "=== Docker Compose logs ===" + docker compose -f docker-compose-test.yml logs || true + echo "=== Docker system info ===" + docker system df || true + + - name: Upload test logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: speaker-recognition-test-logs + path: | + extras/speaker-recognition/docker-compose-test.yml + extras/speaker-recognition/.env + retention-days: 7 \ No newline at end of file diff --git a/.gitignore b/.gitignore index c67c4e17..41ac1fea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,75 @@ **/__pycache__ *.wav **/*.env +!**/.env.template +**/memory_config.yaml +!**/memory_config.yaml.template example/* **/node_modules/* **/ollama-data/* **/qdrant_data/* -backends/examples/ai_server/* -backends/examples/example-satellite/* +**/model_cache/* +.vscode/* +**/audio_chunks/* +**/mongo_data/* +.cursor/* +.locks +blobs +transcription_results.csv +**cache/* +*.ckpt +**/hub/* +*.log +**/speaker_data*/** +**/.venv/* +**metrics_report** + +*.db +**/advanced_omi_backend.egg-info/ +**/dist/* +**/build/* +**/*.egg-info/ + +untracked/* +backends/advanced/data/* +backends/advanced/diarization_config.json +extras/havpe-relay/firmware/secrets.yaml +extras/test-audios/* + +## Python Build stuff +**/*egg-info/* + +# Speaker Recognition +extras/speaker-omni-experimental/data/* +extras/speaker-omni-experimental/cache/* + +# AI Stuff +.claude + +# SSL +extras/speaker-recognition/ssl/* +backends/advanced/ssl/* + +# nginx +extras/speaker-recognition/nginx.conf + +# 
Cache +extras/speaker-recognition/cache/* +extras/speaker-recognition/outputs/* + +# my backup +backends/advanced/src/_webui_original/* +backends/advanced-backend/data/neo4j_data/* +backends/advanced-backend/data/speaker_model_cache/ + +*.bin +*.sqlite3 +*checkpoints + + +# k8s config +backends/charts/advanced-backend/env-configmap.yaml + +extras/openmemory-mcp/data/* +.env.backup.* + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..6d699ff5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +repos: + - repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black + files: ^backends/advanced-backend/src/.*\.py$ + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + files: ^backends/advanced-backend/src/.*\.py$ + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + files: ^backends/advanced-backend/src/.* + - id: end-of-file-fixer + files: ^backends/advanced-backend/src/.* \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 29fbbdb3..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "mypy-type-checker.args": [ - "\"mypy-type-checker.args\" = [\"--config-file=./backend/pyproject.toml\"]" - ] -} \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..8ee8193c --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,1249 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Friend-Lite is at the core an AI-powered personal system - various devices, including but not limited to wearables from OMI can be used for at the very least audio capture, speaker-specific transcription, memory extraction and retrieval.
+On top of that - it is being designed to support other services, that can help a user with these inputs such as reminders, action items, personal diagnosis etc. + +This supports a comprehensive web dashboard for management. + +**⚠️ Active Development Notice**: This project is under active development. Do not create migration scripts or assume stable APIs. Only offer suggestions and improvements when requested. + +**❌ No Backward Compatibility**: Do NOT add backward compatibility code unless explicitly requested. This includes fallback logic, legacy field support, or compatibility layers. Always ask before adding backward compatibility - in most cases the answer is no during active development. + +## Development Commands + +### Backend Development (Advanced Backend - Primary) +```bash +cd backends/advanced + +# Start full stack with Docker +docker compose up --build -d + +uv run python src/main.py + +# Code formatting and linting +uv run black src/ +uv run isort src/ + +# Run tests +uv run pytest +uv run pytest tests/test_memory_service.py # Single test file + +# Run integration tests (local script mirrors CI) +./run-test.sh # Complete integration test suite + +# Environment setup +cp .env.template .env # Configure environment variables + +# Reset data (development) +sudo rm -rf backends/advanced/data/ +``` + +### Testing Infrastructure + +#### Local Test Scripts +The project includes simplified test scripts that mirror CI workflows: + +```bash +# Run all tests from project root +./run-test.sh [advanced-backend|speaker-recognition|all] + +# Advanced backend tests only +./run-test.sh advanced-backend + +# Speaker recognition tests only +./run-test.sh speaker-recognition + +# Run all test suites (default) +./run-test.sh all +``` + +#### Advanced Backend Integration Tests +```bash +cd backends/advanced + +# Requires .env file with DEEPGRAM_API_KEY and OPENAI_API_KEY +cp .env.template .env # Configure API keys + +# Run full integration test suite +./run-test.sh + +# 
Manual test execution (for debugging) +source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY +uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s +``` + +#### Speaker Recognition Tests +```bash +cd extras/speaker-recognition + +# Requires .env file with HF_TOKEN and DEEPGRAM_API_KEY +cp .env.template .env # Configure tokens + +# Run speaker recognition test suite +./run-test.sh +``` + +#### Test Script Features +- **Environment Compatibility**: Works with both local .env files and CI environment variables +- **Simplified Configuration**: Uses environment variables directly, no temporary .env.test files +- **Docker Cleanup**: Uses lightweight Alpine container for reliable permission-free cleanup +- **Automatic Cleanup**: Stops and removes test containers after execution +- **Colored Output**: Clear progress indicators and error reporting +- **Timeout Protection**: 15-minute timeout for advanced backend, 30-minute for speaker recognition +- **Fresh Testing**: Uses CACHED_MODE=False for clean test environments + +#### Debugging Integration Tests +For advanced debugging, you can still use the cached mode approach: + +1. **Edit tests/test_integration.py**: Set CACHED_MODE = True +2. **Run test manually**: `uv run pytest tests/test_integration.py -v -s --tb=short` +3. **Debug containers**: `docker logs advanced-backend-friend-backend-test-1 --tail=100` +4. **Test endpoints**: `curl -X GET http://localhost:8001/health` +5. 
**Clean up**: `docker compose -f docker-compose-test.yml down -v`
+
+### Mobile App Development
+```bash
+cd app
+
+# Start Expo development server
+npm start
+
+# Platform-specific builds
+npm run android
+npm run ios
+npm run web
+```
+
+### Additional Services
+```bash
+# ASR Services
+cd extras/asr-services
+docker compose up parakeet # Offline ASR with Parakeet
+
+# Speaker Recognition (with tests)
+cd extras/speaker-recognition
+docker compose up --build
+./run-test.sh # Run speaker recognition integration tests
+
+# HAVPE Relay (ESP32 bridge)
+cd extras/havpe-relay
+docker compose up --build
+```
+
+## Architecture Overview
+
+### Core Structure
+- **backends/advanced/**: Primary FastAPI backend with real-time audio processing
+  - `src/main.py`: Central FastAPI application with WebSocket audio streaming
+  - `src/auth.py`: Email-based authentication with JWT tokens
+  - `src/memory/`: LLM-powered conversation memory system using mem0
+  - `webui/`: React-based web dashboard for conversation and user management
+
+### Key Components
+- **Audio Pipeline**: Real-time Opus/PCM → Application-level processing → Deepgram/Mistral transcription → memory extraction
+- **Wyoming Protocol**: WebSocket communication uses Wyoming protocol (JSONL + binary) for structured audio sessions
+- **Application-Level Processing**: Centralized processors for audio, transcription, memory, and cropping
+- **Task Management**: BackgroundTaskManager tracks all async tasks to prevent orphaned processes
+- **Unified Transcription**: Deepgram/Mistral transcription with fallback to offline ASR services
+- **Memory System**: Pluggable providers (Friend-Lite native or OpenMemory MCP)
+- **Authentication**: Email-based login with MongoDB ObjectId user system
+- **Client Management**: Auto-generated client IDs as `{user_id_suffix}-{device_name}`, centralized ClientManager
+- **Data Storage**: MongoDB (`audio_chunks` collection for conversations), vector storage (Qdrant or OpenMemory)
+- 
**Web Interface**: React-based web dashboard with authentication and real-time monitoring + +### Service Dependencies +```yaml +Required: + - MongoDB: User data and conversations + - FastAPI Backend: Core audio processing + - LLM Service: Memory extraction and action items (OpenAI or Ollama) + +Recommended: + - Vector Storage: Qdrant (Friend-Lite provider) or OpenMemory MCP server + - Transcription: Deepgram, Mistral, or offline ASR services + +Optional: + - Parakeet ASR: Offline transcription service + - Speaker Recognition: Voice identification service + - Nginx Proxy: Load balancing and routing + - OpenMemory MCP: For cross-client memory compatibility +``` + +## Data Flow Architecture + +1. **Audio Ingestion**: OMI devices stream audio via WebSocket using Wyoming protocol with JWT auth +2. **Wyoming Protocol Session Management**: Clients send audio-start/audio-stop events for session boundaries +3. **Application-Level Processing**: Global queues and processors handle all audio/transcription/memory tasks +4. **Speech-Driven Conversation Creation**: User-facing conversations only created when speech is detected +5. **Dual Storage System**: Audio sessions always stored in `audio_chunks`, conversations created in `conversations` collection only with speech +6. **Versioned Processing**: Transcript and memory versions tracked with active version pointers +7. **Memory Processing**: Pluggable providers (Friend-Lite native with individual facts or OpenMemory MCP delegation) +8. **Memory Storage**: Direct Qdrant (Friend-Lite) or OpenMemory server (MCP provider) +9. **Action Items**: Automatic task detection with "Simon says" trigger phrases +10. **Audio Optimization**: Speech segment extraction removes silence automatically +11. 
**Task Tracking**: BackgroundTaskManager ensures proper cleanup of all async operations + +### Speech-Driven Architecture + +**Core Principle**: Conversations are only created when speech is detected, eliminating noise-only sessions from user interfaces. + +**Storage Architecture**: +- **`audio_chunks` Collection**: Always stores audio sessions by `audio_uuid` (raw audio capture) +- **`conversations` Collection**: Only created when speech is detected, identified by `conversation_id` +- **Speech Detection**: Analyzes transcript content, duration, and meaningfulness before conversation creation +- **Automatic Filtering**: No user-facing conversations for silence, noise, or brief audio without speech + +**Benefits**: +- Clean user experience with only meaningful conversations displayed +- Reduced noise in conversation lists and memory processing +- Efficient storage utilization for speech-only content +- Automatic quality filtering without manual intervention + +### Versioned Transcript and Memory System + +**Version Architecture**: +- **`transcript_versions`**: Array of transcript processing attempts with timestamps and providers +- **`memory_versions`**: Array of memory extraction attempts with different models/prompts +- **`active_transcript_version`**: Pointer to currently displayed transcript +- **`active_memory_version`**: Pointer to currently active memory extraction + +**Reprocessing Capabilities**: +- **Transcript Reprocessing**: Re-run speech-to-text with different providers or settings +- **Memory Reprocessing**: Re-extract memories using different LLM models or prompts +- **Version Management**: Switch between different processing results +- **Backward Compatibility**: Legacy fields auto-populated from active versions + +**Data Consistency**: +- All reprocessing operations use `conversation_id` (not `audio_uuid`) +- DateTime objects stored as ISO strings for MongoDB/JSON compatibility +- Legacy field support ensures existing integrations continue working + 
+### Database Schema Details + +**Collections Overview**: +- **`audio_chunks`**: All audio sessions by `audio_uuid` (always created) +- **`conversations`**: Speech-detected conversations by `conversation_id` (created conditionally) +- **`users`**: User accounts and authentication data + +**Speech-Driven Schema**: +```javascript +// audio_chunks collection (always created) +{ + "_id": ObjectId, + "audio_uuid": "uuid", // Primary identifier + "user_id": ObjectId, + "client_id": "user_suffix-device_name", + "audio_file_path": "/path/to/audio.wav", + "created_at": ISODate, + "transcript": "fallback transcript", // For non-speech audio + "segments": [...], // Speaker segments + "has_speech": boolean, // Speech detection result + "speech_analysis": {...}, // Detection metadata + "conversation_id": "conv_id" | null // Link to conversations collection +} + +// conversations collection (speech-detected only) +{ + "_id": ObjectId, + "conversation_id": "conv_uuid", // Primary identifier for user-facing operations + "audio_uuid": "audio_uuid", // Link to audio_chunks + "user_id": ObjectId, + "client_id": "user_suffix-device_name", + "created_at": ISODate, + + // Versioned Transcript System + "transcript_versions": [ + { + "version_id": "uuid", + "transcript": "text content", + "segments": [...], // Speaker diarization + "provider": "deepgram|mistral|parakeet", + "model": "nova-3|voxtral-mini-2507", + "created_at": ISODate, + "processing_time_seconds": 12.5, + "metadata": {...} + } + ], + "active_transcript_version": "uuid", // Points to current version + + // Versioned Memory System + "memory_versions": [ + { + "version_id": "uuid", + "memory_count": 5, + "transcript_version_id": "uuid", // Which transcript was used + "provider": "friend_lite|openmemory_mcp", + "model": "gpt-4o-mini|ollama-llama3", + "created_at": ISODate, + "processing_time_seconds": 45.2, + "metadata": {...} + } + ], + "active_memory_version": "uuid", // Points to current version + + // Legacy Fields 
(auto-populated from active versions) + "transcript": "text", // From active_transcript_version + "segments": [...], // From active_transcript_version + "memories": [...], // From active_memory_version + "memory_count": 5 // From active_memory_version +} +``` + +**Key Architecture Benefits**: +- **Clean Separation**: Raw audio storage vs user-facing conversations +- **Speech Filtering**: Only meaningful conversations appear in UI +- **Version History**: Complete audit trail of processing attempts +- **Backward Compatibility**: Legacy fields ensure existing code works +- **Reprocessing Support**: Easy to re-run with different providers/models +- **Service Decoupling**: Conversation creation independent of memory processing +- **Error Isolation**: Memory service failures don't affect conversation storage + +## Authentication & Security + +- **User System**: Email-based authentication with MongoDB ObjectId user IDs +- **Client Registration**: Automatic `{objectid_suffix}-{device_name}` format +- **Data Isolation**: All data scoped by user_id with efficient permission checking +- **API Security**: JWT tokens required for all endpoints and WebSocket connections +- **Admin Bootstrap**: Automatic admin account creation with ADMIN_EMAIL/ADMIN_PASSWORD + +## Configuration + +### Required Environment Variables +```bash +# Authentication +AUTH_SECRET_KEY=your-super-secret-jwt-key-here +ADMIN_PASSWORD=your-secure-admin-password +ADMIN_EMAIL=admin@example.com + +# LLM Configuration +LLM_PROVIDER=openai # or ollama +OPENAI_API_KEY=your-openai-key-here +OPENAI_BASE_URL=https://api.openai.com/v1 +OPENAI_MODEL=gpt-4o-mini + +# Speech-to-Text +DEEPGRAM_API_KEY=your-deepgram-key-here +# Optional: PARAKEET_ASR_URL=http://host.docker.internal:8767 +# Optional: TRANSCRIPTION_PROVIDER=deepgram + +# Memory Provider (New) +MEMORY_PROVIDER=friend_lite # or openmemory_mcp + +# Database +MONGODB_URI=mongodb://mongo:27017 +# Database name: friend-lite +QDRANT_BASE_URL=qdrant + +# Network 
Configuration +HOST_IP=localhost +BACKEND_PUBLIC_PORT=8000 +WEBUI_PORT=5173 +CORS_ORIGINS=http://localhost:3000,http://localhost:5173 +``` + +### Memory Provider Configuration + +Friend-Lite now supports two pluggable memory backends: + +#### Friend-Lite Memory Provider (Default) +```bash +# Use Friend-Lite memory provider (default) +MEMORY_PROVIDER=friend_lite + +# LLM Configuration for memory extraction +LLM_PROVIDER=openai +OPENAI_API_KEY=your-openai-key-here +OPENAI_MODEL=gpt-4o-mini + +# Vector Storage +QDRANT_BASE_URL=qdrant +``` + +#### OpenMemory MCP Provider +```bash +# Use OpenMemory MCP provider +MEMORY_PROVIDER=openmemory_mcp + +# OpenMemory MCP Server Configuration +OPENMEMORY_MCP_URL=http://host.docker.internal:8765 +OPENMEMORY_CLIENT_NAME=friend_lite +OPENMEMORY_USER_ID=openmemory +OPENMEMORY_TIMEOUT=30 + +# OpenAI key for OpenMemory server +OPENAI_API_KEY=your-openai-key-here +``` + +#### OpenMemory MCP Interface Patterns + +**Important**: OpenMemory MCP stores memories **per-app**, not globally. Understanding this architecture is critical for proper integration. + +**App-Based Storage Architecture:** +- All memories are stored under specific "apps" (namespaces) +- Generic endpoints (`/api/v1/memories/`) return empty results +- App-specific endpoints (`/api/v1/apps/{app_id}/memories`) contain the actual memories + +**Hardcoded Values and Configuration:** +```bash +# Default app name (configurable via OPENMEMORY_CLIENT_NAME) +Default: "friend_lite" + +# Hardcoded metadata (NOT configurable) +"source": "friend_lite" # Always hardcoded in Friend-Lite + +# User ID for OpenMemory MCP server +OPENMEMORY_USER_ID=openmemory # Configurable +``` + +**API Interface Pattern:** +```python +# 1. App Discovery - Find app by client_name +GET /api/v1/apps/ +# Response: {"apps": [{"id": "uuid", "name": "friend_lite", ...}]} + +# 2. 
Memory Creation - Uses generic endpoint but assigns to app +POST /api/v1/memories/ +{ + "user_id": "openmemory", + "text": "memory content", + "app": "friend_lite", # Uses OPENMEMORY_CLIENT_NAME + "metadata": { + "source": "friend_lite", # Hardcoded + "client": "friend_lite" # Uses OPENMEMORY_CLIENT_NAME + } +} + +# 3. Memory Retrieval - Must use app-specific endpoint +GET /api/v1/apps/{app_id}/memories?user_id=openmemory&page=1&size=10 + +# 4. Memory Search - Must use app-specific endpoint with search_query +GET /api/v1/apps/{app_id}/memories?user_id=openmemory&search_query=keyword&page=1&size=10 +``` + +**Friend-Lite Integration Flow:** +1. **App Discovery**: Query `/api/v1/apps/` to find app matching `OPENMEMORY_CLIENT_NAME` +2. **Fallback**: If client app not found, use first available app +3. **Operations**: All memory operations use the app-specific endpoints with discovered `app_id` + +**Testing OpenMemory MCP Integration:** +```bash +# Configure .env file with OpenMemory MCP settings +cp .env.template .env +# Edit .env to set MEMORY_PROVIDER=openmemory_mcp and configure OPENMEMORY_* variables + +# Start OpenMemory MCP server +cd extras/openmemory-mcp && docker compose up -d + +# Run integration tests (reads configuration from .env file) +cd backends/advanced && ./run-test.sh + +# Manual testing - Check app structure +curl -s "http://localhost:8765/api/v1/apps/" | jq + +# Test memory creation +curl -X POST "http://localhost:8765/api/v1/memories/" \ + -H "Content-Type: application/json" \ + -d '{"user_id": "openmemory", "text": "test memory", "app": "friend_lite"}' + +# Retrieve memories (replace app_id with actual ID from apps endpoint) +curl -s "http://localhost:8765/api/v1/apps/{app_id}/memories?user_id=openmemory" | jq +``` + +### Transcription Provider Configuration + +Friend-Lite supports multiple transcription services: + +```bash +# Option 1: Deepgram (High quality, recommended) +TRANSCRIPTION_PROVIDER=deepgram +DEEPGRAM_API_KEY=your-deepgram-key-here 
+ +# Option 2: Mistral (Voxtral models) +TRANSCRIPTION_PROVIDER=mistral +MISTRAL_API_KEY=your-mistral-key-here +MISTRAL_MODEL=voxtral-mini-2507 + +# Option 3: Local ASR (Parakeet) +PARAKEET_ASR_URL=http://host.docker.internal:8767 +``` + +### Additional Service Configuration +```bash +# LLM Processing +OLLAMA_BASE_URL=http://ollama:11434 + +# Speaker Recognition +SPEAKER_SERVICE_URL=http://speaker-recognition:8085 +``` + +## Transcription Architecture + +### Provider System +Friend-Lite supports multiple transcription providers: + +**Online Providers (API-based):** +- **Deepgram**: High-quality transcription using Nova-3 model with real-time streaming +- **Mistral**: Voxtral models for transcription with REST API processing + +**Offline Providers (Local processing):** +- **Parakeet**: Local speech recognition service available in extras/asr-services + +**Provider Interface:** +The transcription system handles: +- Connection management and health checks +- Audio format handling (streaming vs batch) +- Error handling and reconnection +- Unified transcript format normalization + +## Wyoming Protocol Implementation + +### Overview +The system uses Wyoming protocol for WebSocket communication between mobile apps and backends. Wyoming is a peer-to-peer protocol for voice assistants that combines JSONL headers with binary audio payloads. 
+
+### Protocol Format
+```
+{JSON_HEADER}\n
+<optional binary payload>
+```
+
+### Supported Events
+
+#### Audio Session Events
+- **audio-start**: Signals the beginning of an audio recording session
+  ```json
+  {"type": "audio-start", "data": {"rate": 16000, "width": 2, "channels": 1}, "payload_length": null}
+  ```
+
+- **audio-chunk**: Contains raw audio data with format metadata
+  ```json
+  {"type": "audio-chunk", "data": {"rate": 16000, "width": 2, "channels": 1}, "payload_length": 320}
+  <320 bytes of PCM/Opus audio data>
+  ```
+
+- **audio-stop**: Signals the end of an audio recording session
+  ```json
+  {"type": "audio-stop", "data": {"timestamp": 1234567890}, "payload_length": null}
+  ```
+
+### Backend Implementation
+
+#### Advanced Backend (`/ws_pcm`)
+- **Full Wyoming Protocol Support**: Parses all Wyoming events for session management
+- **Session Tracking**: Only processes audio chunks when session is active (after audio-start)
+- **Conversation Boundaries**: Uses audio-start/stop events to define conversation segments
+- **Backward Compatibility**: Fallback to raw binary audio for older clients
+
+#### Simple Backend (`/ws`)
+- **Minimal Wyoming Support**: Parses audio-chunk events, ignores others
+- **Opus Processing**: Handles Opus-encoded audio chunks from Wyoming protocol
+- **Graceful Degradation**: Falls back to raw Opus packets for compatibility
+
+### Mobile App Integration
+
+Mobile apps should implement Wyoming protocol for proper session management:
+
+```javascript
+// Start audio session
+const audioStart = {
+  type: "audio-start",
+  data: { rate: 16000, width: 2, channels: 1 },
+  payload_length: null
+};
+websocket.send(JSON.stringify(audioStart) + '\n');
+
+// Send audio chunks
+const audioChunk = {
+  type: "audio-chunk",
+  data: { rate: 16000, width: 2, channels: 1 },
+  payload_length: audioData.byteLength
+};
+websocket.send(JSON.stringify(audioChunk) + '\n');
+websocket.send(audioData);
+
+// End audio session
+const audioStop = {
+  type: "audio-stop",
+  
data: { timestamp: Date.now() }, + payload_length: null +}; +websocket.send(JSON.stringify(audioStop) + '\n'); +``` + +### Benefits +- **Clear Session Boundaries**: No timeout-based conversation detection needed +- **Structured Communication**: Consistent protocol across all audio streaming +- **Future Extensibility**: Room for additional event types (pause, resume, metadata) +- **Backward Compatibility**: Works with existing raw audio streaming clients + +## Memory System Architecture + +### Overview +Friend-Lite supports two pluggable memory backends that can be selected via configuration: + +#### 1. Friend-Lite Memory Provider (`friend_lite`) +The sophisticated in-house memory implementation with full control and customization: + +**Features:** +- Custom LLM-powered memory extraction with enhanced prompts +- Individual fact storage (no JSON blobs) +- Smart deduplication algorithms +- Intelligent memory updates (ADD/UPDATE/DELETE decisions) +- **Semantic search** with relevance threshold filtering +- **Memory count API** with total count tracking from native Qdrant +- Direct Qdrant vector storage with accurate similarity scoring +- Custom memory prompts and processing +- No external dependencies + +**Architecture Flow:** +1. **Audio Input** → Transcription via Deepgram/Parakeet +2. **Memory Extraction** → LLM processes transcript using custom prompts +3. **Fact Parsing** → XML/JSON parsing into individual memory entries +4. **Deduplication** → Smart algorithms prevent duplicate memories +5. **Vector Storage** → Direct Qdrant storage with embeddings +6. **Memory Updates** → LLM-driven action proposals (ADD/UPDATE/DELETE) + +#### 2. 
OpenMemory MCP Provider (`openmemory_mcp`) +Thin client that delegates all memory processing to external OpenMemory MCP server: + +**Features:** +- Professional memory extraction (handled by OpenMemory) +- Battle-tested deduplication (handled by OpenMemory) +- Semantic vector search (handled by OpenMemory) +- ACL-based user isolation (handled by OpenMemory) +- Cross-client compatibility (Claude Desktop, Cursor, Windsurf) +- Web UI for memory management at http://localhost:8765 + +**Architecture Flow:** +1. **Audio Input** → Transcription via Deepgram/Parakeet +2. **MCP Delegation** → Send enriched transcript to OpenMemory MCP server +3. **External Processing** → OpenMemory handles extraction, deduplication, storage +4. **Result Mapping** → Convert MCP results to Friend-Lite MemoryEntry format +5. **Client Management** → Automatic user context switching via MCP client + +### Memory Provider Comparison + +| Feature | Friend-Lite | OpenMemory MCP | +|---------|-------------|----------------| +| **Processing** | Custom LLM extraction | Delegates to OpenMemory | +| **Deduplication** | Custom algorithms | OpenMemory handles | +| **Vector Storage** | Direct Qdrant | OpenMemory handles | +| **Search Features** | Semantic search with threshold filtering | Semantic search with relevance scoring | +| **Memory Count** | Native Qdrant count API | Varies by OpenMemory support | +| **Dependencies** | Qdrant + MongoDB | External OpenMemory server | +| **Customization** | Full control | Limited to OpenMemory features | +| **Cross-client** | Friend-Lite only | Works with Claude Desktop, Cursor, etc | +| **Web UI** | Friend-Lite WebUI with advanced search | OpenMemory UI + Friend-Lite WebUI | +| **Memory Format** | Individual facts | OpenMemory format | +| **Setup Complexity** | Medium | High (external server required) | + +### Switching Memory Providers + +You can switch providers by changing the `MEMORY_PROVIDER` environment variable: + +```bash +# Switch to OpenMemory MCP +echo 
"MEMORY_PROVIDER=openmemory_mcp" >> .env + +# Switch back to Friend-Lite +echo "MEMORY_PROVIDER=friend_lite" >> .env +``` + +**Note:** Existing memories are not automatically migrated between providers. Each provider maintains its own memory storage. + +### OpenMemory MCP Setup + +To use the OpenMemory MCP provider: + +```bash +# 1. Start external OpenMemory MCP server +cd extras/openmemory-mcp +docker compose up -d + +# 2. Configure Friend-Lite to use OpenMemory MCP +cd backends/advanced +echo "MEMORY_PROVIDER=openmemory_mcp" >> .env + +# 3. Start Friend-Lite backend +docker compose up --build -d +``` + +### When to Use Each Provider + +**Use Friend-Lite when:** +- You want full control over memory processing +- You need custom memory extraction logic +- You prefer fewer external dependencies +- You want to customize memory prompts and algorithms +- You need individual fact-based memory storage + +**Use OpenMemory MCP when:** +- You want professional, battle-tested memory processing +- You need cross-client compatibility (Claude Desktop, Cursor, etc.) +- You prefer to leverage external expertise rather than maintain custom logic +- You want access to OpenMemory's web interface +- You're already using OpenMemory in other tools + +## Versioned Processing System + +### Overview + +Friend-Lite implements a comprehensive versioning system for both transcript and memory processing, allowing multiple processing attempts with different providers, models, or settings while maintaining a clean user experience. 
+ +### Version Data Structure + +**Transcript Versions**: +```json +{ + "transcript_versions": [ + { + "version_id": "uuid", + "transcript": "processed text", + "segments": [...], + "provider": "deepgram|mistral|parakeet", + "model": "nova-3|voxtral-mini-2507", + "created_at": "2025-01-15T10:30:00Z", + "processing_time_seconds": 12.5, + "metadata": { + "confidence_scores": [...], + "speaker_diarization": true + } + } + ], + "active_transcript_version": "uuid" +} +``` + +**Memory Versions**: +```json +{ + "memory_versions": [ + { + "version_id": "uuid", + "memory_count": 5, + "transcript_version_id": "uuid", + "provider": "friend_lite|openmemory_mcp", + "model": "gpt-4o-mini|ollama-llama3", + "created_at": "2025-01-15T10:32:00Z", + "processing_time_seconds": 45.2, + "metadata": { + "prompt_version": "v2.1", + "extraction_quality": "high" + } + } + ], + "active_memory_version": "uuid" +} +``` + +### Reprocessing Workflows + +**Transcript Reprocessing**: +1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-transcript` +2. System creates new transcript version with different provider/model +3. New version added to `transcript_versions` array +4. User can activate any version via `activate-transcript` endpoint +5. Legacy `transcript` field automatically updated from active version + +**Memory Reprocessing**: +1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-memory` +2. Specify which transcript version to use as input +3. System creates new memory version using specified transcript +4. New version added to `memory_versions` array +5. User can activate any version via `activate-memory` endpoint +6. 
Legacy `memories` field automatically updated from active version + +### Legacy Field Compatibility + +**Automatic Population**: +- `transcript`: Auto-populated from active transcript version +- `segments`: Auto-populated from active transcript version +- `memories`: Auto-populated from active memory version +- `memory_count`: Auto-populated from active memory version + +**Backward Compatibility**: +- Existing API clients continue working without modification +- WebUI displays active versions by default +- Advanced users can access version history and switch between versions + +## Development Notes + +### Package Management +- **Backend**: Uses `uv` for Python dependency management (faster than pip) +- **Mobile**: Uses `npm` with React Native and Expo +- **Docker**: Primary deployment method with docker-compose + +### Testing Strategy +- **Local Test Scripts**: Simplified scripts (`./run-test.sh`) mirror CI workflows for local development +- **End-to-End Integration**: `test_integration.py` validates complete audio processing pipeline +- **Speaker Recognition Tests**: `test_speaker_service_integration.py` validates speaker identification +- **Environment Flexibility**: Tests work with both local .env files and CI environment variables +- **Automated Cleanup**: Test containers are automatically removed after execution +- **CI/CD Integration**: GitHub Actions use the same local test scripts for consistency + +### Code Style +- **Python**: Black formatter with 100-character line length, isort for imports +- **TypeScript**: Standard React Native conventions +- **Import Guidelines**: + - NEVER import modules in the middle of functions or files + - ALL imports must be at the top of the file after the docstring + - Use lazy imports sparingly and only when absolutely necessary for circular import issues + - Group imports: standard library, third-party, local imports +- **Error Handling Guidelines**: + - **Always raise errors, never silently ignore**: Use explicit error 
handling with proper exceptions rather than silent failures + - **Understand data structures**: Research and understand input/response or class structure instead of adding defensive `hasattr()` checks + +### Docker Build Cache Management +- **Default Behavior**: Docker automatically detects file changes in Dockerfile COPY/ADD instructions and invalidates cache as needed +- **No --no-cache by Default**: Only use `--no-cache` when explicitly needed (e.g., package updates, dependency issues) +- **Smart Caching**: Docker checks file modification times and content hashes to determine when rebuilds are necessary +- **Development Efficiency**: Trust Docker's cache system - it handles most development scenarios correctly + +### Health Monitoring +The system includes comprehensive health checks: +- `/readiness`: Service dependency validation +- `/health`: Basic application status +- Memory debug system for transcript processing monitoring + +### Integration Test Infrastructure +- **Unified Test Scripts**: Local `./run-test.sh` scripts mirror GitHub Actions workflows +- **Test Environment**: `docker-compose-test.yml` provides isolated services on separate ports +- **Test Database**: Uses `test_db` database with isolated collections +- **Service Ports**: Backend (8001), MongoDB (27018), Qdrant (6335/6336), WebUI (5174) +- **Test Credentials**: Auto-generated `.env.test` files with secure test configurations +- **Ground Truth**: Expected transcript established via `scripts/test_deepgram_direct.py` +- **AI Validation**: OpenAI-powered transcript similarity comparison +- **Test Audio**: 4-minute glass blowing tutorial (`extras/test-audios/DIY*mono*.wav`) +- **CI Compatibility**: Same test logic runs locally and in GitHub Actions + +### Cursor Rule Integration +Project includes `.cursor/rules/always-plan-first.mdc` requiring understanding before coding. Always explain the task and confirm approach before implementation. 
+ + +## API Reference + +### Health & Status Endpoints +- **GET /health**: Basic application health check +- **GET /readiness**: Service dependency validation (MongoDB, Qdrant, etc.) +- **GET /api/metrics**: System metrics and debug tracker status (Admin only) +- **GET /api/processor/status**: Processor queue status and health (Admin only) +- **GET /api/processor/tasks**: All active processing tasks (Admin only) +- **GET /api/processor/tasks/{client_id}**: Processing task status for specific client (Admin only) + +### WebSocket Endpoints +- **WS /ws_pcm**: Primary audio streaming endpoint (Wyoming protocol + raw PCM fallback) +- **WS /ws**: Simple audio streaming endpoint (Opus packets + Wyoming audio-chunk events) + +### Memory & Conversation Debugging +- **GET /api/admin/memories**: All memories across all users with debug stats (Admin only) +- **GET /api/memories/unfiltered**: User's memories without filtering +- **GET /api/memories/search**: Semantic memory search with relevance scoring +- **GET /api/conversations**: User's conversations with transcripts +- **GET /api/conversations/{conversation_id}**: Specific conversation details +- **POST /api/conversations/{conversation_id}/reprocess-transcript**: Re-run transcript processing +- **POST /api/conversations/{conversation_id}/reprocess-memory**: Re-extract memories with different parameters +- **GET /api/conversations/{conversation_id}/versions**: Get all transcript and memory versions +- **POST /api/conversations/{conversation_id}/activate-transcript**: Switch to a different transcript version +- **POST /api/conversations/{conversation_id}/activate-memory**: Switch to a different memory version + +### Client Management +- **GET /api/clients/active**: Currently active WebSocket clients +- **GET /api/users**: List all users (Admin only) + +### File Processing +- **POST /api/process-audio-files**: Upload and process audio files (Admin only) + - Note: Processes files sequentially, may timeout for large files + - 
Client timeout: 5 minutes, Server processing: up to 3x audio duration + 60s + - Example usage: + ```bash + # Step 1: Read .env file for ADMIN_EMAIL and ADMIN_PASSWORD + # Step 2: Get auth token + # Step 3: Use token in file upload + curl -X POST \ + -H "Authorization: Bearer YOUR_TOKEN_HERE" \ + -F "files=@/path/to/audio.wav" \ + -F "device_name=test-upload" \ + http://localhost:8000/api/process-audio-files + ``` + +### Authentication +- **POST /auth/jwt/login**: Email-based login (returns JWT token) +- **GET /users/me**: Get current authenticated user +- **GET /api/auth/config**: Authentication configuration + +### Step-by-Step API Testing Guide + +When testing API endpoints that require authentication, follow these steps: + +#### Step 1: Read credentials from .env file +```bash +# Use the Read tool to view the .env file and identify credentials +# Look for: +# ADMIN_EMAIL=admin@example.com +# ADMIN_PASSWORD=your-password-here +``` + +#### Step 2: Get authentication token +```bash +curl -s -X POST \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin@example.com&password=your-password-here" \ + http://localhost:8000/auth/jwt/login +``` +This returns: +```json +{"access_token":"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...","token_type":"bearer"} +``` + +#### Step 3: Use the token in API calls +```bash +# Extract the token from the response above and use it: +curl -s -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ + http://localhost:8000/api/conversations + +# For reprocessing endpoints: +curl -s -X POST \ + -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ + -H "Content-Type: application/json" \ + http://localhost:8000/api/conversations/{conversation_id}/reprocess-transcript +``` + +**Important**: Always read the .env file first using the Read tool rather than using shell commands like `grep` or `cut`. This ensures you see the exact values and can copy them accurately. 
+ +#### Step 4: Testing Reprocessing Endpoints +Once you have the auth token, you can test the reprocessing functionality: + +```bash +# Get list of conversations to find a conversation_id +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations + +# Test transcript reprocessing (uses conversation_id) +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-transcript + +# Test memory reprocessing (uses conversation_id and transcript_version_id) +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"transcript_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-memory + +# Get transcript and memory versions +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/versions + +# Activate a specific transcript version +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"transcript_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-transcript + +# Activate a specific memory version +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"memory_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-memory +``` + +### Development Reset Endpoints +Useful endpoints for resetting state during development: + +#### Data Cleanup +- **DELETE /api/admin/memory/delete-all**: Delete all memories for the current user +- **DELETE /api/memories/{memory_id}**: Delete a specific memory +- **DELETE /api/conversations/{conversation_id}**: Delete a specific conversation (keeps original audio file in audio_chunks) +- **DELETE /api/chat/sessions/{session_id}**: Delete a chat session 
and all its messages +- **DELETE /api/users/{user_id}**: Delete a user (Admin only) + - Optional query params: `delete_conversations=true`, `delete_memories=true` + +#### Quick Reset Commands +```bash +# Reset all data (development only) +cd backends/advanced +sudo rm -rf data/ + +# Reset Docker volumes +docker compose down -v +docker compose up --build -d +``` + + +## Speaker Recognition Service Features + +### Speaker Analysis & Visualization +The speaker recognition service now includes advanced analysis capabilities: + +#### Embedding Analysis (/speakers/analysis endpoint) +- **2D/3D Visualization**: Interactive embedding plots using UMAP, t-SNE, or PCA +- **Clustering Analysis**: Automatic clustering using DBSCAN or K-means +- **Speaker Similarity Detection**: Identifies speakers with similar embeddings +- **Quality Metrics**: Embedding separation quality and confidence scores +- **Interactive Controls**: Adjustable analysis parameters and visualization options + +Access via: `extras/speaker-recognition/webui` → Speakers → Embedding Analysis tab + +#### Live Inference Feature (/infer-live page) +Real-time speaker identification and transcription: +- **WebRTC Audio Capture**: Live microphone access with waveform visualization +- **Deepgram Streaming**: Real-time transcription with speaker diarization +- **Live Speaker ID**: Identifies enrolled speakers in real-time using internal service +- **Session Statistics**: Live metrics for words, speakers, and confidence scores +- **Configurable Settings**: Adjustable confidence thresholds and audio parameters + +Access via: `extras/speaker-recognition/webui` → Live Inference + +### Technical Implementation + +#### Backend (Python) +- **Analysis Utils**: `src/simple_speaker_recognition/utils/analysis.py` + - UMAP/t-SNE dimensionality reduction + - DBSCAN/K-means clustering + - Cosine similarity analysis + - Quality metrics calculation +- **API Endpoint**: `/speakers/analysis` - Returns processed embedding analysis +- 
**Dependencies**: Added `umap-learn` for dimensionality reduction + +#### Frontend (React/TypeScript) +- **EmbeddingPlot Component**: Interactive Plotly.js visualizations +- **LiveAudioCapture Component**: WebRTC audio recording with waveform +- **DeepgramStreaming Service**: WebSocket integration for real-time transcription +- **InferLive Page**: Complete live inference interface + +### Usage Instructions + +#### Setting up Live Inference +1. Navigate to Live Inference page +2. Configure Deepgram API key in settings +3. Adjust speaker identification settings (confidence threshold) +4. Start live session to begin real-time transcription and speaker ID + +**Technical Details:** +- **Audio Processing**: Uses browser's native sample rate (typically 44.1kHz or 48kHz) +- **Buffer Retention**: 120 seconds of audio for improved utterance capture +- **Real-time Updates**: Live transcription with speaker identification results + +#### Using Speaker Analysis +1. Go to Speakers page → Embedding Analysis tab +2. Select analysis method (UMAP, t-SNE, PCA) +3. Choose clustering algorithm (DBSCAN, K-means) +4. Adjust similarity threshold for speaker detection +5. 
View interactive plots and quality metrics + +### Deployment Notes +- Requires Docker rebuild to pick up new Python dependencies +- Frontend dependencies (Plotly.js) already included +- Live inference requires Deepgram API key for streaming transcription +- Speaker identification uses existing enrolled speakers from database + +### Live Inference Troubleshooting +- **"NaN:NaN" timestamps**: Fixed in recent updates, ensure you're using latest version +- **Poor speaker identification**: Try adjusting confidence threshold or re-enrolling speakers +- **Audio processing delays**: Check browser console for sample rate detection logs +- **Buffer overflow issues**: Extended to 120-second retention for better performance +- **"extraction_failed" errors**: Usually indicates audio buffer timing issues - check console logs for buffer availability + +## Distributed Self-Hosting Architecture + +Friend-Lite supports distributed deployment across multiple machines, allowing you to separate GPU-intensive services from lightweight backend components. This is ideal for scenarios where you have a dedicated GPU machine and want to run the main backend on a VPS or Raspberry Pi. + +### Architecture Patterns + +#### Single Machine (Default) +All services run on one machine using Docker Compose - ideal for development and simple deployments. 
+ +#### Distributed GPU Setup +**GPU Machine (High-performance):** +- LLM services (Ollama with GPU acceleration) +- ASR services (Parakeet with GPU) +- Speaker recognition service +- Deepgram fallback can remain on backend machine + +**Backend Machine (Lightweight - VPS/RPi):** +- Friend-Lite backend (FastAPI) +- React WebUI +- MongoDB +- Qdrant vector database + +### Networking with Tailscale + +Tailscale VPN provides secure, encrypted networking between distributed services: + +**Benefits:** +- **Zero configuration networking**: Services discover each other automatically +- **Encrypted communication**: All inter-service traffic is encrypted +- **Firewall friendly**: Works behind NATs and firewalls +- **Access control**: Granular permissions for service access +- **CORS support**: Built-in support for Tailscale IP ranges (100.x.x.x) + +**Installation:** +```bash +# On each machine +curl -fsSL https://tailscale.com/install.sh | sh +sudo tailscale up +``` + +### Distributed Service Configuration + +#### GPU Machine Services +```bash +# .env on GPU machine +OLLAMA_BASE_URL=http://0.0.0.0:11434 # Expose to Tailscale network +SPEAKER_SERVICE_URL=http://0.0.0.0:8085 + +# Enable GPU acceleration for Ollama +docker run -d --gpus=all -p 11434:11434 ollama/ollama:latest +``` + +#### Backend Machine Configuration +```bash +# .env on backend machine +OLLAMA_BASE_URL=http://100.x.x.x:11434 # GPU machine Tailscale IP +SPEAKER_SERVICE_URL=http://100.x.x.x:8085 # GPU machine Tailscale IP + +# Parakeet ASR services can also be distributed (if using offline ASR) +# PARAKEET_ASR_URL=http://100.x.x.x:8767 + +# CORS automatically supports Tailscale IPs (no configuration needed) +``` + +#### Service URL Examples + +**Common remote service configurations:** +```bash +# LLM Processing (GPU machine) +OLLAMA_BASE_URL=http://100.64.1.100:11434 +OPENAI_BASE_URL=http://100.64.1.100:8080 # For vLLM/OpenAI-compatible APIs + +# Speech Recognition (GPU machine) +# 
PARAKEET_ASR_URL=http://100.64.1.100:8767 # If using Parakeet ASR +SPEAKER_SERVICE_URL=http://100.64.1.100:8085 + +# Database services (can be on separate machine) +MONGODB_URI=mongodb://100.64.1.200:27017 # Database name: friend-lite +QDRANT_BASE_URL=http://100.64.1.200:6333 +``` + +### Deployment Steps + +#### 1. Set up Tailscale on all machines +```bash +# Install and connect each machine to your Tailscale network +curl -fsSL https://tailscale.com/install.sh | sh +sudo tailscale up +``` + +#### 2. Deploy GPU services +```bash +# On GPU machine - start GPU-accelerated services +cd extras/asr-services && docker compose up parakeet -d +cd extras/speaker-recognition && docker compose up --build -d + +# Start Ollama with GPU support +docker run -d --gpus=all -p 11434:11434 \ + -v ollama:/root/.ollama \ + ollama/ollama:latest +``` + +#### 3. Configure backend machine +```bash +# Update .env with Tailscale IPs of GPU machine +OLLAMA_BASE_URL=http://[gpu-machine-tailscale-ip]:11434 +SPEAKER_SERVICE_URL=http://[gpu-machine-tailscale-ip]:8085 + +# Start lightweight backend services +docker compose up --build -d +``` + +#### 4. 
Verify connectivity +```bash +# Test service connectivity from backend machine +curl http://[gpu-machine-ip]:11434/api/tags # Ollama +curl http://[gpu-machine-ip]:8085/health # Speaker recognition +``` + +### Performance Considerations + +**Network Latency:** +- Tailscale adds minimal latency (typically <5ms between nodes) +- LLM inference: Network time negligible compared to GPU processing +- ASR streaming: Use local fallback for latency-sensitive applications + +**Bandwidth Usage:** +- Audio streaming: ~128kbps for Opus, ~512kbps for PCM +- LLM requests: Typically <1MB per conversation +- Memory embeddings: ~3KB per memory vector + +**Processing Time Expectations:** +- Transcription (Deepgram): 2-5 seconds for 4-minute audio +- Transcription (Parakeet): 5-10 seconds for 4-minute audio +- Memory extraction (OpenAI GPT-4o-mini): 30-40 seconds for typical conversation +- Memory extraction (Ollama local): 45-90 seconds depending on model and GPU +- Full pipeline (4-min audio): 40-60 seconds with cloud services, 60-120 seconds with local models + +### Security Best Practices + +**Tailscale Access Control:** +```json +{ + "acls": [ + { + "action": "accept", + "src": ["tag:backend"], + "dst": ["tag:gpu:11434", "tag:gpu:8085", "tag:gpu:8767"] + } + ], + "tagOwners": { + "tag:backend": ["your-email@example.com"], + "tag:gpu": ["your-email@example.com"] + } +} +``` + +**Service Isolation:** +- Run GPU services in containers with limited network access +- Use Tailscale subnet routing for additional security +- Monitor service access logs for unauthorized requests + +### Troubleshooting Distributed Setup + +**Common Issues:** +- **CORS errors**: Tailscale IPs are automatically supported, but verify CORS_ORIGINS if using custom IPs +- **Service discovery**: Use `tailscale ip` to find machine IPs +- **Port conflicts**: Ensure services use different ports on shared machines +- **Authentication**: Services must be accessible without authentication for inter-service communication 
+ +**Debugging Commands:** +```bash +# Check Tailscale connectivity +tailscale ping [machine-name] +tailscale status + +# Test service endpoints +curl http://[tailscale-ip]:11434/api/tags +curl http://[tailscale-ip]:8085/health + +# Check Docker networks +docker network ls +docker ps --format "table {{.Names}}\t{{.Ports}}" +``` + +## Notes for Claude +Check if the src/ is volume mounted. If not, do compose build so that code changes are reflected. Do not simply run `docker compose restart` as it will not rebuild the image. +Check backends/advanced/Docs for up to date information on advanced backend. +All docker projects have .dockerignore following the exclude pattern. That means files need to be included for them to be visible to docker. +The uv package manager is used for all python projects. Wherever you'd call `python3 main.py` you'd call `uv run python main.py` + +**Docker Build Guidelines:** +- Use `docker compose build` without `--no-cache` by default for faster builds +- Only use `--no-cache` when explicitly needed (e.g., if cached layers are causing issues or when troubleshooting build problems) +- Docker's build cache is efficient and saves significant time during development + +- Remember that whenever there's a python command, you should use uv run python3 instead \ No newline at end of file diff --git a/Docs/features.md b/Docs/features.md new file mode 100644 index 00000000..25c5671c --- /dev/null +++ b/Docs/features.md @@ -0,0 +1,282 @@ +# Friend-Lite Features & Architecture + +## Core Features + +Friend-Lite supports AI-powered personal systems through multiple OMI-compatible audio devices: + +**Memory System:** +- **Advanced memory system** with pluggable providers (Friend-Lite native or OpenMemory MCP) +- **Memory extraction** from conversations with individual fact storage +- **Semantic memory search** with relevance threshold filtering and live results +- **Memory count display** with total count tracking from native providers +- **Speaker-based 
memory filtering** to control processing based on participant presence + +**Audio Processing:** +- **Action item detection** and tracking +- **Multi-device support** for comprehensive audio capture +- **Cross-client compatibility** (optional with OpenMemory MCP) + +**Device Support:** +- OMI pendants and wearables +- Smart glasses with audio capture +- Any Bluetooth-enabled audio device + +## Architecture Overview + +![Architecture Diagram](../.assets/plan.png) + +DevKit2 streams audio via Bluetooth using OPUS codec. The processing pipeline includes: + +**Audio Processing:** +- Bluetooth audio capture from OMI devices +- OPUS codec streaming to backend services +- WebSocket-based real-time audio transport + +**Transcription Services:** +- Cloud-based: Deepgram API for high-quality transcription +- Self-hosted: Local ASR services (Parakeet, Moonshine) + +**AI Processing:** +- LLM-based conversation analysis (OpenAI or local Ollama) +- **Dual memory system**: Friend-Lite native or OpenMemory MCP integration +- Enhanced memory extraction with individual fact storage +- **Semantic search** with relevance scoring and threshold filtering +- Smart deduplication and memory updates (ADD/UPDATE/DELETE) +- Action item detection + +**Data Storage:** +- MongoDB: User data, conversations, and transcripts +- Qdrant: Vector storage for semantic memory search +- Audio files: Optional conversation recording + +## Repository Structure + +### 📱 Mobile App (`app/`) +- **React Native app** for connecting to OMI devices via Bluetooth +- Streams audio in OPUS format to selected backend +- Cross-platform (iOS/Android) support +- Uses React Native Bluetooth SDK + +### 🖥️ Backends (`backends/`) + +Choose one based on your needs: + +#### **Simple Backend** (`backends/simple-backend/`) +**Use case:** Getting started, basic audio processing, learning + +**Features:** +- ✅ Basic audio ingestion (OPUS → PCM → WAV chunks) +- ✅ File-based storage (30-second segments) +- ✅ Minimal dependencies +- ✅ 
Quick setup + +**Requirements:** +- Minimal resource usage +- No external services + +**Limitations:** +- No transcription +- No memory/conversation management +- No speaker recognition +- Manual file management + +--- + +#### **Advanced Backend** (`backends/advanced/`) **RECOMMENDED** +**Use case:** Production use, full feature set + +**Features:** +- Audio processing pipeline with real-time WebSocket support +- **Pluggable memory system**: Choose between Friend-Lite native or OpenMemory MCP +- Enhanced memory extraction with individual fact storage (no generic fallbacks) +- **Semantic memory search** with relevance threshold filtering and total count display +- **Speaker-based memory filtering**: Optional control over processing based on participant presence +- Smart memory updates with LLM-driven action proposals (ADD/UPDATE/DELETE) +- Speaker recognition and enrollment +- Action items extraction from conversations +- Audio cropping (removes silence, keeps speech) +- Conversation management with session timeouts +- Modern React web UI with live recording and advanced search +- Multiple ASR options (Deepgram API + offline ASR) +- MongoDB for structured data storage +- RESTful API for all operations +- **Cross-client compatibility** (with OpenMemory MCP provider) + +**Requirements:** +- Multiple services (MongoDB, Qdrant, Ollama) +- Higher resource usage +- Authentication configuration + +--- + +#### **OMI-Webhook-Compatible Backend** (`backends/omi-webhook-compatible/`) +**Use case:** Existing OMI users, migration from official OMI backend + +**Features:** +- ✅ Compatible with official OMI app webhook system +- ✅ Drop-in replacement for OMI backend +- ✅ Audio file storage +- ✅ ngrok integration for public endpoints + +**Requirements:** +- ngrok for public access + +**Limitations:** +- Limited features compared to advanced backend +- No built-in AI features + +--- + +#### **Example Satellite Backend** (`backends/example-satellite/`) +**Use case:** Distributed 
setups, external ASR integration + +**Features:** +- ✅ Audio streaming satellite +- ✅ Streams audio to remote ASR servers +- ✅ Bluetooth OMI device discovery +- ✅ Integration with external voice processing systems + +**Requirements:** +- Separate ASR server + +**Limitations:** +- Limited standalone functionality + +### 🔧 Additional Services (`extras/`) + +#### **ASR Services** (`extras/asr-services/`) +- **Self-hosted** ASR services +- **Moonshine** - Fast offline ASR +- **Parakeet** - Alternative offline ASR +- Self-hosted transcription options + +#### **Speaker Recognition Service** (`extras/speaker-recognition/`) +- Standalone speaker identification service +- Used by advanced backend +- REST API for speaker operations + +#### **HAVPE Relay** (`extras/havpe-relay/`) +- Audio relay service +- Protocol bridging capabilities + +## Audio Streaming Protocol + +Backends and ASR services use standardized audio streaming: +- Consistent audio streaming format +- Interoperable with external systems +- Modular ASR service architecture +- Easy to swap ASR providers + +## Deployment Scenarios + +### Single Machine (Recommended for beginners) +1. **Clone the repository** +2. **Run interactive setup**: `uv run --with-requirements setup-requirements.txt python init.py` +3. **Start all services**: `python services.py start --all --build` +4. **Access WebUI**: `http://localhost:5173` for the React web dashboard + +### Distributed Setup (Advanced users with multiple machines) +1. **GPU Machine**: Deploy LLM services (Ollama, ASR, Speaker Recognition) + ```bash + # Ollama with GPU + docker run -d --gpus=all -p 11434:11434 ollama/ollama:latest + + # ASR services + cd extras/asr-services && docker compose up moonshine -d + + # Speaker recognition + cd extras/speaker-recognition && docker compose up --build -d + ``` + +2. 
**Backend Machine**: Deploy lightweight services
+   ```bash
+   cd backends/advanced
+   
+   # Configure distributed services in .env
+   OLLAMA_BASE_URL=http://[gpu-machine-tailscale-ip]:11434
+   SPEAKER_SERVICE_URL=http://[gpu-machine-tailscale-ip]:8085
+   
+   docker compose up --build -d
+   ```
+
+3. **Tailscale Networking**: Connect machines securely
+   ```bash
+   # On each machine
+   curl -fsSL https://tailscale.com/install.sh | sh
+   sudo tailscale up
+   ```
+
+## Use Case Recommendations
+
+### For Beginners
+1. Start with **Simple Backend** to understand the basics
+2. Use **mobile app** to connect your OMI device
+3. Examine saved audio chunks in `./audio_chunks/`
+
+### For Production Use
+1. Use **Advanced Backend** for full features
+2. Run the orchestrated setup: `uv run --with-requirements setup-requirements.txt python init.py`
+3. Start all services: `python services.py start --all --build`
+4. Access the Web UI at http://localhost:5173 for conversation management
+
+### For OMI Users
+1. Use **OMI-Webhook-Compatible Backend** for easy migration
+2. Configure ngrok for public webhook access
+3. Point your OMI app to the webhook URL
+
+### For Home Assistant Users
+1. Use **Example Satellite Backend** for audio streaming
+2. Set up ASR services from `extras/asr-services/`
+3. Configure external voice processing integration
+
+### For Distributed/Self-Hosting Users
+1. Use **Advanced Backend** for full feature set
+2. **Separate GPU services**: Run LLM/ASR on dedicated GPU machine
+3. **Lightweight backend**: Deploy FastAPI/WebUI on VPS or Raspberry Pi
+4. **Tailscale networking**: Secure VPN connection between services (automatic CORS support)
+5. 
**Service examples**: Ollama on GPU machine, backend on lightweight server + +## Service Ports & Access + +### Advanced Backend (Primary) + +**HTTP Mode (Default):** +``` +Web Dashboard: http://localhost:5173 +Backend API: http://localhost:8000 +MongoDB: localhost:27017 +Qdrant: localhost:6333 (HTTP), 6334 (gRPC) +``` + +**HTTPS Mode:** +``` +Web Dashboard: https://localhost/ or https://your-ip/ +Backend API: https://localhost/api/ or https://your-ip/api/ +(Internal services same as HTTP mode) +``` + +### Speaker Recognition Service + +**HTTP Mode:** +``` +Web UI: http://localhost:5174 +API: http://localhost:8085 +``` + +**HTTPS Mode (nginx proxy):** +``` +Web UI: https://localhost:8444/ or https://your-ip:8444/ +API: https://localhost:8444/api/ +HTTP: http://localhost:8081/ (redirects to HTTPS) +``` + +### Additional Services + +``` +Parakeet ASR: http://localhost:8767 +OpenMemory MCP: http://localhost:8765 (API + WebUI) +``` + +**Note:** HTTPS mode requires SSL certificate setup. See individual service documentation for SSL configuration details. + +For detailed port configuration, see [ports-and-access.md](ports-and-access.md). 
\ No newline at end of file diff --git a/Docs/init-system.md b/Docs/init-system.md new file mode 100644 index 00000000..fbcbcbe9 --- /dev/null +++ b/Docs/init-system.md @@ -0,0 +1,217 @@ +# Friend-Lite Initialization System + +## Quick Links + +- **👉 [Start Here: Quick Start Guide](../quickstart.md)** - Main setup path for new users +- **📚 [Full Documentation](../CLAUDE.md)** - Comprehensive reference +- **🏗️ [Architecture Details](features.md)** - Technical deep dive + +--- + +## Overview + +Friend-Lite uses a unified initialization system with clean separation of concerns: + +- **Configuration** (`init.py`) - Set up service configurations, API keys, and .env files +- **Service Management** (`services.py`) - Start, stop, and manage running services + +The root orchestrator handles service selection and delegates configuration to individual service scripts. In general, setup scripts only configure and do not start services automatically. Exceptions: `extras/asr-services` and `extras/openmemory-mcp` are startup scripts. This prevents unnecessary resource usage and gives you control over when services actually run. + +> **New to Friend-Lite?** Most users should start with the [Quick Start Guide](../quickstart.md) instead of this detailed reference. 
+
+## Architecture
+
+### Root Orchestrator
+- **Location**: `/init.py`
+- **Purpose**: Service selection and delegation only
+- **Does NOT**: Handle service-specific configuration or duplicate setup logic
+
+### Service Scripts
+- **Backend**: `backends/advanced/init.py` - Complete Python-based interactive setup
+- **Speaker Recognition**: `extras/speaker-recognition/setup.sh` - Simple bash setup
+- **ASR Services**: `extras/asr-services/setup.sh` - Service startup script
+- **OpenMemory MCP**: `extras/openmemory-mcp/setup.sh` - External server startup
+
+## Usage
+
+### Orchestrated Setup (Recommended)
+Set up multiple services together with automatic URL coordination:
+
+```bash
+# From project root
+uv run --with-requirements setup-requirements.txt python init.py
+```
+
+The orchestrator will:
+1. Show service status and availability
+2. Let you select which services to configure
+3. Automatically pass service URLs between services
+4. Display next steps for starting services
+
+### Individual Service Setup
+Each service can be configured independently:
+
+```bash
+# Advanced Backend only
+cd backends/advanced
+uv run --with-requirements setup-requirements.txt python init.py
+
+# Speaker Recognition only
+cd extras/speaker-recognition
+./setup.sh
+
+# ASR Services only
+cd extras/asr-services
+./setup.sh
+
+# OpenMemory MCP only
+cd extras/openmemory-mcp
+./setup.sh
+```
+
+## Service Details
+
+### Advanced Backend
+- **Interactive setup** for authentication, LLM, transcription, and memory providers
+- **Accepts arguments**: `--speaker-service-url`, `--parakeet-asr-url`
+- **Generates**: Complete `.env` file with all required configuration
+- **Default ports**: Backend (8000), WebUI (5173)
+
+### Speaker Recognition
+- **Prompts for**: Hugging Face token, compute mode (cpu/gpu)
+- **Service port**: 8085
+- **WebUI port**: 5174
+- **Requires**: HF_TOKEN for pyannote models
+
+### ASR Services
+- **Starts**: Parakeet ASR service via Docker Compose
+- **Service 
port**: 8767
+- **Purpose**: Offline speech-to-text processing
+- **No configuration required**
+
+### OpenMemory MCP
+- **Starts**: External OpenMemory MCP server
+- **Service port**: 8765
+- **WebUI**: Available at http://localhost:8765
+- **Purpose**: Cross-client memory compatibility
+
+## Automatic URL Coordination
+
+When using the orchestrated setup, service URLs are automatically configured:
+
+| Service Selected | Backend Gets Configured With |
+|----------------------|-------------------------------------------------------------------|
+| Speaker Recognition | `SPEAKER_SERVICE_URL=http://host.docker.internal:8085` |
+| ASR Services | `PARAKEET_ASR_URL=http://host.docker.internal:8767` |
+
+This eliminates the need to manually configure service URLs when running services on the same machine.
+Note (Linux): If `host.docker.internal` is unavailable, add `extra_hosts: - "host.docker.internal:host-gateway"` to the relevant services in `docker-compose.yml`.
+
+## Key Benefits
+
+✅ **No Unnecessary Building** - Services are only started when you explicitly request them
+✅ **Resource Efficient** - Parakeet ASR won't start if you're using cloud transcription
+✅ **Clean Separation** - Configuration vs service management are separate concerns
+✅ **Unified Control** - Single command to start/stop all services
+✅ **Selective Starting** - Choose which services to run based on your current needs
+
+## Service URLs
+
+### Default Service Endpoints
+- **Backend API**: http://localhost:8000
+- **Backend WebUI**: http://localhost:5173
+- **Speaker Recognition**: http://localhost:8085
+- **Speaker Recognition WebUI**: http://localhost:5174
+- **Parakeet ASR**: http://localhost:8767
+- **OpenMemory MCP**: http://localhost:8765
+
+### Container-to-Container Communication
+Services use `host.docker.internal` for inter-container communication:
+- `http://host.docker.internal:8085` - Speaker Recognition
+- `http://host.docker.internal:8767` - Parakeet ASR
+- 
`http://host.docker.internal:8765` - OpenMemory MCP + +## Service Management + +Friend-Lite now separates **configuration** from **service lifecycle management**: + +### Unified Service Management +Use the `services.py` script for all service operations: + +```bash +# Start all configured services +uv run --with-requirements setup-requirements.txt python services.py start --all --build + +# Start specific services +uv run --with-requirements setup-requirements.txt python services.py start backend speaker-recognition + +# Check service status +uv run --with-requirements setup-requirements.txt python services.py status + +# Stop all services +uv run --with-requirements setup-requirements.txt python services.py stop --all + +# Stop specific services +uv run --with-requirements setup-requirements.txt python services.py stop asr-services openmemory-mcp +``` + +### Manual Service Management +You can also manage services individually: + +```bash +# Advanced Backend +cd backends/advanced && docker compose up --build -d + +# Speaker Recognition +cd extras/speaker-recognition && docker compose up --build -d + +# ASR Services (only if using offline transcription) +cd extras/asr-services && docker compose up --build -d + +# OpenMemory MCP (only if using openmemory_mcp provider) +cd extras/openmemory-mcp && docker compose up --build -d +``` + +## Configuration Files + +### Generated Files +- `backends/advanced/.env` - Backend configuration with all services +- `extras/speaker-recognition/.env` - Speaker service configuration +- All services backup existing `.env` files automatically + +### Required Dependencies +- **Root**: `setup-requirements.txt` (rich>=13.0.0) +- **Backend**: `setup-requirements.txt` (rich>=13.0.0, pyyaml>=6.0.0) +- **Extras**: No additional setup dependencies required + +## Troubleshooting + +### Common Issues +- **Port conflicts**: Check if services are already running on default ports +- **Permission errors**: Ensure scripts are executable (`chmod +x 
setup.sh`) +- **Missing dependencies**: Install uv and ensure setup-requirements.txt dependencies available +- **Service startup failures**: Check Docker is running and has sufficient resources + +### Service Health Checks +```bash +# Backend health +curl http://localhost:8000/health + +# Speaker Recognition health +curl http://localhost:8085/health + +# ASR service health +curl http://localhost:8767/health +``` + +### Logs and Debugging +```bash +# View service logs +docker compose logs [service-name] + +# Backend logs +cd backends/advanced && docker compose logs friend-backend + +# Speaker Recognition logs +cd extras/speaker-recognition && docker compose logs speaker-service +``` \ No newline at end of file diff --git a/Docs/ports-and-access.md b/Docs/ports-and-access.md new file mode 100644 index 00000000..f93137b7 --- /dev/null +++ b/Docs/ports-and-access.md @@ -0,0 +1,143 @@ +# Friend-Lite Port Configuration & User Journey + +## User Journey: Git Clone to Running Services + +### 1. Clone & Setup +```bash +git clone +cd friend-lite + +# Configure all services +uv run --with-requirements setup-requirements.txt python init.py + +# Start all configured services +uv run --with-requirements setup-requirements.txt python services.py start --all --build +``` + +### 2. Service Access Points + +## HTTP Mode (Default - No SSL Required) + +| Service | API Port | Web UI Port | Access URL | +|---------|----------|-------------|------------| +| **Advanced Backend** | 8000 | 5173 | http://localhost:8000 (API)
http://localhost:5173 (Dashboard) |
+| **Speaker Recognition** | 8085 | 5174* | http://localhost:8085 (API)
http://localhost:5174 (WebUI) |
+| **Parakeet ASR** | 8767 | - | http://localhost:8767 (API) |
+| **OpenMemory MCP** | 8765 | 8765 | http://localhost:8765 (API + WebUI) |
+
+*Note: Speaker Recognition WebUI port is configurable via REACT_UI_PORT (default varies by mode)
+
+**🌐 Main Dashboard**: http://localhost:5173
+**🎤 Speaker Recognition**: http://localhost:5174
+**❌ No microphone access** - browsers require HTTPS for microphone
+
+---
+
+## HTTPS Mode (For Microphone Access)
+
+| Service | HTTP Port | HTTPS Port | Access URL | Microphone Access |
+|---------|-----------|------------|------------|-------------------|
+| **Advanced Backend** | 80→443 | 443 | https://localhost/ (Main)
https://localhost/api/ (API) | ✅ Yes | +| **Speaker Recognition** | 8081→8444 | 8444 | https://localhost:8444/ (Main)
https://localhost:8444/api/ (API) | ✅ Yes | + +**IMPORTANT**: nginx services start automatically with the standard docker compose command + +**🌐 Main Dashboard**: https://localhost/ (Advanced Backend with SSL) +**🎤 Speaker Recognition**: https://localhost:8444/ (Speaker Recognition with SSL) +**✅ Full microphone access** - both services secured with SSL + +### Port Details (HTTPS Mode) +- **Advanced Backend nginx**: Ports 80 (HTTP redirect) + 443 (HTTPS) +- **Speaker Recognition nginx**: Ports 8081 (HTTP redirect) + 8444 (HTTPS) +- **No port conflicts** - different port ranges for each service + +--- + +## Why Two Modes? + +### HTTP Mode (Default) +✅ **Simple setup** - No SSL certificates needed +✅ **Development friendly** - Quick start for testing +❌ **No microphone access** - Browsers require HTTPS for microphone + +### HTTPS Mode (Advanced) +✅ **Microphone access** - Browsers allow mic access over HTTPS +✅ **Production ready** - Secure for real deployments +❌ **Complex setup** - Requires SSL certificate generation + +--- + +## Configuration Files + +### Speaker Recognition Modes + +The speaker recognition service supports both modes via configuration: + +**HTTP Mode (.env)**: +```bash +REACT_UI_PORT=5174 # Direct HTTP access +REACT_UI_HTTPS=false +``` + +**HTTPS Mode (.env)**: +```bash +REACT_UI_PORT=5175 # Internal HTTPS port (proxied through nginx) +REACT_UI_HTTPS=true +# nginx provides external access on ports 8081 (HTTP redirect) and 8444 (HTTPS) +# Start with: docker compose up -d +``` + +--- + +## Service Management Commands + +```bash +# Check what's running +uv run --with-requirements setup-requirements.txt python services.py status + +# Start all services +uv run --with-requirements setup-requirements.txt python services.py start --all --build + +# Start only specific services +uv run --with-requirements setup-requirements.txt python services.py start backend speaker-recognition + +# Stop all services +uv run --with-requirements setup-requirements.txt 
python services.py stop --all +``` + +--- + +## Microphone Access Requirements + +For **speaker recognition** and **live audio features** to work: + +1. **Local development**: Use HTTP mode, access via `http://localhost:5174` + - Some browsers allow localhost microphone access over HTTP + +2. **Production/Remote access**: Use HTTPS mode, access via `https://localhost:8444` + - All browsers require HTTPS for microphone access over network + +3. **Mixed setup**: Keep backend on HTTP, only enable HTTPS for speaker recognition when needed + +--- + +## Port Conflict Resolution + +If you encounter port conflicts: + +1. **Check running services**: `uv run --with-requirements setup-requirements.txt python services.py status` +2. **Stop conflicting services**: `uv run --with-requirements setup-requirements.txt python services.py stop --all` +3. **Change ports in .env files** if needed +4. **Restart services**: `uv run --with-requirements setup-requirements.txt python services.py start --all` + +--- + +## Summary: Default User Experience + +After `git clone` and running init + services: + +🌐 **Main Application**: http://localhost:5173 +🎤 **Speaker Recognition**: http://localhost:5174 (HTTP) or https://localhost:8444 (HTTPS) +🔧 **Backend API**: http://localhost:8000 +📝 **ASR Service**: http://localhost:8767 +🧠 **Memory Service**: http://localhost:8765 \ No newline at end of file diff --git a/Docs/redis-streams-transcription.md b/Docs/redis-streams-transcription.md new file mode 100644 index 00000000..6fc2d918 --- /dev/null +++ b/Docs/redis-streams-transcription.md @@ -0,0 +1,698 @@ +# Redis Streams Multi-Provider Audio Transcription Implementation Guide + +## Overview + +This guide implements a Redis Streams-based architecture for real-time audio transcription using multiple providers (Deepgram, Whisper, Parakeet, etc.). The system provides intelligent routing, automatic fallbacks, and optimized performance for different use cases. 
+ +## Architecture Benefits + +- **Real-time Processing**: Time-ordered audio chunks with millisecond precision +- **Multi-Provider Support**: Intelligent routing between Deepgram, Whisper, AssemblyAI, etc. +- **Automatic Failover**: Zero data loss with provider fallback chains +- **Scalability**: Consumer groups enable horizontal scaling +- **Persistence**: Audio chunks aren't lost if consumers disconnect +- **Load Balancing**: Distribute load based on provider strengths and capacity + +## Core Architecture + +``` +Audio Input → Redis Streams Router → Provider-Specific Streams → Consumer Groups → Results Aggregation + ↓ + [deepgram_stream] → [deepgram_workers] + [whisper_stream] → [whisper_workers] + [parakeet_stream] → [parakeet_workers] +``` + +## Implementation + +### 1. Core Dependencies + +```python +# requirements.txt +redis>=4.5.0 +faster-whisper>=0.10.0 +deepgram-sdk>=3.0.0 +assemblyai>=0.17.0 +pydantic>=2.0.0 +asyncio +uuid +``` + +### 2. Provider Configuration + +```python +from dataclasses import dataclass +from enum import Enum +from typing import List, Optional +import redis +import time +import uuid +import json + +class TranscriptionProvider(Enum): + DEEPGRAM = "deepgram" + WHISPER = "whisper" + PARAKEET = "parakeet" + ASSEMBLYAI = "assemblyai" + +@dataclass +class AudioChunk: + data: bytes + session_id: str + chunk_id: str + timestamp: float + preferred_provider: Optional[TranscriptionProvider] = None + fallback_providers: Optional[List[TranscriptionProvider]] = None + real_time_required: bool = False + accuracy_critical: bool = False + cost_sensitive: bool = False + +@dataclass +class TranscriptionResult: + text: str + confidence: float + provider: str + processing_time: float + chunk_id: str + session_id: str +``` + +### 3. 
Redis Streams Producer + +```python +class AudioStreamProducer: + def __init__(self, redis_host='localhost', redis_port=6379): + self.redis_client = redis.Redis(host=redis_host, port=redis_port, decode_responses=True) + + def add_audio_chunk(self, chunk: AudioChunk): + """Add audio chunk to appropriate provider stream""" + provider = self.select_optimal_provider(chunk) + + stream_name = f"audio:stream:{provider.value}" + + # Prepare chunk data for Redis + chunk_data = { + "chunk_data": chunk.data.hex(), # Convert bytes to hex string + "session_id": chunk.session_id, + "chunk_id": chunk.chunk_id, + "timestamp": chunk.timestamp, + "real_time_required": str(chunk.real_time_required), + "accuracy_critical": str(chunk.accuracy_critical), + "cost_sensitive": str(chunk.cost_sensitive), + "fallback_providers": ",".join([p.value for p in chunk.fallback_providers or []]) + } + + # Add to stream + message_id = self.redis_client.xadd(stream_name, chunk_data) + + print(f"Added chunk {chunk.chunk_id} to {provider.value} stream: {message_id}") + return message_id + + def select_optimal_provider(self, chunk: AudioChunk) -> TranscriptionProvider: + """Intelligent provider selection""" + + # Explicit preference + if chunk.preferred_provider: + return chunk.preferred_provider + + # Real-time requirements → Deepgram (lowest latency) + if chunk.real_time_required: + return TranscriptionProvider.DEEPGRAM + + # High accuracy requirements → Whisper (highest accuracy) + if chunk.accuracy_critical: + return TranscriptionProvider.WHISPER + + # Cost optimization → Whisper (self-hosted) + if chunk.cost_sensitive: + return TranscriptionProvider.WHISPER + + # Load balancing - use least busy provider + return self.get_least_loaded_provider() + + def get_least_loaded_provider(self) -> TranscriptionProvider: + """Select provider with shortest queue""" + queue_lengths = {} + + for provider in TranscriptionProvider: + stream_name = f"audio:stream:{provider.value}" + try: + length = 
self.redis_client.xlen(stream_name) + queue_lengths[provider] = length + except: + queue_lengths[provider] = 0 + + return min(queue_lengths, key=queue_lengths.get) +``` + +### 4. Provider-Specific Consumers + +```python +import os +import threading +from abc import ABC, abstractmethod + +class BaseTranscriptionConsumer(ABC): + def __init__(self, provider_name: str, redis_host='localhost', redis_port=6379): + self.provider_name = provider_name + self.redis_client = redis.Redis(host=redis_host, port=redis_port, decode_responses=True) + self.stream_name = f"audio:stream:{provider_name}" + self.group_name = f"{provider_name}_workers" + self.consumer_name = f"{provider_name}-worker-{os.getpid()}" + + # Create consumer group + try: + self.redis_client.xgroup_create( + self.stream_name, + self.group_name, + "0", + mkstream=True + ) + except redis.exceptions.ResponseError: + pass # Group already exists + + @abstractmethod + def transcribe_audio(self, audio_data: bytes) -> TranscriptionResult: + """Implement provider-specific transcription""" + pass + + def start_consuming(self): + """Start consuming messages from the stream""" + print(f"Starting {self.provider_name} consumer: {self.consumer_name}") + + while True: + try: + messages = self.redis_client.xreadgroup( + self.group_name, + self.consumer_name, + {self.stream_name: ">"}, + count=1, + block=1000 + ) + + for stream, msgs in messages: + for message_id, fields in msgs: + self.process_message(message_id, fields) + + except Exception as e: + print(f"Error in {self.provider_name} consumer: {e}") + time.sleep(1) + + def process_message(self, message_id: str, fields: dict): + """Process a single message""" + try: + start_time = time.time() + + # Convert hex string back to bytes + audio_data = bytes.fromhex(fields['chunk_data']) + + # Transcribe + result = self.transcribe_audio(audio_data) + result.chunk_id = fields['chunk_id'] + result.session_id = fields['session_id'] + result.provider = self.provider_name + 
result.processing_time = time.time() - start_time + + # Store result + self.store_result(result) + + # Acknowledge message + self.redis_client.xack(self.group_name, self.stream_name, message_id) + + print(f"Processed {fields['chunk_id']} with {self.provider_name}") + + except Exception as e: + print(f"Failed to process {fields['chunk_id']} with {self.provider_name}: {e}") + self.handle_failure(fields, str(e)) + + def store_result(self, result: TranscriptionResult): + """Store transcription result""" + result_data = { + "text": result.text, + "confidence": result.confidence, + "provider": result.provider, + "processing_time": result.processing_time, + "chunk_id": result.chunk_id, + "session_id": result.session_id, + "timestamp": time.time() + } + + # Store in results stream + self.redis_client.xadd( + f"transcription:results:{result.session_id}", + result_data + ) + + # Also store in global results stream for monitoring + self.redis_client.xadd("transcription:results:all", result_data) + + def handle_failure(self, fields: dict, error: str): + """Handle transcription failure with fallback""" + fallback_providers = fields.get('fallback_providers', '').split(',') + fallback_providers = [p for p in fallback_providers if p and p != self.provider_name] + + if not fallback_providers: + fallback_providers = self.get_default_fallback_chain() + + if fallback_providers: + next_provider = fallback_providers[0] + + # Route to fallback provider + fallback_data = { + **fields, + "original_provider": self.provider_name, + "retry_count": str(int(fields.get("retry_count", "0")) + 1), + "fallback_providers": ",".join(fallback_providers[1:]), + "error_history": f"{fields.get('error_history', '')};{self.provider_name}:{error}" + } + + self.redis_client.xadd(f"audio:stream:{next_provider}", fallback_data) + + # Log failure + self.redis_client.xadd( + "transcription:failures", + { + "failed_provider": self.provider_name, + "chunk_id": fields['chunk_id'], + "error": error, + "timestamp": 
time.time() + } + ) + + def get_default_fallback_chain(self): + """Default fallback chain for this provider""" + fallback_chains = { + "deepgram": ["whisper", "assemblyai"], + "whisper": ["deepgram", "assemblyai"], + "parakeet": ["whisper", "deepgram"], + "assemblyai": ["deepgram", "whisper"] + } + return fallback_chains.get(self.provider_name, ["whisper"]) + +class DeepgramConsumer(BaseTranscriptionConsumer): + def __init__(self, api_key: str, **kwargs): + super().__init__("deepgram", **kwargs) + from deepgram import Deepgram + self.deepgram = Deepgram(api_key) + + def transcribe_audio(self, audio_data: bytes) -> TranscriptionResult: + """Transcribe using Deepgram API""" + try: + # Deepgram API call + response = self.deepgram.transcription.sync_prerecorded( + {"buffer": audio_data, "mimetype": "audio/wav"}, + {"punctuate": True, "model": "nova-2"} + ) + + transcript = response["results"]["channels"][0]["alternatives"][0]["transcript"] + confidence = response["results"]["channels"][0]["alternatives"][0]["confidence"] + + return TranscriptionResult( + text=transcript, + confidence=confidence, + provider="deepgram", + processing_time=0, # Will be set in process_message + chunk_id="", + session_id="" + ) + + except Exception as e: + raise Exception(f"Deepgram transcription failed: {e}") + +class WhisperConsumer(BaseTranscriptionConsumer): + def __init__(self, model_size="large-v3", **kwargs): + super().__init__("whisper", **kwargs) + from faster_whisper import WhisperModel + self.model = WhisperModel(model_size, device="cuda", compute_type="float16") + + def transcribe_audio(self, audio_data: bytes) -> TranscriptionResult: + """Transcribe using Whisper""" + try: + # Save audio data to temporary file + import tempfile + import os + + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file: + tmp_file.write(audio_data) + tmp_path = tmp_file.name + + try: + segments, info = self.model.transcribe(tmp_path, beam_size=5) + + transcript = "" + 
total_confidence = 0 + segment_count = 0 + + for segment in segments: + transcript += segment.text + " " + if hasattr(segment, 'avg_logprob'): + total_confidence += segment.avg_logprob + segment_count += 1 + + confidence = total_confidence / segment_count if segment_count > 0 else 0 + + return TranscriptionResult( + text=transcript.strip(), + confidence=confidence, + provider="whisper", + processing_time=0, + chunk_id="", + session_id="" + ) + + finally: + os.unlink(tmp_path) + + except Exception as e: + raise Exception(f"Whisper transcription failed: {e}") +``` + +### 5. Results Aggregation + +```python +class TranscriptionResultsConsumer: + def __init__(self, redis_host='localhost', redis_port=6379): + self.redis_client = redis.Redis(host=redis_host, port=redis_port, decode_responses=True) + + def get_session_results(self, session_id: str, wait_timeout: int = 30): + """Get all transcription results for a session""" + stream_name = f"transcription:results:{session_id}" + + try: + # Read all messages from the beginning + messages = self.redis_client.xrange(stream_name) + + results = [] + for message_id, fields in messages: + results.append({ + "message_id": message_id, + "text": fields['text'], + "confidence": float(fields['confidence']), + "provider": fields['provider'], + "processing_time": float(fields['processing_time']), + "chunk_id": fields['chunk_id'], + "timestamp": float(fields['timestamp']) + }) + + # Sort by chunk_id or timestamp + results.sort(key=lambda x: x['timestamp']) + return results + + except Exception as e: + print(f"Error getting results for session {session_id}: {e}") + return [] + + def get_realtime_results(self, session_id: str, last_id: str = "0"): + """Get new results since last_id for real-time streaming""" + stream_name = f"transcription:results:{session_id}" + + try: + messages = self.redis_client.xread({stream_name: last_id}, count=10, block=1000) + + results = [] + new_last_id = last_id + + for stream, msgs in messages: + for 
message_id, fields in msgs: + results.append({ + "message_id": message_id, + "text": fields['text'], + "confidence": float(fields['confidence']), + "provider": fields['provider'], + "chunk_id": fields['chunk_id'] + }) + new_last_id = message_id + + return results, new_last_id + + except Exception as e: + print(f"Error getting realtime results: {e}") + return [], last_id +``` + +### 6. Multi-Provider Orchestration + +```python +class MultiProviderOrchestrator: + def __init__(self, redis_host='localhost', redis_port=6379): + self.producer = AudioStreamProducer(redis_host, redis_port) + self.results_consumer = TranscriptionResultsConsumer(redis_host, redis_port) + self.redis_client = redis.Redis(host=redis_host, port=redis_port, decode_responses=True) + + def transcribe_with_consensus(self, audio_data: bytes, session_id: str, providers: List[TranscriptionProvider] = None): + """Send same audio to multiple providers for consensus""" + if not providers: + providers = [TranscriptionProvider.DEEPGRAM, TranscriptionProvider.WHISPER] + + correlation_id = str(uuid.uuid4()) + chunk_id = f"consensus_{correlation_id}" + + # Send to multiple providers + for provider in providers: + chunk = AudioChunk( + data=audio_data, + session_id=f"{session_id}_consensus_{correlation_id}", + chunk_id=chunk_id, + timestamp=time.time(), + preferred_provider=provider + ) + self.producer.add_audio_chunk(chunk) + + return correlation_id + + def get_consensus_result(self, correlation_id: str, timeout: int = 30): + """Wait for and aggregate consensus results""" + session_id = f"*_consensus_{correlation_id}" + + # Wait for results from all providers + start_time = time.time() + results = [] + + while len(results) < 2 and (time.time() - start_time) < timeout: + # Check for new results + all_results = self.redis_client.keys(f"transcription:results:*_consensus_{correlation_id}") + + for result_stream in all_results: + session_results = 
self.results_consumer.get_session_results(result_stream.split(':')[-1]) + results.extend(session_results) + + if len(results) < 2: + time.sleep(0.5) + + if len(results) >= 2: + return self.select_best_consensus_result(results) + + return results[0] if results else None + + def select_best_consensus_result(self, results: List[dict]) -> dict: + """Select best result from consensus""" + # Simple strategy: highest confidence + return max(results, key=lambda x: x['confidence']) +``` + +### 7. Production Setup + +```python +import threading +import signal +import sys + +class TranscriptionService: + def __init__(self, config: dict): + self.config = config + self.consumers = [] + self.producer = AudioStreamProducer( + config['redis']['host'], + config['redis']['port'] + ) + + def start_all_consumers(self): + """Start all provider consumers""" + + # Deepgram Consumer + if 'deepgram' in self.config['providers']: + deepgram_consumer = DeepgramConsumer( + api_key=self.config['providers']['deepgram']['api_key'], + redis_host=self.config['redis']['host'], + redis_port=self.config['redis']['port'] + ) + consumer_thread = threading.Thread(target=deepgram_consumer.start_consuming) + consumer_thread.daemon = True + consumer_thread.start() + self.consumers.append(consumer_thread) + + # Whisper Consumer + if 'whisper' in self.config['providers']: + whisper_consumer = WhisperConsumer( + model_size=self.config['providers']['whisper']['model_size'], + redis_host=self.config['redis']['host'], + redis_port=self.config['redis']['port'] + ) + consumer_thread = threading.Thread(target=whisper_consumer.start_consuming) + consumer_thread.daemon = True + consumer_thread.start() + self.consumers.append(consumer_thread) + + print(f"Started {len(self.consumers)} consumer threads") + + def shutdown(self): + """Graceful shutdown""" + print("Shutting down transcription service...") + # Consumers will stop when main thread exits (daemon threads) + +# Example configuration +config = { + "redis": { + 
"host": "localhost", + "port": 6379 + }, + "providers": { + "deepgram": { + "api_key": "your_deepgram_api_key" + }, + "whisper": { + "model_size": "large-v3" + } + } +} + +# Production startup +if __name__ == "__main__": + service = TranscriptionService(config) + + # Handle graceful shutdown + def signal_handler(sig, frame): + service.shutdown() + sys.exit(0) + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + # Start all consumers + service.start_all_consumers() + + print("Transcription service running. Press Ctrl+C to stop.") + + # Keep main thread alive + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + service.shutdown() +``` + +### 8. Usage Examples + +```python +# Example: Basic transcription +def basic_example(): + producer = AudioStreamProducer() + + # Load audio file + with open("audio.wav", "rb") as f: + audio_data = f.read() + + # Create chunk + chunk = AudioChunk( + data=audio_data, + session_id="session_123", + chunk_id="chunk_001", + timestamp=time.time(), + real_time_required=True # Will route to Deepgram + ) + + # Send for transcription + message_id = producer.add_audio_chunk(chunk) + print(f"Sent audio chunk: {message_id}") + +# Example: Real-time streaming +def realtime_streaming_example(): + producer = AudioStreamProducer() + results_consumer = TranscriptionResultsConsumer() + + session_id = "realtime_session_456" + last_id = "0" + + # Simulate real-time audio chunks + for i in range(10): + # In real implementation, this would come from microphone + audio_chunk = simulate_audio_chunk(i) + + chunk = AudioChunk( + data=audio_chunk, + session_id=session_id, + chunk_id=f"chunk_{i:03d}", + timestamp=time.time(), + real_time_required=True + ) + + producer.add_audio_chunk(chunk) + + # Check for results + results, last_id = results_consumer.get_realtime_results(session_id, last_id) + for result in results: + print(f"Real-time result: {result['text']}") + + time.sleep(2) # 2-second chunks + 
+def simulate_audio_chunk(chunk_num): + # Placeholder - replace with actual audio data + return b"fake_audio_data_" + str(chunk_num).encode() + +if __name__ == "__main__": + # Run examples + basic_example() + realtime_streaming_example() +``` + +## Monitoring and Troubleshooting + +### Key Redis Commands for Monitoring + +```bash +# Check stream lengths +XLEN audio:stream:deepgram +XLEN audio:stream:whisper + +# Check consumer group info +XINFO GROUPS audio:stream:deepgram + +# Check pending messages +XPENDING audio:stream:deepgram deepgram_workers + +# Monitor failures +XRANGE transcription:failures - + + +# View recent results +XREVRANGE transcription:results:all + - COUNT 10 +``` + +### Performance Tuning + +1. **Redis Configuration**: + - Set `maxmemory-policy allkeys-lru` + - Use `XTRIM` to prevent streams from growing unbounded + - Enable persistence with AOF for reliability + +2. **Consumer Scaling**: + - Run multiple consumer processes per provider + - Use different Redis consumer names for each process + - Monitor queue lengths and scale accordingly + +3. **Provider Selection**: + - Deepgram: Best for real-time, low latency + - Whisper: Best for accuracy, supports many languages + - AssemblyAI: Good balance of speed and accuracy + +## Deployment Checklist + +- [ ] Redis server configured and running +- [ ] Provider API keys configured +- [ ] Consumer processes started for each provider +- [ ] Monitoring dashboards set up +- [ ] Fallback chains tested +- [ ] Stream retention policies configured +- [ ] Error handling and logging in place +- [ ] Load testing completed + +This architecture provides a robust, scalable foundation for multi-provider audio transcription with Redis Streams handling all the orchestration! 
\ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..4f470f94 --- /dev/null +++ b/Makefile @@ -0,0 +1,356 @@ +# ======================================== +# Friend-Lite Management System +# ======================================== +# Central management interface for Friend-Lite project +# Handles configuration, deployment, and maintenance tasks + +# Load environment variables from .env file +ifneq (,$(wildcard ./.env)) + include .env + export $(shell sed 's/=.*//' .env | grep -v '^\s*$$' | grep -v '^\s*\#') +endif + +# Load configuration definitions +include config.env +# Export all variables from config.env +export $(shell sed 's/=.*//' config.env | grep -v '^\s*$$' | grep -v '^\s*\#') + +# Script directories +SCRIPTS_DIR := scripts +K8S_SCRIPTS_DIR := $(SCRIPTS_DIR)/k8s + +.PHONY: help menu setup-k8s setup-infrastructure setup-rbac setup-storage-pvc config config-docker config-k8s config-all clean deploy deploy-docker deploy-k8s deploy-k8s-full deploy-infrastructure deploy-apps check-infrastructure check-apps build-backend up-backend down-backend k8s-status k8s-cleanup k8s-purge audio-manage test-robot test-robot-integration test-robot-unit test-robot-endpoints test-robot-specific test-robot-clean + +# Default target +.DEFAULT_GOAL := menu + +menu: ## Show interactive menu (default) + @echo "🎯 Friend-Lite Management System" + @echo "================================" + @echo + @echo "📋 Quick Actions:" + @echo " setup-k8s 🏗️ Complete Kubernetes setup (registry + infrastructure + RBAC)" + @echo " config 📝 Generate all configuration files" + @echo " deploy 🚀 Deploy using configured mode ($(DEPLOYMENT_MODE))" + @echo " k8s-status 📊 Check Kubernetes cluster status" + @echo " k8s-cleanup 🧹 Clean up Kubernetes resources" + @echo " audio-manage 🎵 Manage audio files" + @echo + @echo "🧪 Testing:" + @echo " test-robot 🧪 Run all Robot Framework tests" + @echo " test-robot-integration 🔬 Run integration tests only" + @echo " 
test-robot-endpoints 🌐 Run endpoint tests only" + @echo + @echo "📝 Configuration:" + @echo " config-docker 🐳 Generate Docker Compose .env files" + @echo " config-k8s ☸️ Generate Kubernetes files (Skaffold env + ConfigMap/Secret)" + @echo + @echo "🚀 Deployment:" + @echo " deploy-docker 🐳 Deploy with Docker Compose" + @echo " deploy-k8s ☸️ Deploy to Kubernetes with Skaffold" + @echo " deploy-k8s-full 🏗️ Deploy infrastructure + applications" + @echo + @echo "🔧 Utilities:" + @echo " k8s-purge 🗑️ Purge unused images (registry + container)" + @echo " check-infrastructure 🔍 Check infrastructure services" + @echo " check-apps 🔍 Check application services" + @echo " clean 🧹 Clean up generated files" + @echo + @echo "Current configuration:" + @echo " DOMAIN: $(DOMAIN)" + @echo " DEPLOYMENT_MODE: $(DEPLOYMENT_MODE)" + @echo " CONTAINER_REGISTRY: $(CONTAINER_REGISTRY)" + @echo " SPEAKER_NODE: $(SPEAKER_NODE)" + @echo " INFRASTRUCTURE_NAMESPACE: $(INFRASTRUCTURE_NAMESPACE)" + @echo " APPLICATION_NAMESPACE: $(APPLICATION_NAMESPACE)" + @echo + @echo "💡 Tip: Run 'make help' for detailed help on any target" + +help: ## Show detailed help for all targets + @echo "🎯 Friend-Lite Management System - Detailed Help" + @echo "================================================" + @echo + @echo "🏗️ KUBERNETES SETUP:" + @echo " setup-k8s Complete initial Kubernetes setup" + @echo " - Configures insecure registry access" + @echo " - Sets up infrastructure services (MongoDB, Qdrant)" + @echo " - Creates shared models PVC" + @echo " - Sets up cross-namespace RBAC" + @echo " - Generates and applies configuration" + @echo " setup-infrastructure Deploy infrastructure services (MongoDB, Qdrant)" + @echo " setup-rbac Set up cross-namespace RBAC" + @echo " setup-storage-pvc Create shared models PVC" + @echo + @echo "📝 CONFIGURATION:" + @echo " config Generate all configuration files (Docker + K8s)" + @echo " config-docker Generate Docker Compose .env files" + @echo " config-k8s Generate Kubernetes 
files (Skaffold env + ConfigMap/Secret)" + @echo + @echo "🚀 DEPLOYMENT:" + @echo " deploy Deploy using configured deployment mode" + @echo " deploy-docker Deploy with Docker Compose" + @echo " deploy-k8s Deploy to Kubernetes with Skaffold" + @echo " deploy-k8s-full Deploy infrastructure + applications" + @echo + @echo "🔧 KUBERNETES UTILITIES:" + @echo " k8s-status Check Kubernetes cluster status and health" + @echo " k8s-cleanup Clean up Kubernetes resources and storage" + @echo " k8s-purge Purge unused images (registry + container)" + @echo + @echo "🎵 AUDIO MANAGEMENT:" + @echo " audio-manage Interactive audio file management" + @echo + @echo "🧪 ROBOT FRAMEWORK TESTING:" + @echo " test-robot Run all Robot Framework tests" + @echo " test-robot-integration Run integration tests only" + @echo " test-robot-endpoints Run endpoint tests only" + @echo " test-robot-specific FILE=path Run specific test file" + @echo " test-robot-clean Clean up test results" + @echo + @echo "🔍 MONITORING:" + @echo " check-infrastructure Check if infrastructure services are running" + @echo " check-apps Check if application services are running" + @echo + @echo "🧹 CLEANUP:" + @echo " clean Clean up generated configuration files" + +# ======================================== +# KUBERNETES SETUP +# ======================================== + +setup-k8s: ## Initial Kubernetes setup (registry + infrastructure) + @echo "🏗️ Starting Kubernetes initial setup..." 
+ @echo "This will set up the complete infrastructure for Friend-Lite" + @echo + @echo "📋 Setup includes:" + @echo " • Insecure registry configuration" + @echo " • Infrastructure services (MongoDB, Qdrant)" + @echo " • Shared models PVC for speaker recognition" + @echo " • Cross-namespace RBAC" + @echo " • Configuration generation and application" + @echo + @read -p "Enter your Kubernetes node IP address: " node_ip; \ + if [ -z "$$node_ip" ]; then \ + echo "❌ Node IP is required"; \ + exit 1; \ + fi; \ + echo "🔧 Step 1: Configuring insecure registry access on $$node_ip..."; \ + $(SCRIPTS_DIR)/configure-insecure-registry-remote.sh $$node_ip; \ + echo "📦 Step 2: Setting up storage for speaker recognition..."; \ + $(K8S_SCRIPTS_DIR)/setup-storage.sh; \ + echo "📝 Step 3: Generating configuration files..."; \ + $(MAKE) config-k8s; \ + echo "🏗️ Step 4: Setting up infrastructure services..."; \ + $(MAKE) setup-infrastructure; \ + echo "🔐 Step 5: Setting up cross-namespace RBAC..."; \ + $(MAKE) setup-rbac; \ + echo "💾 Step 6: Creating shared models PVC..."; \ + $(MAKE) setup-storage-pvc; \ + echo "✅ Kubernetes initial setup completed!" + @echo + @echo "🎯 Next steps:" + @echo " • Run 'make deploy' to deploy applications" + @echo " • Run 'make k8s-status' to check cluster status" + @echo " • Run 'make help' for more options" + +setup-infrastructure: ## Set up infrastructure services (MongoDB, Qdrant) + @echo "🏗️ Setting up infrastructure services..." + @echo "Deploying MongoDB and Qdrant to $(INFRASTRUCTURE_NAMESPACE) namespace..." + @set -a; source skaffold.env; set +a; skaffold run --profile=infrastructure --default-repo=$(CONTAINER_REGISTRY) + @echo "⏳ Waiting for infrastructure services to be ready..." 
+ @kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=mongodb -n $(INFRASTRUCTURE_NAMESPACE) --timeout=300s || echo "⚠️ MongoDB not ready yet" + @kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=qdrant -n $(INFRASTRUCTURE_NAMESPACE) --timeout=300s || echo "⚠️ Qdrant not ready yet" + @echo "✅ Infrastructure services deployed" + +setup-rbac: ## Set up cross-namespace RBAC + @echo "🔐 Setting up cross-namespace RBAC..." + @kubectl apply -f k8s-manifests/cross-namespace-rbac.yaml + @echo "✅ Cross-namespace RBAC configured" + +setup-storage-pvc: ## Set up shared models PVC + @echo "💾 Setting up shared models PVC..." + @kubectl apply -f k8s-manifests/shared-models-pvc.yaml + @echo "⏳ Waiting for PVC to be bound..." + @kubectl wait --for=condition=bound pvc/shared-models-cache -n speech --timeout=60s || echo "⚠️ PVC not bound yet" + @echo "✅ Shared models PVC created" + +# ======================================== +# CONFIGURATION +# ======================================== + +config: config-all ## Generate all configuration files + +config-docker: ## Generate Docker Compose configuration files + @echo "🐳 Generating Docker Compose configuration files..." + @CONFIG_FILE=config.env.dev python3 scripts/generate-docker-configs.py + @echo "✅ Docker Compose configuration files generated" + +config-k8s: ## Generate Kubernetes configuration files (ConfigMap/Secret only - no .env files) + @echo "☸️ Generating Kubernetes configuration files..." + @python3 scripts/generate-k8s-configs.py + @echo "📦 Applying ConfigMap and Secret to Kubernetes..." + @kubectl apply -f k8s-manifests/configmap.yaml -n $(APPLICATION_NAMESPACE) 2>/dev/null || echo "⚠️ ConfigMap not applied (cluster not available?)" + @kubectl apply -f k8s-manifests/secrets.yaml -n $(APPLICATION_NAMESPACE) 2>/dev/null || echo "⚠️ Secret not applied (cluster not available?)" + @echo "📦 Copying ConfigMap and Secret to speech namespace..." 
+ @kubectl get configmap friend-lite-config -n $(APPLICATION_NAMESPACE) -o yaml | \ + sed -e '/namespace:/d' -e '/resourceVersion:/d' -e '/uid:/d' -e '/creationTimestamp:/d' | \ + kubectl apply -n speech -f - 2>/dev/null || echo "⚠️ ConfigMap not copied to speech namespace" + @kubectl get secret friend-lite-secrets -n $(APPLICATION_NAMESPACE) -o yaml | \ + sed -e '/namespace:/d' -e '/resourceVersion:/d' -e '/uid:/d' -e '/creationTimestamp:/d' | \ + kubectl apply -n speech -f - 2>/dev/null || echo "⚠️ Secret not copied to speech namespace" + @echo "✅ Kubernetes configuration files generated" + +config-all: config-docker config-k8s ## Generate all configuration files + @echo "✅ All configuration files generated" + +clean: ## Clean up generated configuration files + @echo "🧹 Cleaning up generated configuration files..." + @rm -f backends/advanced/.env + @rm -f extras/speaker-recognition/.env + @rm -f extras/openmemory-mcp/.env + @rm -f extras/asr-services/.env + @rm -f extras/havpe-relay/.env + @rm -f backends/simple/.env + @rm -f backends/other-backends/omi-webhook-compatible/.env + @rm -f skaffold.env + @rm -f backends/charts/advanced-backend/templates/env-configmap.yaml + @echo "✅ Generated files cleaned" + +# ======================================== +# DEPLOYMENT TARGETS +# ======================================== + +deploy: ## Deploy using configured deployment mode + @echo "🚀 Deploying using $(DEPLOYMENT_MODE) mode..." +ifeq ($(DEPLOYMENT_MODE),docker-compose) + @$(MAKE) deploy-docker +else ifeq ($(DEPLOYMENT_MODE),kubernetes) + @$(MAKE) deploy-k8s +else + @echo "❌ Unknown deployment mode: $(DEPLOYMENT_MODE)" + @exit 1 +endif + +deploy-docker: config-docker ## Deploy using Docker Compose + @echo "🐳 Deploying with Docker Compose..." + @cd backends/advanced && docker-compose up -d + @echo "✅ Docker Compose deployment completed" + +deploy-k8s: config-k8s ## Deploy to Kubernetes using Skaffold + @echo "☸️ Deploying to Kubernetes with Skaffold..." 
+ @set -a; source skaffold.env; set +a; skaffold run --profile=advanced-backend --default-repo=$(CONTAINER_REGISTRY) + @echo "✅ Kubernetes deployment completed" + +deploy-k8s-full: deploy-infrastructure deploy-apps ## Deploy infrastructure + applications to Kubernetes + @echo "✅ Full Kubernetes deployment completed" + +deploy-infrastructure: ## Deploy infrastructure services to Kubernetes + @echo "🏗️ Deploying infrastructure services..." + @kubectl apply -f k8s-manifests/ + @echo "✅ Infrastructure deployment completed" + +deploy-apps: config-k8s ## Deploy application services to Kubernetes + @echo "📱 Deploying application services..." + @set -a; source skaffold.env; set +a; skaffold run --profile=advanced-backend --default-repo=$(CONTAINER_REGISTRY) + @echo "✅ Application deployment completed" + +# ======================================== +# UTILITY TARGETS +# ======================================== + +check-infrastructure: ## Check if infrastructure services are running + @echo "🔍 Checking infrastructure services..." + @kubectl get pods -n $(INFRASTRUCTURE_NAMESPACE) || echo "❌ Infrastructure namespace not found" + @kubectl get services -n $(INFRASTRUCTURE_NAMESPACE) || echo "❌ Infrastructure services not found" + +check-apps: ## Check if application services are running + @echo "🔍 Checking application services..." + @kubectl get pods -n $(APPLICATION_NAMESPACE) || echo "❌ Application namespace not found" + @kubectl get services -n $(APPLICATION_NAMESPACE) || echo "❌ Application services not found" + +# ======================================== +# DEVELOPMENT TARGETS +# ======================================== + +build-backend: ## Build backend Docker image + @echo "🔨 Building backend Docker image..." + @cd backends/advanced && docker build -t advanced-backend:latest . + +up-backend: config-docker ## Start backend services + @echo "🚀 Starting backend services..." 
+ @cd backends/advanced && docker-compose up -d + +down-backend: ## Stop backend services + @echo "🛑 Stopping backend services..." + @cd backends/advanced && docker-compose down + +# ======================================== +# KUBERNETES UTILITIES +# ======================================== + +k8s-status: ## Check Kubernetes cluster status and health + @echo "📊 Checking Kubernetes cluster status..." + @$(K8S_SCRIPTS_DIR)/cluster-status.sh + +k8s-cleanup: ## Clean up Kubernetes resources and storage + @echo "🧹 Starting Kubernetes cleanup..." + @echo "This will help clean up registry storage and unused resources" + @$(K8S_SCRIPTS_DIR)/cleanup-registry-storage.sh + +k8s-purge: ## Purge unused images (registry + container) + @echo "🗑️ Purging unused images..." + @$(K8S_SCRIPTS_DIR)/purge-images.sh + +# ======================================== +# AUDIO MANAGEMENT +# ======================================== + +audio-manage: ## Interactive audio file management + @echo "🎵 Starting audio file management..." + @$(SCRIPTS_DIR)/manage-audio-files.sh + +# ======================================== +# TESTING TARGETS +# ======================================== + +# Define test environment variables +TEST_ENV := BACKEND_URL=http://localhost:8001 ADMIN_EMAIL=test-admin@example.com ADMIN_PASSWORD=test-admin-password-123 + +test-robot: ## Run all Robot Framework tests + @echo "🧪 Running all Robot Framework tests..." + @cd tests && $(TEST_ENV) robot --outputdir ../results . + @echo "✅ All Robot Framework tests completed" + @echo "📊 Results available in: results/" + +test-robot-integration: ## Run integration tests only + @echo "🧪 Running Robot Framework integration tests..." + @cd tests && $(TEST_ENV) robot --outputdir ../results integration/ + @echo "✅ Robot Framework integration tests completed" + @echo "📊 Results available in: results/" + +test-robot-unit: ## Run unit tests only + @echo "🧪 Running Robot Framework unit tests..." 
+ @cd tests && $(TEST_ENV) robot --outputdir ../results unit/ || echo "⚠️ No unit tests directory found" + @echo "✅ Robot Framework unit tests completed" + @echo "📊 Results available in: results/" + +test-robot-endpoints: ## Run endpoint tests only + @echo "🧪 Running Robot Framework endpoint tests..." + @cd tests && $(TEST_ENV) robot --outputdir ../results endpoints/ + @echo "✅ Robot Framework endpoint tests completed" + @echo "📊 Results available in: results/" + +test-robot-specific: ## Run specific Robot Framework test file (usage: make test-robot-specific FILE=path/to/test.robot) + @echo "🧪 Running specific Robot Framework test: $(FILE)" + @if [ -z "$(FILE)" ]; then \ + echo "❌ FILE parameter is required. Usage: make test-robot-specific FILE=path/to/test.robot"; \ + exit 1; \ + fi + @cd tests && $(TEST_ENV) robot --outputdir ../results $(FILE) + @echo "✅ Robot Framework test completed: $(FILE)" + @echo "📊 Results available in: results/" + +test-robot-clean: ## Clean up Robot Framework test results + @echo "🧹 Cleaning up Robot Framework test results..." + @rm -rf results/ + @echo "✅ Test results cleaned" diff --git a/README-K8S.md b/README-K8S.md new file mode 100644 index 00000000..161836af --- /dev/null +++ b/README-K8S.md @@ -0,0 +1,821 @@ +# Friend-Lite Kubernetes Setup Guide + +This guide walks you through setting up Friend-Lite from scratch on a fresh Ubuntu system, including MicroK8s installation, Docker registry configuration, and deployment via Skaffold. + +## System Architecture + +- **Build Machine**: Your development machine with Docker (for building images) +- **Kubernetes Node (k8s_control_plane)**: Ubuntu server running MicroK8s cluster +- **Docker Registry**: Runs on the Kubernetes node for image storage + +## Table of Contents + +1. [System Requirements](#system-requirements) +2. [Ubuntu Installation](#ubuntu-installation) +3. [MicroK8s Installation](#microk8s-installation) +4. [MicroK8s Registry Setup](#microk8s-registry-setup) +5. 
[Repository Setup](#repository-setup) +6. [Environment Configuration](#environment-configuration) +7. [Deployment](#deployment) +8. [Verification](#verification) +9. [Troubleshooting](#troubleshooting) + +## System Requirements + +### Build Machine (Your Development Machine) +- **OS**: macOS, Linux, or Windows with WSL2 +- **Docker**: Docker Desktop or Docker Engine +- **Tools**: Git, curl/wget + +### Kubernetes Node (k8s_control_plane) +- **Hardware**: Minimum 8GB RAM, 4 CPU cores, 50GB storage +- **Network**: Static IP configuration (recommended: 192.168.1.42) +- **OS**: Ubuntu 22.04 LTS or later +- **Architecture**: x86_64 (AMD64) + +## Ubuntu Installation + +**Run on: Kubernetes Node (k8s_control_plane)** + +1. **Download Ubuntu Server 22.04 LTS** + ```bash + # Download from https://ubuntu.com/download/server + # Or use wget: + wget https://releases.ubuntu.com/22.04/ubuntu-22.04.3-live-server-amd64.iso + ``` + +2. **Install Ubuntu Server** + - Boot from USB/DVD + - Choose "Install Ubuntu Server" + - Configure network with static IP (recommended: 192.168.1.42) + - Set hostname (e.g., `k8s_control_plane`) + - Create user account + - Install OpenSSH server + +3. **Post-Installation Setup** + ```bash + # Update system + sudo apt update && sudo apt upgrade -y + + # Install essential packages + sudo apt install -y curl wget git vim htop tree + + # Configure firewall + sudo ufw allow ssh + sudo ufw allow 6443 # Kubernetes API + sudo ufw allow 32000 # Docker registry + sudo ufw enable + ``` + +## MicroK8s Installation + +**Run on: Kubernetes Node (k8s_control_plane)** + +1. **Install MicroK8s** + ```bash + # Install MicroK8s + sudo snap install microk8s --classic + + # Add user to microk8s group + sudo usermod -a -G microk8s $USER + sudo chown -f -R $USER ~/.kube + + # Log out and back in, or run: + newgrp microk8s + ``` + +2. 
**Configure as Control Plane** + ```bash + # Start MicroK8s + sudo microk8s start + + # Wait for all services to be ready + sudo microk8s status --wait-ready + + # Generate join token for worker nodes + sudo microk8s add-node + # This will output a command like: + # sudo microk8s join 192.168.1.42:25000/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + # Save this command for worker node setup + ``` + +3. **Start MicroK8s** + ```bash + # Start MicroK8s + sudo microk8s start + + # Wait for all services to be ready + sudo microk8s status --wait-ready + ``` + +4. **Enable Required Add-ons** + ```bash + # Enable essential add-ons + sudo microk8s enable dns + sudo microk8s enable ingress + sudo microk8s enable storage + sudo microk8s enable metrics-server + + # Wait for add-ons to be ready + sudo microk8s status --wait-ready + ``` + +5. **Configure kubectl** + ```bash + # Create kubectl alias + echo 'alias kubectl="microk8s kubectl"' >> ~/.bashrc + source ~/.bashrc + + # Verify installation + kubectl get nodes + kubectl get pods -A + ``` + +## Worker Node Installation + +**Run on: Each Worker Node** + +1. **Install Ubuntu Server (Same as Control Plane)** + ```bash + # Follow the same Ubuntu installation steps as the control plane + # Use different hostname (e.g., k8s_worker_01, k8s_worker_02) + # Configure static IP (e.g., 192.168.1.43, 192.168.1.44) + # Ensure network connectivity to control plane (192.168.1.42) + ``` + +2. **Install MicroK8s** + ```bash + # Install MicroK8s + sudo snap install microk8s --classic + + # Add user to microk8s group + sudo usermod -a -G microk8s $USER + sudo chown -f -R $USER ~/.kube + + # Log out and back in, or run: + newgrp microk8s + ``` + +3. 
**Join the Cluster** + ```bash + # Use the join command from the control plane + # Replace with your actual join token + sudo microk8s join 192.168.1.42:25000/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + + # Wait for node to join + sudo microk8s status --wait-ready + ``` + +4. **Verify Node Status** + ```bash + # On the control plane, verify the worker node joined + kubectl get nodes + + # The worker node should show as Ready + # Example output: + # NAME STATUS ROLES AGE VERSION + # k8s_control_plane Ready 1h v1.28.0 + # k8s_worker_01 Ready 5m v1.28.0 + ``` + +5. **Configure Worker Node for Registry Access** + ```bash + # From your build machine, configure the worker node + ./configure-insecure-registry-remote.sh 192.168.1.43 + + # Repeat for each worker node with their respective IPs + ``` + +## MicroK8s Registry Setup + +**Run on: Kubernetes Node (k8s_control_plane)** + +1. **Enable Built-in Registry** + ```bash + # Enable the built-in MicroK8s registry (not enabled by default) + sudo microk8s enable registry + + # Wait for registry to be ready + sudo microk8s status --wait-ready + + # Verify registry is running + kubectl get pods -n container-registry + ``` + +2. **Configure Registry Access** + ```bash + # The registry runs on port 32000 by default + # Verify it's accessible + curl http://localhost:32000/v2/ + ``` + +3. **Configure Remote Access (from Build Machine)** + ```bash + # From your build machine, configure MicroK8s to trust the insecure registry + chmod +x scripts/configure-insecure-registry-remote.sh + + # Run the configuration script with your node IP address + # Usage: ./scripts/configure-insecure-registry-remote.sh [ssh_user] + ./scripts/configure-insecure-registry-remote.sh 192.168.1.42 + + # Or with custom SSH user: + # ./scripts/configure-insecure-registry-remote.sh 192.168.1.42 myuser + ``` + +## Storage Configuration + +**Run on: Kubernetes Node (k8s_control_plane)** + +1. 
**Install OpenEBS Hostpath Provisioner** + + the default hostpath provisioner seems to have a bug in it that makes it not work. If you want to use the default + one, then you will need to update the charts to use that for the PVC. + + ```bash + # Apply the hostpath provisioner + kubectl apply -f k8s-manifests/hostpath-provisioner-official.yaml + + # Verify storage class + kubectl get storageclass + ``` + +## Repository Setup + +**Run on: Build Machine (Your Development Machine)** + +### **Directory Structure** +``` +friend-lite/ +├── scripts/ # Kubernetes deployment and management scripts +│ ├── deploy-all-services.sh # Deploy all services +│ ├── cluster-status.sh # Check cluster health +│ ├── setup-nvidia-operator.sh # Setup GPU support +│ ├── test-gpu-pod.sh # Test GPU access +│ ├── cleanup-all.sh # Remove all deployments +│ ├── configure-insecure-registry-remote.sh # Configure registry access +│ └── generate-configmap.sh # Generate ConfigMap templates +├── k8s-manifests/ # Standalone Kubernetes manifests +│ └── hostpath-provisioner-official.yaml # Storage provisioner +├── skaffold.env.template # Configuration template +├── skaffold.yaml # Skaffold deployment configuration +├── init.sh # Docker Compose setup +└── deploy-speaker-recognition.sh # Standalone speaker recognition +``` + +### **Repository Setup** + +1. **Clone Repository** + ```bash + # Clone Friend-Lite repository + git clone https://github.com/yourusername/friend-lite.git + cd friend-lite + + # Verify template files are present + ls -la skaffold.env.template + ls -la backends/advanced/.env.template + ``` + +2. 
**Install Required Tools** + + **kubectl** (required for Skaffold and Helm): + - Visit: https://kubernetes.io/docs/tasks/tools/ + - Follow the official installation guide for your platform + + **Skaffold**: + - Visit: https://skaffold.dev/docs/install/ + - Follow the official installation guide + + **Helm**: + - Visit: https://helm.sh/docs/intro/install/ + - Follow the official installation guide + + **Verify installations:** + ```bash + kubectl version --client + skaffold version + helm version + ``` + +## Environment Configuration + +**Important**: Never commit your actual `.env` or `skaffold.env` files to version control. Only the `.template` files should be committed. + +**Run on: Build Machine (Your Development Machine)** + +1. **Create Environment File** + ```bash + # Copy template (if it exists) + # cp backends/advanced/.env.template backends/advanced/.env + + # Note: Most environment variables are automatically set by Skaffold during deployment + # including MONGODB_URI, QDRANT_BASE_URL, and other Kubernetes-specific values + ``` + +2. **Configure Skaffold Environment** + ```bash + # Copy the template file + cp skaffold.env.template skaffold.env + + # Edit skaffold.env with your specific values + vim skaffold.env + + # Essential variables to configure: + REGISTRY=192.168.1.42:32000 # Use IP address for immediate access + # Alternative: REGISTRY=k8s_control_plane:32000 (requires adding 'k8s_control_plane 192.168.1.42' to /etc/hosts) + BACKEND_IP=192.168.1.42 + BACKEND_NODEPORT=30270 + WEBUI_NODEPORT=31011 + + # Optional: Configure speaker recognition service + HF_TOKEN=hf_your_huggingface_token_here + DEEPGRAM_API_KEY=your_deepgram_api_key_here + + # Note: MONGODB_URI and QDRANT_BASE_URL are automatically generated + # by Skaffold based on your infrastructure namespace and service names + ``` + +3. 
**Configuration Variables Reference** + + **Required Variables:** + - `REGISTRY`: Docker registry for image storage + - `BACKEND_IP`: IP address of your Kubernetes control plane + - `BACKEND_NODEPORT`: Port for backend service (30000-32767) + - `WEBUI_NODEPORT`: Port for WebUI service (30000-32767) + - `INFRASTRUCTURE_NAMESPACE`: Namespace for MongoDB and Qdrant + - `APPLICATION_NAMESPACE`: Namespace for your application + + **Optional Variables (for Speaker Recognition):** + - `HF_TOKEN`: Hugging Face token for Pyannote models + - `DEEPGRAM_API_KEY`: Deepgram API key for speech-to-text + - `COMPUTE_MODE`: GPU or CPU mode for ML services + - `SIMILARITY_THRESHOLD`: Speaker identification threshold + + **Automatically Generated:** + - `MONGODB_URI`: Generated from infrastructure namespace + - `QDRANT_BASE_URL`: Generated from infrastructure namespace + - `IMAGE_REPO_*`: Generated from Skaffold build process + - `IMAGE_TAG_*`: Generated from Skaffold build process + +4. **Generate ConfigMap (if needed)** + ```bash + # Note: Most environment variables are handled by Skaffold automatically + # If you need custom environment variables, you can: + + # Option 1: Use the script (if it exists) + # chmod +x scripts/generate-helm-configmap.sh + # ./scripts/generate-helm-configmap.sh + + # Option 2: Add them directly to the Helm chart values + # Edit backends/charts/advanced-backend/values.yaml + ``` + +## Available Scripts + +**Run on: Build Machine (Your Development Machine)** + +The following scripts are available in the `scripts/` folder to simplify common operations: + +### **Deployment Scripts** +```bash +# Deploy all services in the correct order +./scripts/deploy-all-services.sh + +# Deploy with custom registry +./scripts/deploy-all-services.sh 192.168.1.43:32000 + +# Check cluster status and health +./scripts/cluster-status.sh + +# Check status of specific namespace +./scripts/cluster-status.sh friend-lite +``` + +### **Setup Scripts** +```bash +# Configure insecure 
registry access for remote nodes +./scripts/configure-insecure-registry-remote.sh 192.168.1.42 + +# Setup NVIDIA GPU operator +./scripts/setup-nvidia-operator.sh +``` + +### **Maintenance Scripts** +```bash +# Clean up all deployments (with confirmation) +./scripts/cleanup-all.sh + +# Force cleanup without confirmation +./scripts/cleanup-all.sh --force +``` + +### **GPU Testing Scripts** +```bash +# Test GPU access in Kubernetes +./scripts/test-gpu-pod.sh + +# Test with specific GPU count +./scripts/test-gpu-pod.sh 2 +``` + +### **Configuration Scripts** +```bash +# Generate ConfigMap data template from .env file +./scripts/generate-configmap.sh + +# This creates a Helm template that can be included in charts +# Output: backends/charts/advanced-backend/templates/env-data.yaml +``` + +**Note**: All scripts automatically read configuration from `skaffold.env` and provide helpful error messages if configuration is missing. + +**When to use generate-configmap.sh**: Use this script when you want to: +- Create environment-specific ConfigMaps from `.env` files +- Generate Helm templates that can be included in other charts +- Manage environment variables in a more maintainable way +- Override the default hardcoded values in the Helm charts + +**Removed Scripts**: The following scripts were removed as they are no longer needed: +- `generate-helm-configmap.sh` - Environment variables are now handled automatically by Skaffold + +## Kubernetes Manifests + +**Location**: `k8s-manifests/` directory + +This directory contains standalone Kubernetes manifests that are not managed by Skaffold: + +- **`hostpath-provisioner-official.yaml`** - OpenEBS hostpath storage provisioner + - Applied manually: `kubectl apply -f k8s-manifests/hostpath-provisioner-official.yaml` + - Creates the `openebs-hostpath` storage class used by all services + +## Deployment + +**Run on: Build Machine (Your Development Machine)** + +1. 
**Deploy All Services (Recommended)** + ```bash + # Deploy everything in the correct order + ./scripts/deploy-all-services.sh + + # This will automatically: + # - Deploy infrastructure (MongoDB, Qdrant) + # - Deploy main application (Backend, WebUI) + # - Deploy additional services (if configured) + # - Wait for each service to be ready + # - Verify all deployments + ``` + +2. **Manual Deployment (Alternative)** + ```bash + # Deploy infrastructure first + skaffold run --profile=infrastructure + + # Wait for infrastructure to be ready + kubectl get pods -n root + + # Deploy main application + skaffold run --profile=advanced-backend --default-repo=192.168.1.42:32000 + + # Monitor deployment + skaffold run --profile=advanced-backend --default-repo=192.168.1.42:32000 --tail + ``` + +3. **Verify Deployment** + ```bash + # Check all resources + kubectl get all -n friend-lite + kubectl get all -n root + + # Check Ingress + kubectl get ingress -n friend-lite + + # Check services + kubectl get svc -n friend-lite + ``` + +## Multi-Node Cluster Management + +**Run on: Build Machine (Your Development Machine)** + +### **Cluster Status and Management** +```bash +# View all nodes in the cluster +kubectl get nodes -o wide + +# Get detailed node information +kubectl describe node k8s_control_plane +kubectl describe node k8s_worker_01 + +# Check node resources +kubectl top nodes + +# View node labels and taints +kubectl get nodes --show-labels +``` + +### **Pod Scheduling and Distribution** +```bash +# Check pod distribution across nodes +kubectl get pods -A -o wide + +# Force pod scheduling to specific node (if needed) +kubectl label node k8s_worker_01 node-role.kubernetes.io/worker=true + +# Check node capacity and allocatable resources +kubectl describe node k8s_worker_01 | grep -A 5 "Allocated resources" +``` + +### **Worker Node Maintenance** +```bash +# Drain a worker node before maintenance +kubectl drain k8s_worker_01 --ignore-daemonsets --delete-emptydir-data + +# After 
maintenance, uncordon the node +kubectl uncordon k8s_worker_01 + +# Remove a worker node from the cluster +kubectl delete node k8s_worker_01 +``` + +## Additional Services Deployment + +**Run on: Build Machine (Your Development Machine)** + +### 1. **Speaker Recognition Service** + +```bash +# Deploy speaker recognition with GPU support +skaffold run --profile=speaker-recognition --default-repo=k8s_control_plane:32000 + +# Monitor deployment +skaffold run --profile=speaker-recognition --default-repo=k8s_control_plane:32000 --tail + +# Verify deployment +kubectl get pods -n speech +kubectl get svc -n speech +``` + +### 2. **ASR (Automatic Speech Recognition) Services** + +```bash +# Deploy ASR services (Moonshine, Parakeet) +skaffold run --profile=asr-services --default-repo=k8s_control_plane:32000 + +# Monitor deployment +skaffold run --profile=asr-services --default-repo=k8s_control_plane:32000 --tail + +# Verify deployment +kubectl get pods -n asr +kubectl get svc -n asr +``` + +### 3. 
**NVIDIA GPU Operator Setup** + +**Prerequisites:** +- NVIDIA GPU(s) installed on the Kubernetes node +- NVIDIA drivers installed on the host system + +**Installation:** +```bash +# Use the automated setup script +./scripts/setup-nvidia-operator.sh + +# Or manually install: +# Add NVIDIA Helm repository +helm repo add nvidia https://helm.ngc.nvidia.com/nvidia +helm repo update + +# Install NVIDIA GPU Operator +helm install gpu-operator nvidia/gpu-operator \ + --namespace gpu-operator \ + --create-namespace \ + --set driver.enabled=false \ + --set toolkit.enabled=false + +# Wait for GPU operator to be ready +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=gpu-operator -n gpu-operator --timeout=300s + +# Verify GPU detection +kubectl get nodes -o json | jq '.items[] | {name: .metadata.name, gpu: .status.allocatable."nvidia.com/gpu"}' + +# Check GPU operator pods +kubectl get pods -n gpu-operator +``` + +**GPU-Enabled Pod Configuration:** +```yaml +# Example pod spec with GPU access +spec: + containers: + - name: gpu-container + image: nvidia/cuda:11.8-base-ubuntu20.04 + resources: + limits: + nvidia.com/gpu: 1 + command: ["nvidia-smi"] +``` + +**Test GPU Access:** +```bash +# Test GPU functionality +./scripts/test-gpu-pod.sh + +# Test with multiple GPUs +./scripts/test-gpu-pod.sh 2 +``` + +## Verification + +**Run on: Build Machine (Your Development Machine)** + +1. **Check Application Health** + ```bash + # Check backend health + curl -k https://friend-lite.192-168-1-42.nip.io:32623/health + + # Check WebUI + curl -k https://friend-lite.192-168-1-42.nip.io:32623/ + ``` + +2. **Access WebUI** + - Open browser to: `https://friend-lite.192-168-1-42.nip.io:32623/` + - Accept self-signed certificate warning + - Create admin user account + - Test audio recording functionality + +3. 
**Test WebSocket Connection** + - Open browser console + - Check for WebSocket connection success + - Verify audio recording works + +## Troubleshooting + +### Common Issues + +1. **Registry Access Issues** + ```bash + # Test registry connectivity (run on Kubernetes node) + curl http://k8s_control_plane:32000/v2/ + + # Check MicroK8s containerd config (run on Kubernetes node) + sudo cat /var/snap/microk8s/current/args/certs.d/k8s_control_plane:32000/hosts.toml + ``` + +2. **Storage Issues** + ```bash + # Check storage class (run on build machine) + kubectl get storageclass + + # Check persistent volumes (run on build machine) + kubectl get pv + kubectl get pvc -A + ``` + +3. **Ingress Issues** + ```bash + # Check Ingress controller (run on build machine) + kubectl get pods -n ingress-nginx + + # Check Ingress configuration (run on build machine) + kubectl describe ingress -n friend-lite + ``` + +4. **Build Issues** + ```bash + # Clean and rebuild (run on build machine) + skaffold clean + skaffold run --profile=advanced-backend --default-repo=k8s_control_plane:32000 + ``` + +5. **GPU Issues** + ```bash + # Check GPU operator status (run on build machine) + kubectl get pods -n gpu-operator + kubectl describe pod -n gpu-operator + + # Check GPU detection on nodes + kubectl get nodes -o json | jq '.items[] | {name: .metadata.name, gpu: .status.allocatable."nvidia.com/gpu"}' + + # Check GPU operator logs + kubectl logs -n gpu-operator deployment/gpu-operator + + # Verify NVIDIA drivers on host (run on Kubernetes node) + nvidia-smi + ``` + +6. 
**Multi-Node Cluster Issues** + ```bash + # Check node connectivity (run on build machine) + kubectl get nodes + kubectl describe node + + # Check node status and conditions + kubectl get nodes -o json | jq '.items[] | {name: .metadata.name, status: .status.conditions[] | select(.type=="Ready") | .status, message: .message}' + + # Check if pods can be scheduled + kubectl get pods -A -o wide + kubectl describe pod -n + + # Check node resources and capacity + kubectl top nodes + kubectl describe node | grep -A 10 "Allocated resources" + + # Verify network connectivity between nodes + # Run on each node: + ping + curl -s http://:32000/v2/ # Registry access + ``` + +### Useful Commands + +```bash +# View logs (run on build machine) +kubectl logs -n friend-lite deployment/advanced-backend +kubectl logs -n friend-lite deployment/webui + +# Port forward for debugging (run on build machine) +kubectl port-forward -n friend-lite svc/advanced-backend 8000:8000 +kubectl port-forward -n friend-lite svc/webui 8080:80 + +# Check resource usage (run on build machine) +kubectl top pods -n friend-lite +kubectl top nodes + +# Restart deployments (run on build machine) +kubectl rollout restart deployment/advanced-backend -n friend-lite +kubectl rollout restart deployment/webui -n friend-lite +``` + +## Maintenance + +**Run on: Build Machine (Your Development Machine)** + +1. **Regular Updates** + ```bash + # Update Docker images + skaffold run --profile=advanced-backend --default-repo=k8s_control_plane:32000 + ``` + +**Run on: Kubernetes Node (k8s_control_plane)** + +1. **System Updates** + ```bash + # Update system packages + sudo apt update && sudo apt upgrade -y + + # Update MicroK8s + sudo snap refresh microk8s + ``` + +2. 
**Backup Configuration** + ```bash + # Backup environment files (run on build machine) + cp backends/advanced/.env backends/advanced/.env.backup + cp skaffold.env skaffold.env.backup + + # Backup Kubernetes manifests (run on build machine) + kubectl get all -n friend-lite -o yaml > friend-lite-backup.yaml + kubectl get all -n root -o yaml > infrastructure-backup.yaml + ``` + +## Alternative: Docker Compose Setup + +If you prefer to use Docker Compose instead of Kubernetes, use the `init.sh` script: + +**Run on: Build Machine (Your Development Machine)** + +```bash +# Make script executable +chmod +x init.sh + +# Run interactive setup +./init.sh +``` + +This will guide you through setting up Friend-Lite using Docker Compose instead of Kubernetes. + +## Speaker Recognition Deployment + +For standalone speaker recognition deployment (without full Kubernetes setup): + +**Run on: Build Machine (Your Development Machine)** + +```bash +# Make script executable +chmod +x deploy-speaker-recognition.sh + +# Run deployment +./deploy-speaker-recognition.sh +``` + +This script handles speaker recognition service deployment with proper environment configuration. + +## Support + +For additional support: +- Check the main [README.md](README.md) +- Review [CLAUDE.md](CLAUDE.md) for development notes +- Check [README-skaffold.md](README-skaffold.md) for Skaffold-specific information + +--- + +**Note**: This setup supports both single-node and multi-node MicroK8s clusters. For production use, multi-node clusters provide better reliability, scalability, and resource distribution. The worker node installation section above provides complete instructions for expanding your cluster. diff --git a/README.md b/README.md index fd5b9f6a..8bb8c0e8 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,54 @@ -# EZ Wearable -I made this to understand the bluetooth streaming and websocket implementation in the Omi app. 
Then realized this is a pretty good point to push it since its basic enough that someone else may find it useful. Moving forawrd I want to implement the same or better memory features as in the Omi app, and definitely better overall context handling with a better backend. -Maybe wakeword etc. -Wainting for my dev kit 2 honestly. - - -Anyway, to go from Friend to Friend-lite, the steps are: -1. Modified main.dart to have bluetooth connection page -2. Modified android manifest to include bluetooth permissions -3. Added flutter_blue_plus and permission_handler dependencies -4. Created bluetooth_page.dart to handle bluetooth connection and scanning (_checkBluetoothPermission, _startScan, _connectToDevice) -5. Update bluetooth_page.dart within lib/ to handle the actual streaming of data. This also handles the charactaristic, which is basically services defined by a bluetooth device to describe the data that can be streamed. (_startAudioStreaming) -6. Created the websocket server within backend/main.py along with the pyproject.toml file. Implement the websocket and opus decoding using opuslib. First 3 bytes are metadata packets for the rest of the data. -7. Websocket kept disconnecting, implement reconnect. +# Friend-Lite + +Self-hostable AI system that captures audio/video data from OMI devices and other sources to generate memories, action items, and contextual insights about your conversations and daily interactions. 
+ +## Quick Start → [Get Started](quickstart.md) + +Clone, run setup wizard, start services, access at http://localhost:5173 + +## Screenshots + +*[WebUI Dashboard - Screenshot coming soon]* + +![WebUI Dashboard](screenshots/dashboard.png) + +*[Mobile App - Screenshot coming soon]* + +![Mobile App](screenshots/mobile-app.png) + +*[Memory Search - Screenshot coming soon]* + +![Memory Search](screenshots/memory-search.png) + +## What's Included + +- **Mobile app** for OMI devices via Bluetooth +- **Backend services** (simple → advanced with full AI features) +- **Web dashboard** for conversation and memory management +- **Optional services**: Speaker recognition, offline ASR, distributed deployment + +## Links + +- **📚 [Setup Guide](quickstart.md)** - Start here +- **🔧 [Full Documentation](CLAUDE.md)** - Comprehensive reference +- **🏗️ [Architecture Details](Docs/features.md)** - Technical deep dive +- **🐳 [Docker/K8s](README-K8S.md)** - Container deployment + +## Vision + +This fits as a small part of the larger idea of "Have various sensors feeding the state of YOUR world to computers/AI and get some use out of it" + +Use cases are numerous - OMI Mentor is one of them. Friend/Omi/pendants are a small but important part of this, since they record personal spoken context the best. OMI-like devices with a camera can also capture visual context - or smart glasses - which also double as a display. 
+ +Regardless - this repo will try to do the minimal of this - multiple OMI-like audio devices feeding audio data - and from it: +- Memories +- Action items +- Home automation + +## Golden Goals (Not Yet Achieved) + +- **Action items detection** (partial implementation) +- **Home automation integration** (planned) +- **Multi-device coordination** (planned) +- **Visual context capture** (smart glasses integration planned) + diff --git a/friend-lite/.gitignore b/app/.gitignore similarity index 97% rename from friend-lite/.gitignore rename to app/.gitignore index 05647d55..6bf33056 100644 --- a/friend-lite/.gitignore +++ b/app/.gitignore @@ -33,3 +33,5 @@ yarn-error.* # typescript *.tsbuildinfo + +android/* \ No newline at end of file diff --git a/friend-lite/App.tsx b/app/App.tsx similarity index 100% rename from friend-lite/App.tsx rename to app/App.tsx diff --git a/app/README.md b/app/README.md new file mode 100644 index 00000000..6d3272f9 --- /dev/null +++ b/app/README.md @@ -0,0 +1,368 @@ +# Friend-Lite Mobile App + +React Native mobile application for connecting OMI devices and streaming audio to Friend-Lite backends. Supports cross-platform deployment on iOS and Android with Bluetooth integration. 
+ +## Features + +- **OMI Device Integration**: Connect via Bluetooth and stream audio +- **Phone Audio Streaming**: Use phone's microphone directly (NEW) +- **Cross-Platform**: iOS and Android support using React Native +- **Real-time Audio Streaming**: OPUS audio transmission to backend services +- **WebSocket Communication**: Efficient real-time data transfer +- **Backend Selection**: Configure connection to any compatible backend +- **Live Audio Visualization**: Real-time audio level meters + +## Quick Start + +### Prerequisites + +- Node.js 18+ and npm installed +- Expo CLI: `npm install -g @expo/cli` +- **iOS**: Xcode and iOS Simulator or physical iOS device +- **Android**: Android Studio and Android device/emulator + +### Installation + +```bash +# Navigate to app directory +cd app + +# Install dependencies +npm install + +# Start development server +npm start +``` + +## Platform-Specific Setup + +### iOS Development + +#### Method 1: Expo Development Build (Recommended) + +```bash +# Clean and prebuild +npx expo prebuild --clean + +# Install development client +npx expo install expo-dev-client + +# Start development server +npx expo start --dev-client + +# Run on iOS device +npx expo run:ios --device +``` + +#### Method 2: Xcode Development + +```bash +# Prebuild for iOS +npx expo prebuild --clean + +# Install iOS dependencies +cd ios && pod install && cd .. + +# Open in Xcode +open ios/friendlite.xcworkspace +``` + +Build and run from Xcode interface. 
+ +### Android Development + +```bash +# Build and run on Android device +npx expo run:android --device +``` + +#### Android Network Configuration + +For local development, configure network permissions: + +**Development (Local Backend):** +Add to `android/app/src/main/AndroidManifest.xml`: +```xml + +``` + +**Production (HTTPS Backend):** +Create `android/app/src/main/res/xml/network_security_config.xml`: +```xml + + + + your-backend-domain.com + + +``` + +Reference in `AndroidManifest.xml`: +```xml + +``` + +## Backend Configuration + +### Supported Backends + +The app connects to any backend that accepts OPUS audio streams: + +1. **Simple Backend** (`backends/simple/`) + - Basic audio capture and storage + - Good for testing and development + - WebSocket endpoint: `/ws` + +2. **Advanced Backend** (`backends/advanced/`) + - Full transcription and memory features + - Real-time processing with speaker recognition + - WebSocket endpoint: `/ws_pcm` + +### Connection Setup + +#### Local Development +``` +Backend URL: ws://[machine-ip]:8000/ws_pcm +Example: ws://192.168.1.100:8000/ws_pcm +``` + +#### Public Access (Production) +Use ngrok or similar tunneling service: + +```bash +# Start ngrok tunnel +ngrok http 8000 + +# Use provided URL in app +Backend URL: wss://[ngrok-subdomain].ngrok.io/ws_pcm +``` + +### Configuration Steps + +1. **Start your chosen backend** (see backend-specific README) +2. **Open the mobile app** +3. **Navigate to Settings** +4. **Enter Backend URL**: + - Local: `ws://[your-ip]:8000/ws_pcm` + - Public: `wss://[your-domain]/ws_pcm` +5. **Save configuration** + +## Phone Audio Streaming (NEW) + +### Overview +Stream audio directly from your phone's microphone to Friend-Lite backend, bypassing Bluetooth devices. This feature provides a direct audio input method for users who want to use their phone as the audio source. 
+ +### Features +- **Direct Microphone Access**: Use phone's built-in microphone +- **Real-time Audio Streaming**: Live audio processing with visualization +- **Seamless Integration**: Switch between Bluetooth and phone audio modes +- **Cross-Platform**: Works on both iOS and Android +- **Live Audio Meters**: Visual feedback showing audio levels in real-time + +### Setup & Usage + +#### Enable Phone Audio Streaming +1. **Open Friend-Lite app** +2. **Configure Backend Connection** (see Backend Configuration section) +3. **Grant Microphone Permissions** when prompted +4. **Tap "Stream Phone Audio" button** in main interface +5. **Start speaking** - audio streams in real-time to backend + +#### Requirements +- **iOS**: iOS 13+ with microphone permissions +- **Android**: Android API 21+ with microphone permissions +- **Network**: Stable connection to Friend-Lite backend +- **Backend**: Advanced backend running with `/ws_pcm` endpoint + +#### Switching Audio Sources +- **Mutual Exclusion**: Cannot use Bluetooth and phone audio simultaneously +- **Automatic Detection**: App disables conflicting options when one is active +- **Visual Feedback**: Clear indicators show active audio source + +### Troubleshooting Phone Audio + +#### Audio Not Streaming +- **Check Permissions**: Ensure microphone access granted +- **Verify Backend URL**: Confirm `ws://[ip]:8000/ws_pcm` format +- **Network Connection**: Test backend connectivity +- **Authentication**: Verify JWT token is valid + +#### Poor Audio Quality +- **Check Signal Strength**: Ensure stable network connection +- **Reduce Background Noise**: Use in quiet environment +- **Restart Recording**: Stop and restart phone audio streaming + +#### Permission Issues +- **iOS**: Settings > Privacy & Security > Microphone > Friend-Lite +- **Android**: Settings > Apps > Friend-Lite > Permissions > Microphone + +#### No Audio Level Visualization +- **Restart App**: Close and reopen the application +- **Check Audio Input**: Ensure 
microphone is working in other apps +- **Backend Logs**: Verify backend is receiving audio data + +## User Workflow + +### Device Connection + +1. **Enable Bluetooth** on your mobile device +2. **Open Friend-Lite app** +3. **Pair OMI device**: + - Go to Device Settings + - Scan for nearby OMI devices + - Select your device from the list + - Complete pairing process + +### Audio Streaming + +#### Option 1: Bluetooth Audio (OMI Device) +1. **Configure backend connection** (see Configuration Steps above) +2. **Test connection**: + - Tap "Test Connection" in settings + - Verify green status indicator +3. **Start recording**: + - Press record button in main interface + - Speak into OMI device + - Audio streams to backend in real-time + +#### Option 2: Phone Audio Streaming +1. **Configure backend connection** (see Configuration Steps above) +2. **Enable phone audio**: + - Tap "Stream Phone Audio" button + - Grant microphone permissions when prompted +3. **Start speaking**: + - Speak directly into phone microphone + - Watch real-time audio level visualization + - Audio streams to backend automatically + +### Monitoring + +1. **Check connection status** in app header +2. **View real-time indicators**: + - Audio level meters + - Connection status + - Battery level (if supported) +3. 
**Access backend dashboard** for processed results + +## Troubleshooting + +### Common Issues + +**Bluetooth Connection Problems:** +- Ensure OMI device is in pairing mode +- Reset Bluetooth on mobile device +- Clear app cache and restart +- Check device compatibility + +**Audio Streaming Issues:** +- Verify backend URL format (include `ws://` or `wss://`) +- Check network connectivity +- Test with simple backend first +- Monitor backend logs for connection attempts + +**Phone Audio Streaming Issues:** +- Grant microphone permissions in device settings +- Ensure stable network connection to backend +- Restart phone audio streaming if no data flowing +- Check backend logs for audio data reception +- Verify JWT authentication token is valid + +**Build Errors:** +- Clear Expo cache: `npx expo start --clear` +- Clean prebuild: `npx expo prebuild --clean` +- Reinstall dependencies: `rm -rf node_modules && npm install` + +### Debug Mode + +Enable detailed logging: +1. Go to app Settings +2. Enable "Debug Mode" +3. 
View console logs for connection details + +### Network Testing + +Test backend connectivity: +```bash +# Test WebSocket endpoint +curl -i -N -H "Connection: Upgrade" \ + -H "Upgrade: websocket" \ + -H "Sec-WebSocket-Key: test" \ + -H "Sec-WebSocket-Version: 13" \ + http://[backend-ip]:8000/ws_pcm +``` + +## Development + +### Project Structure +``` +app/ +├── src/ +│ ├── components/ # React Native components +│ ├── screens/ # App screens +│ ├── services/ # WebSocket and Bluetooth services +│ └── utils/ # Helper utilities +├── app.json # Expo configuration +└── package.json # Dependencies +``` + +### Key Dependencies +- **React Native**: Cross-platform mobile framework +- **Expo**: Development and build toolchain +- **React Native Bluetooth**: OMI device communication +- **WebSocket**: Real-time backend communication + +### Building for Production + +#### iOS App Store +```bash +# Build for iOS +npx expo build:ios + +# Follow Expo documentation for App Store submission +``` + +#### Android Play Store +```bash +# Build for Android +npx expo build:android + +# Generate signed APK for distribution +``` + +## Integration Examples + +### WebSocket Communication +```javascript +// Connect to backend +const ws = new WebSocket('ws://backend-url:8000/ws_pcm'); + +// Send audio data +ws.send(audioBuffer); + +// Handle responses +ws.onmessage = (event) => { + // Process transcription or acknowledgment +}; +``` + +### Bluetooth Audio Capture +```javascript +// Start audio streaming from OMI device +await BluetoothService.startAudioStream(); + +// Handle audio data +BluetoothService.onAudioData = (audioBuffer) => { + websocket.send(audioBuffer); +}; +``` + +## Related Documentation + +- **[Backend Setup](../backends/)**: Choose and configure backend services +- **[Quick Start Guide](../quickstart.md)**: Complete system setup +- **[Advanced Backend](../backends/advanced/)**: Full-featured backend option +- **[Simple Backend](../backends/simple/)**: Basic backend for testing \ No 
newline at end of file diff --git a/app/app.json b/app/app.json new file mode 100644 index 00000000..9acdac77 --- /dev/null +++ b/app/app.json @@ -0,0 +1,102 @@ +{ + "expo": { + "name": "friend-lite-app", + "slug": "friend-lite-app", + "version": "1.0.0", + "orientation": "portrait", + "icon": "./assets/icon.png", + "entryPoint": "./app/index.tsx", + "userInterfaceStyle": "light", + "splash": { + "image": "./assets/splash.png", + "resizeMode": "contain", + "backgroundColor": "#ffffff" + }, + "assetBundlePatterns": [ + "**/*" + ], + "ios": { + "supportsTablet": true, + "bundleIdentifier": "com.cupbearer5517.friendlite", + "infoPlist": { + "NSMicrophoneUsageDescription": "Friend-Lite needs access to your microphone to stream audio to the backend for processing." + } + }, + "android": { + "adaptiveIcon": { + "foregroundImage": "./assets/adaptive-icon.png", + "backgroundColor": "#ffffff" + }, + "package": "com.cupbearer5517.friendlite", + "permissions": [ + "android.permission.BLUETOOTH", + "android.permission.BLUETOOTH_ADMIN", + "android.permission.BLUETOOTH_CONNECT", + "android.permission.ACCESS_NETWORK_STATE", + "android.permission.FOREGROUND_SERVICE", + "android.permission.FOREGROUND_SERVICE_DATA_SYNC", + "android.permission.POST_NOTIFICATIONS", + "android.permission.RECORD_AUDIO" + ], + "usesCleartextTraffic": true + }, + "newArchEnabled": true, + "plugins": [ + [ + "@siteed/expo-audio-studio", + { + "enablePhoneStateHandling": true, + "enableNotifications": true, + "enableBackgroundAudio": true, + "enableDeviceDetection": true, + "iosBackgroundModes": { "useProcessing": true }, + "iosConfig": { + "microphoneUsageDescription": "We use the mic for live audio streaming" + } + } + ], + [ + "react-native-ble-plx", + { + "isBackgroundEnabled": true, + "modes": [ + "central" + ], + "bluetoothAlwaysPermission": "This app uses Bluetooth to connect to and interact with nearby BLE devices." 
+ } + ], + [ + "./plugins/with-ws-fgs", + { + "microphone": true, + "android": { + "permissions": [ + "FOREGROUND_SERVICE", + "FOREGROUND_SERVICE_DATA_SYNC", + "FOREGROUND_SERVICE_MICROPHONE", + "POST_NOTIFICATIONS" + ] + } + } + ], + [ + "expo-build-properties", + { + "android": { + "usesCleartextTraffic": true, + "compileSdkVersion": 35, + "targetSdkVersion": 35, + "extraMavenRepos": [ + "../../node_modules/@notifee/react-native/android/libs" + ] + } + } + ] + ], + "extra": { + "eas": { + "projectId": "05d8598e-6fe7-4373-81e4-1654f3d8e181" + } + } + } +} \ No newline at end of file diff --git a/friend-lite/app/_layout.tsx b/app/app/_layout.tsx similarity index 100% rename from friend-lite/app/_layout.tsx rename to app/app/_layout.tsx diff --git a/app/app/components/AuthSection.tsx b/app/app/components/AuthSection.tsx new file mode 100644 index 00000000..e5014854 --- /dev/null +++ b/app/app/components/AuthSection.tsx @@ -0,0 +1,264 @@ +import React, { useState, useEffect } from 'react'; +import { View, Text, TextInput, TouchableOpacity, StyleSheet, Alert, ActivityIndicator } from 'react-native'; +import { saveAuthEmail, saveAuthPassword, saveJwtToken, getAuthEmail, getAuthPassword, clearAuthData } from '../utils/storage'; + +interface AuthSectionProps { + backendUrl: string; + isAuthenticated: boolean; + currentUserEmail: string | null; + onAuthStatusChange: (isAuthenticated: boolean, email: string | null, token: string | null) => void; +} + +export const AuthSection: React.FC = ({ + backendUrl, + isAuthenticated, + currentUserEmail, + onAuthStatusChange, +}) => { + const [email, setEmail] = useState(''); + const [password, setPassword] = useState(''); + const [isLoggingIn, setIsLoggingIn] = useState(false); + + // Load saved email and password on component mount + useEffect(() => { + const loadAuthData = async () => { + const savedEmail = await getAuthEmail(); + const savedPassword = await getAuthPassword(); + if (savedEmail) setEmail(savedEmail); + if 
(savedPassword) setPassword(savedPassword); + }; + loadAuthData(); + }, []); + + const handleLogin = async () => { + if (!email.trim() || !password.trim()) { + Alert.alert('Missing Credentials', 'Please enter both email and password.'); + return; + } + + if (!backendUrl.trim()) { + Alert.alert('Backend URL Required', 'Please enter a backend URL first.'); + return; + } + + setIsLoggingIn(true); + + try { + // Convert WebSocket URL to HTTP URL for authentication + const baseUrl = backendUrl.replace('ws://', 'http://').replace('wss://', 'https://').split('/ws')[0]; + const loginUrl = `${baseUrl}/auth/jwt/login`; + + const formData = new URLSearchParams(); + formData.append('username', email.trim()); + formData.append('password', password.trim()); + + const response = await fetch(loginUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + }, + body: formData.toString(), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Login failed: ${response.status} ${response.statusText} - ${errorText}`); + } + + const authData = await response.json(); + const jwtToken = authData.access_token; + + if (!jwtToken) { + throw new Error('No access token received from server'); + } + + // Save credentials and token + await saveAuthEmail(email.trim()); + await saveAuthPassword(password.trim()); + await saveJwtToken(jwtToken); + + console.log('[AuthSection] Login successful for user:', email); + onAuthStatusChange(true, email.trim(), jwtToken); + + } catch (error) { + console.error('[AuthSection] Login error:', error); + Alert.alert( + 'Login Failed', + error instanceof Error ? error.message : 'An unknown error occurred during login.' 
+ ); + } finally { + setIsLoggingIn(false); + } + }; + + const handleLogout = async () => { + try { + await clearAuthData(); + setEmail(''); + setPassword(''); + console.log('[AuthSection] Logout successful'); + onAuthStatusChange(false, null, null); + } catch (error) { + console.error('[AuthSection] Logout error:', error); + Alert.alert('Logout Error', 'Failed to clear authentication data.'); + } + }; + + if (isAuthenticated && currentUserEmail) { + return ( + + Authentication + + Logged in as: {currentUserEmail} + + Logout + + + + ); + } + + return ( + + Authentication + Email: + + + Password: + + + + {isLoggingIn ? ( + + + Logging in... + + ) : ( + Login + )} + + + {!isAuthenticated && ( + + Enter your email and password to authenticate with the backend. + + )} + + ); +}; + +const styles = StyleSheet.create({ + section: { + marginBottom: 25, + padding: 15, + backgroundColor: 'white', + borderRadius: 10, + shadowColor: '#000', + shadowOffset: { width: 0, height: 1 }, + shadowOpacity: 0.1, + shadowRadius: 3, + elevation: 2, + }, + sectionTitle: { + fontSize: 18, + fontWeight: '600', + marginBottom: 15, + color: '#333', + }, + inputLabel: { + fontSize: 14, + color: '#333', + marginBottom: 5, + marginTop: 10, + fontWeight: '500', + }, + textInput: { + backgroundColor: '#f0f0f0', + borderWidth: 1, + borderColor: '#ddd', + borderRadius: 6, + padding: 10, + fontSize: 14, + width: '100%', + marginBottom: 10, + color: '#333', + }, + button: { + backgroundColor: '#007AFF', + paddingVertical: 12, + paddingHorizontal: 20, + borderRadius: 8, + alignItems: 'center', + marginTop: 15, + elevation: 2, + }, + buttonDisabled: { + backgroundColor: '#A0A0A0', + opacity: 0.7, + }, + buttonDanger: { + backgroundColor: '#FF3B30', + }, + buttonText: { + color: 'white', + fontSize: 16, + fontWeight: '600', + }, + loadingContainer: { + flexDirection: 'row', + alignItems: 'center', + }, + helpText: { + fontSize: 12, + color: '#666', + marginTop: 10, + textAlign: 'center', + fontStyle: 
'italic', + }, + authenticatedContainer: { + flexDirection: 'row', + justifyContent: 'space-between', + alignItems: 'center', + }, + authenticatedText: { + fontSize: 14, + color: '#4CD964', + fontWeight: '500', + flex: 1, + marginRight: 10, + }, +}); + +export default AuthSection; \ No newline at end of file diff --git a/app/app/components/BackendStatus.tsx b/app/app/components/BackendStatus.tsx new file mode 100644 index 00000000..75fdd7a8 --- /dev/null +++ b/app/app/components/BackendStatus.tsx @@ -0,0 +1,319 @@ +import React, { useState, useEffect } from 'react'; +import { View, Text, TextInput, TouchableOpacity, StyleSheet, Alert, ActivityIndicator } from 'react-native'; + +interface BackendStatusProps { + backendUrl: string; + onBackendUrlChange: (url: string) => void; + jwtToken: string | null; +} + +interface HealthStatus { + status: 'unknown' | 'checking' | 'healthy' | 'unhealthy' | 'auth_required'; + message: string; + lastChecked?: Date; +} + +export const BackendStatus: React.FC = ({ + backendUrl, + onBackendUrlChange, + jwtToken, +}) => { + const [healthStatus, setHealthStatus] = useState({ + status: 'unknown', + message: 'Not checked', + }); + + const checkBackendHealth = async (showAlert: boolean = false) => { + if (!backendUrl.trim()) { + setHealthStatus({ + status: 'unhealthy', + message: 'Backend URL not set', + }); + return; + } + + setHealthStatus({ + status: 'checking', + message: 'Checking connection...', + }); + + try { + // Convert WebSocket URL to HTTP URL for health check + let baseUrl = backendUrl.trim(); + + // Handle different URL formats + if (baseUrl.startsWith('ws://')) { + baseUrl = baseUrl.replace('ws://', 'http://'); + } else if (baseUrl.startsWith('wss://')) { + baseUrl = baseUrl.replace('wss://', 'https://'); + } + + // Remove any WebSocket path if present + baseUrl = baseUrl.split('/ws')[0]; + + // Try health endpoint first + const healthUrl = `${baseUrl}/health`; + console.log('[BackendStatus] Checking health at:', healthUrl); 
+ + const response = await fetch(healthUrl, { + method: 'GET', + headers: { + 'Accept': 'application/json', + 'Content-Type': 'application/json', + ...(jwtToken ? { 'Authorization': `Bearer ${jwtToken}` } : {}), + }, + }); + + console.log('[BackendStatus] Health check response status:', response.status); + + if (response.ok) { + const healthData = await response.json(); + setHealthStatus({ + status: 'healthy', + message: `Connected (${healthData.status || 'OK'})`, + lastChecked: new Date(), + }); + + if (showAlert) { + Alert.alert('Connection Success', 'Successfully connected to backend!'); + } + } else if (response.status === 401 || response.status === 403) { + setHealthStatus({ + status: 'auth_required', + message: 'Authentication required', + lastChecked: new Date(), + }); + + if (showAlert) { + Alert.alert('Authentication Required', 'Please login to access the backend.'); + } + } else { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + } catch (error) { + console.error('[BackendStatus] Health check error:', error); + + let errorMessage = 'Connection failed'; + if (error instanceof Error) { + if (error.message.includes('Network request failed')) { + errorMessage = 'Network request failed - check URL and network connection'; + } else if (error.name === 'AbortError') { + errorMessage = 'Request timeout'; + } else { + errorMessage = error.message; + } + } + + setHealthStatus({ + status: 'unhealthy', + message: errorMessage, + lastChecked: new Date(), + }); + + if (showAlert) { + Alert.alert( + 'Connection Failed', + `Could not connect to backend: ${errorMessage}\n\nMake sure the backend is running and accessible.` + ); + } + } + }; + + // Auto-check health when backend URL or JWT token changes + useEffect(() => { + if (backendUrl.trim()) { + const timer = setTimeout(() => { + checkBackendHealth(false); + }, 500); // Debounce + + return () => clearTimeout(timer); + } + }, [backendUrl, jwtToken]); + + const getStatusColor = (status: 
HealthStatus['status']): string => { + switch (status) { + case 'healthy': + return '#4CD964'; + case 'checking': + return '#FF9500'; + case 'unhealthy': + return '#FF3B30'; + case 'auth_required': + return '#FF9500'; + default: + return '#8E8E93'; + } + }; + + const getStatusIcon = (status: HealthStatus['status']): string => { + switch (status) { + case 'healthy': + return '✅'; + case 'checking': + return '🔄'; + case 'unhealthy': + return '❌'; + case 'auth_required': + return '🔐'; + default: + return '❓'; + } + }; + + return ( + + Backend Connection + + Backend URL: + + + + + Status: + + {getStatusIcon(healthStatus.status)} + + {healthStatus.message} + + {healthStatus.status === 'checking' && ( + + )} + + + + {healthStatus.lastChecked && ( + + Last checked: {healthStatus.lastChecked.toLocaleTimeString()} + + )} + + + checkBackendHealth(true)} + disabled={healthStatus.status === 'checking'} + > + + {healthStatus.status === 'checking' ? 'Checking...' : 'Test Connection'} + + + + + Enter the WebSocket URL of your backend server. Simple backend: http://localhost:8000/ (no auth). + Advanced backend: http://localhost:8080/ (requires login). Status is automatically checked. 
+ The websocket URL can be different or the same as the HTTP URL, with /ws_omi suffix + + + ); +}; + +const styles = StyleSheet.create({ + section: { + marginBottom: 25, + padding: 15, + backgroundColor: 'white', + borderRadius: 10, + shadowColor: '#000', + shadowOffset: { width: 0, height: 1 }, + shadowOpacity: 0.1, + shadowRadius: 3, + elevation: 2, + }, + sectionTitle: { + fontSize: 18, + fontWeight: '600', + marginBottom: 15, + color: '#333', + }, + inputLabel: { + fontSize: 14, + color: '#333', + marginBottom: 5, + fontWeight: '500', + }, + textInput: { + backgroundColor: '#f0f0f0', + borderWidth: 1, + borderColor: '#ddd', + borderRadius: 6, + padding: 10, + fontSize: 14, + width: '100%', + marginBottom: 15, + color: '#333', + }, + statusContainer: { + marginBottom: 15, + padding: 10, + backgroundColor: '#f8f9fa', + borderRadius: 6, + borderWidth: 1, + borderColor: '#e9ecef', + }, + statusRow: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'space-between', + }, + statusLabel: { + fontSize: 14, + fontWeight: '500', + color: '#333', + }, + statusValue: { + flexDirection: 'row', + alignItems: 'center', + flex: 1, + justifyContent: 'flex-end', + }, + statusIcon: { + fontSize: 16, + marginRight: 6, + }, + statusText: { + fontSize: 14, + fontWeight: '500', + }, + lastCheckedText: { + fontSize: 12, + color: '#666', + marginTop: 5, + textAlign: 'center', + fontStyle: 'italic', + }, + button: { + backgroundColor: '#007AFF', + paddingVertical: 12, + paddingHorizontal: 20, + borderRadius: 8, + alignItems: 'center', + marginBottom: 10, + elevation: 2, + }, + buttonDisabled: { + backgroundColor: '#A0A0A0', + opacity: 0.7, + }, + buttonText: { + color: 'white', + fontSize: 16, + fontWeight: '600', + }, + helpText: { + fontSize: 12, + color: '#666', + textAlign: 'center', + fontStyle: 'italic', + }, +}); + +export default BackendStatus; \ No newline at end of file diff --git a/friend-lite/app/components/BluetoothStatusBanner.tsx 
b/app/app/components/BluetoothStatusBanner.tsx similarity index 100% rename from friend-lite/app/components/BluetoothStatusBanner.tsx rename to app/app/components/BluetoothStatusBanner.tsx diff --git a/friend-lite/app/components/DeviceDetails.tsx b/app/app/components/DeviceDetails.tsx similarity index 73% rename from friend-lite/app/components/DeviceDetails.tsx rename to app/app/components/DeviceDetails.tsx index 29a449e0..ebf204c3 100644 --- a/friend-lite/app/components/DeviceDetails.tsx +++ b/app/app/components/DeviceDetails.tsx @@ -1,6 +1,6 @@ import React from 'react'; -import { View, Text, TouchableOpacity, StyleSheet, TextInput, ScrollView } from 'react-native'; -import { BleAudioCodec } from '@omiai/omi-react-native'; +import { View, Text, TouchableOpacity, StyleSheet, TextInput } from 'react-native'; +import { BleAudioCodec } from 'friend-lite-react-native'; interface DeviceDetailsProps { // Device Info @@ -24,6 +24,14 @@ interface DeviceDetailsProps { isAudioStreaming: boolean; isConnectingAudioStreamer: boolean; audioStreamerError: string | null; + + // User ID Management + userId: string; + onSetUserId: (userId: string) => void; + + // Audio Listener Retry State + isAudioListenerRetrying?: boolean; + audioListenerRetryAttempts?: number; } export const DeviceDetails: React.FC = ({ @@ -40,10 +48,15 @@ export const DeviceDetails: React.FC = ({ onSetWebSocketUrl, isAudioStreaming, isConnectingAudioStreamer, - audioStreamerError + audioStreamerError, + userId, + onSetUserId, + isAudioListenerRetrying, + audioListenerRetryAttempts }) => { if (!connectedDeviceId) return null; + return ( Device Functions @@ -73,16 +86,54 @@ export const DeviceDetails: React.FC = ({ )} + {/* User ID Management */} + + User ID (optional) + Enter User ID (for device identification): + + + + {userId && ( + + Current User ID: + {userId} + + )} + + {/* Audio Controls */} - {isListeningAudio ? "Stop Audio Listener" : "Start Audio Listener"} + {isListeningAudio ? 
"Stop Audio Listener" : + isAudioListenerRetrying ? "Stop Retry" : "Start Audio Listener"} + + {isAudioListenerRetrying && ( + + + 🔄 Retrying audio listener... (Attempt {audioListenerRetryAttempts || 0}/10) + + + )} + {isListeningAudio && ( Audio Packets Received: @@ -102,6 +153,8 @@ export const DeviceDetails: React.FC = ({ placeholder="wss://your-backend.com/ws/audio" autoCapitalize="none" keyboardType="url" + returnKeyType="done" + autoCorrect={false} editable={!isListeningAudio && !isAudioStreaming} // Prevent edit while listening/streaming /> @@ -116,6 +169,7 @@ export const DeviceDetails: React.FC = ({ Error: {audioStreamerError} )} + ); }; @@ -162,6 +216,12 @@ const styles = StyleSheet.create({ backgroundColor: '#A0A0A0', opacity: 0.7, }, + buttonSecondary: { + backgroundColor: '#8E8E93', + }, + buttonSecondaryText: { + color: 'white', + }, buttonText: { color: 'white', fontSize: 16, @@ -249,6 +309,7 @@ const styles = StyleSheet.create({ fontSize: 14, width: '100%', // Ensure input takes full width of its container marginBottom: 10, + color: '#333', }, statusText: { // New style for status messages marginTop: 8, @@ -263,6 +324,20 @@ const styles = StyleSheet.create({ color: 'red', fontWeight: 'bold', }, + retryContainer: { + marginTop: 10, + padding: 12, + backgroundColor: '#FFF3CD', + borderRadius: 8, + borderLeftWidth: 4, + borderLeftColor: '#FF9500', + }, + retryText: { + fontSize: 14, + color: '#856404', + fontWeight: '500', + textAlign: 'center', + }, }); export default DeviceDetails; \ No newline at end of file diff --git a/friend-lite/app/components/DeviceListItem.tsx b/app/app/components/DeviceListItem.tsx similarity index 98% rename from friend-lite/app/components/DeviceListItem.tsx rename to app/app/components/DeviceListItem.tsx index aff38d08..a8083035 100644 --- a/friend-lite/app/components/DeviceListItem.tsx +++ b/app/app/components/DeviceListItem.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { View, Text, TouchableOpacity, StyleSheet } 
from 'react-native'; -import { OmiDevice } from '@omiai/omi-react-native'; +import { OmiDevice } from 'friend-lite-react-native'; interface DeviceListItemProps { device: OmiDevice; diff --git a/app/app/components/PhoneAudioButton.tsx b/app/app/components/PhoneAudioButton.tsx new file mode 100644 index 00000000..1f486e55 --- /dev/null +++ b/app/app/components/PhoneAudioButton.tsx @@ -0,0 +1,201 @@ +// PhoneAudioButton.tsx +import React from 'react'; +import { + TouchableOpacity, + Text, + View, + StyleSheet, + ActivityIndicator, +} from 'react-native'; + +interface PhoneAudioButtonProps { + isRecording: boolean; + isInitializing: boolean; + isDisabled: boolean; + audioLevel: number; + error: string | null; + onPress: () => void; +} + +const PhoneAudioButton: React.FC = ({ + isRecording, + isInitializing, + isDisabled, + audioLevel, + error, + onPress, +}) => { + + const getButtonStyle = () => { + if (isDisabled && !isRecording) { + return [styles.button, styles.buttonDisabled]; + } + if (isRecording) { + return [styles.button, styles.buttonRecording]; + } + if (error) { + return [styles.button, styles.buttonError]; + } + return [styles.button, styles.buttonIdle]; + }; + + const getButtonText = () => { + if (isInitializing) { + return 'Initializing...'; + } + if (isRecording) { + return 'Stop Phone Audio'; + } + return 'Stream Phone Audio'; + }; + + const getMicrophoneIcon = () => { + if (isRecording) { + return '🎤'; // Recording microphone + } + return '🎙️'; // Idle microphone + }; + + return ( + + + + {isInitializing ? ( + + ) : ( + + {getMicrophoneIcon()} + {getButtonText()} + + )} + + + + {/* Audio Level Indicator */} + {isRecording && ( + + + + + Audio Level + + )} + + {/* Status Message */} + {isRecording && ( + + Streaming audio to backend... 
+ + )} + + {/* Error Message */} + {error && !isRecording && ( + {error} + )} + + {/* Disabled Message */} + {isDisabled && !isRecording && ( + + Disconnect Bluetooth device to use phone audio + + )} + + ); +}; + +const styles = StyleSheet.create({ + container: { + marginVertical: 10, + paddingHorizontal: 20, + }, + buttonWrapper: { + alignSelf: 'stretch', + }, + button: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'center', + paddingVertical: 12, + paddingHorizontal: 20, + borderRadius: 8, + minHeight: 48, + }, + buttonContent: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'center', + }, + buttonIdle: { + backgroundColor: '#007AFF', + }, + buttonRecording: { + backgroundColor: '#FF3B30', + }, + buttonDisabled: { + backgroundColor: '#C7C7CC', + }, + buttonError: { + backgroundColor: '#FF9500', + }, + buttonText: { + color: '#FFFFFF', + fontSize: 16, + fontWeight: '600', + marginLeft: 8, + }, + icon: { + fontSize: 20, + }, + statusText: { + textAlign: 'center', + marginTop: 8, + fontSize: 12, + color: '#8E8E93', + }, + errorText: { + textAlign: 'center', + marginTop: 8, + fontSize: 12, + color: '#FF3B30', + }, + disabledText: { + textAlign: 'center', + marginTop: 8, + fontSize: 12, + color: '#8E8E93', + fontStyle: 'italic', + }, + audioLevelContainer: { + marginTop: 12, + alignItems: 'center', + }, + audioLevelBackground: { + width: '100%', + height: 4, + backgroundColor: '#E5E5EA', + borderRadius: 2, + overflow: 'hidden', + }, + audioLevelBar: { + height: '100%', + backgroundColor: '#34C759', + borderRadius: 2, + }, + audioLevelText: { + marginTop: 4, + fontSize: 10, + color: '#8E8E93', + }, +}); + +export default PhoneAudioButton; \ No newline at end of file diff --git a/friend-lite/app/components/ScanControls.tsx b/app/app/components/ScanControls.tsx similarity index 100% rename from friend-lite/app/components/ScanControls.tsx rename to app/app/components/ScanControls.tsx diff --git 
a/friend-lite/app/components/StatusIndicator.tsx b/app/app/components/StatusIndicator.tsx similarity index 100% rename from friend-lite/app/components/StatusIndicator.tsx rename to app/app/components/StatusIndicator.tsx diff --git a/friend-lite/app/hooks/.gitkeep b/app/app/hooks/.gitkeep similarity index 100% rename from friend-lite/app/hooks/.gitkeep rename to app/app/hooks/.gitkeep diff --git a/app/app/hooks/useAudioListener.ts b/app/app/hooks/useAudioListener.ts new file mode 100644 index 00000000..391ed125 --- /dev/null +++ b/app/app/hooks/useAudioListener.ts @@ -0,0 +1,226 @@ +import { useState, useRef, useCallback, useEffect } from 'react'; +import { Alert } from 'react-native'; +import { OmiConnection } from 'friend-lite-react-native'; +import { Subscription, ConnectionPriority } from 'react-native-ble-plx'; // OmiConnection might use this type for subscriptions + +interface UseAudioListener { + isListeningAudio: boolean; + audioPacketsReceived: number; + startAudioListener: (onAudioData: (bytes: Uint8Array) => void) => Promise; + stopAudioListener: () => Promise; + isRetrying: boolean; + retryAttempts: number; +} + +export const useAudioListener = ( + omiConnection: OmiConnection, + isConnected: () => boolean // Function to check current connection status +): UseAudioListener => { + const [isListeningAudio, setIsListeningAudio] = useState(false); + const [audioPacketsReceived, setAudioPacketsReceived] = useState(0); + const [isRetrying, setIsRetrying] = useState(false); + const [retryAttempts, setRetryAttempts] = useState(0); + + const audioSubscriptionRef = useRef(null); + const uiUpdateIntervalRef = useRef(null); + const localPacketCounterRef = useRef(0); + const retryTimeoutRef = useRef(null); + const shouldRetryRef = useRef(false); + const currentOnAudioDataRef = useRef<((bytes: Uint8Array) => void) | null>(null); + + // Retry configuration + const MAX_RETRY_ATTEMPTS = 10; + const INITIAL_RETRY_DELAY = 1000; // 1 second + const MAX_RETRY_DELAY = 60000; 
// 60 seconds + + const stopAudioListener = useCallback(async () => { + console.log('Attempting to stop audio listener...'); + + // Stop retry mechanism + shouldRetryRef.current = false; + setIsRetrying(false); + setRetryAttempts(0); + currentOnAudioDataRef.current = null; + + if (retryTimeoutRef.current) { + clearTimeout(retryTimeoutRef.current); + retryTimeoutRef.current = null; + } + + if (uiUpdateIntervalRef.current) { + clearInterval(uiUpdateIntervalRef.current); + uiUpdateIntervalRef.current = null; + } + + if (audioSubscriptionRef.current) { + try { + await omiConnection.stopAudioBytesListener(audioSubscriptionRef.current); + audioSubscriptionRef.current = null; + setIsListeningAudio(false); + localPacketCounterRef.current = 0; // Reset local counter + // setAudioPacketsReceived(0); // Optionally reset global counter on stop, or keep cumulative + console.log('Audio listener stopped.'); + } catch (error) { + console.error('Stop audio listener error:', error); + Alert.alert('Error', `Failed to stop audio listener: ${error}`); + } + } else { + console.log('Audio listener was not active.'); + } + setIsListeningAudio(false); // Ensure state is false even if no subscription was found + }, [omiConnection]); + + // Calculate exponential backoff delay + const getRetryDelay = useCallback((attemptNumber: number): number => { + const delay = Math.min(INITIAL_RETRY_DELAY * Math.pow(2, attemptNumber), MAX_RETRY_DELAY); + // Add jitter to prevent thundering herd + const jitter = Math.random() * 0.3 * delay; + return delay + jitter; + }, []); + + // Internal function to attempt starting audio listener + const attemptStartAudioListener = useCallback(async (onAudioData: (bytes: Uint8Array) => void): Promise => { + if (!isConnected()) { + console.log('[AudioListener] Device not connected, cannot start audio listener'); + return false; + } + + try { + // Request high connection priority before starting audio listener + await 
omiConnection.requestConnectionPriority(ConnectionPriority.High); + console.log('[AudioListener] Requested high connection priority'); + } catch (error) { + console.error('[AudioListener] Failed to request high connection priority:', error); + // Continue anyway, this is not a fatal error + } + + try { + const subscription = await omiConnection.startAudioBytesListener((bytes) => { + localPacketCounterRef.current++; + if (bytes && bytes.length > 0) { + onAudioData(new Uint8Array(bytes)); + } + }); + + if (subscription) { + audioSubscriptionRef.current = subscription; + setIsListeningAudio(true); + setIsRetrying(false); + setRetryAttempts(0); + console.log('[AudioListener] Audio listener started successfully'); + return true; + } else { + console.error('[AudioListener] No subscription returned from startAudioBytesListener'); + return false; + } + } catch (error) { + console.error('[AudioListener] Failed to start audio listener:', error); + return false; + } + }, [omiConnection, isConnected]); + + // Retry mechanism with exponential backoff + const retryStartAudioListener = useCallback(async () => { + if (!shouldRetryRef.current || !currentOnAudioDataRef.current) { + console.log('[AudioListener] Retry cancelled or no callback available'); + return; + } + + const currentAttempt = retryAttempts; + if (currentAttempt >= MAX_RETRY_ATTEMPTS) { + console.log(`[AudioListener] Maximum retry attempts (${MAX_RETRY_ATTEMPTS}) reached`); + setIsRetrying(false); + setIsListeningAudio(false); + Alert.alert( + 'Audio Listener Failed', + `Failed to start audio listener after ${MAX_RETRY_ATTEMPTS} attempts. 
Please try again manually.` + ); + return; + } + + console.log(`[AudioListener] Retry attempt ${currentAttempt + 1}/${MAX_RETRY_ATTEMPTS}`); + setRetryAttempts(currentAttempt + 1); + setIsRetrying(true); + + const success = await attemptStartAudioListener(currentOnAudioDataRef.current); + + if (success) { + console.log('[AudioListener] Retry successful'); + return; + } + + // If still should retry, schedule next attempt + if (shouldRetryRef.current) { + const delay = getRetryDelay(currentAttempt); + console.log(`[AudioListener] Scheduling retry in ${Math.round(delay)}ms`); + + retryTimeoutRef.current = setTimeout(() => { + if (shouldRetryRef.current) { + retryStartAudioListener(); + } + }, delay); + } + }, [retryAttempts, attemptStartAudioListener, getRetryDelay]); + + const startAudioListener = useCallback(async (onAudioData: (bytes: Uint8Array) => void) => { + if (!isConnected()) { + Alert.alert('Not Connected', 'Please connect to a device first to start audio listener.'); + return; + } + + if (isListeningAudio) { + console.log('[AudioListener] Audio listener is already active. 
Stopping first.'); + await stopAudioListener(); + } + + // Store the callback for retry attempts + currentOnAudioDataRef.current = onAudioData; + shouldRetryRef.current = true; + + setAudioPacketsReceived(0); // Reset counter on start + localPacketCounterRef.current = 0; + setRetryAttempts(0); + console.log('[AudioListener] Starting audio bytes listener...'); + + // Batch UI updates for packet counter + if (uiUpdateIntervalRef.current) clearInterval(uiUpdateIntervalRef.current); + uiUpdateIntervalRef.current = setInterval(() => { + if (localPacketCounterRef.current > 0) { + setAudioPacketsReceived(prev => prev + localPacketCounterRef.current); + localPacketCounterRef.current = 0; + } + }, 500); // Update UI every 500ms + + // Try to start audio listener + const success = await attemptStartAudioListener(onAudioData); + + if (!success && shouldRetryRef.current) { + console.log('[AudioListener] Initial attempt failed, starting retry mechanism'); + setIsRetrying(true); + // Start retry mechanism + retryStartAudioListener(); + } + }, [omiConnection, isConnected, stopAudioListener, attemptStartAudioListener, retryStartAudioListener]); + + // Cleanup on unmount + useEffect(() => { + return () => { + shouldRetryRef.current = false; + if (retryTimeoutRef.current) { + clearTimeout(retryTimeoutRef.current); + } + if (uiUpdateIntervalRef.current) { + clearInterval(uiUpdateIntervalRef.current); + } + }; + }, []); + + return { + isListeningAudio, + audioPacketsReceived, + startAudioListener, + stopAudioListener, + isRetrying, + retryAttempts, + }; +}; \ No newline at end of file diff --git a/app/app/hooks/useAudioStreamer.ts b/app/app/hooks/useAudioStreamer.ts new file mode 100644 index 00000000..51e9c8ce --- /dev/null +++ b/app/app/hooks/useAudioStreamer.ts @@ -0,0 +1,388 @@ +// useAudioStreamer.ts +import { useState, useRef, useCallback, useEffect } from 'react'; +import { PermissionsAndroid, Platform } from 'react-native'; +import notifee, { AndroidImportance } from 
'@notifee/react-native'; +import NetInfo from '@react-native-community/netinfo'; + +interface UseAudioStreamer { + isStreaming: boolean; + isConnecting: boolean; + error: string | null; + startStreaming: (url: string) => Promise; + getWebSocketReadyState: () => number | undefined; + stopStreaming: () => void; + sendAudio: (audioBytes: Uint8Array) => void; +} + +// Wyoming Protocol Types +interface WyomingEvent { + type: string; + data?: any; + version?: string; + payload_length?: number | null; +} + +// Audio format constants (matching OMI device format) +const AUDIO_FORMAT = { + rate: 16000, + width: 2, + channels: 1, +}; + +/** -------------------- Foreground Service helpers (NEW) -------------------- */ + +const FGS_CHANNEL_ID = 'ws_channel'; +const FGS_NOTIFICATION_ID = 'ws_foreground'; + +// Notifee requires registering the foreground service task once. +let _fgsRegistered = false; +function ensureFgsRegistered() { + if (_fgsRegistered) return; + notifee.registerForegroundService(async () => { + // Keep this task alive as long as any foreground notification is active. 
+ return new Promise(() => {}); + }); + _fgsRegistered = true; +} + +async function ensureNotificationPermission() { + if (Platform.OS === 'android' && Platform.Version >= 33) { + await PermissionsAndroid.request( + PermissionsAndroid.PERMISSIONS.POST_NOTIFICATIONS + ); + } +} + +async function startForegroundServiceNotification(title: string, body: string) { + ensureFgsRegistered(); + await ensureNotificationPermission(); + + // Create channel if needed + await notifee.createChannel({ + id: FGS_CHANNEL_ID, + name: 'Streaming', + importance: AndroidImportance.LOW, + }); + + // Start (or update) the foreground notification + await notifee.displayNotification({ + id: FGS_NOTIFICATION_ID, + title, + body, + android: { + channelId: FGS_CHANNEL_ID, + asForegroundService: true, + ongoing: true, + pressAction: { id: 'default' }, + }, + }); +} + +async function stopForegroundServiceNotification() { + try { + await notifee.stopForegroundService(); + } catch {} + try { + await notifee.cancelNotification(FGS_NOTIFICATION_ID); + } catch {} +} + +/** -------------------- Hook -------------------- */ + +export const useAudioStreamer = (): UseAudioStreamer => { + const [isStreaming, setIsStreaming] = useState(false); + const [isConnecting, setIsConnecting] = useState(false); + const [error, setError] = useState(null); + + const websocketRef = useRef(null); + const manuallyStoppedRef = useRef(false); + const reconnectTimeoutRef = useRef(null); + const heartbeatRef = useRef(null); + const currentUrlRef = useRef(''); + + // backoff: 3s, 6s, 12s, ... 
capped at 30s; up to 10 attempts before showing an error notification + const reconnectAttemptsRef = useRef(0); + const MAX_RECONNECT_ATTEMPTS = 10; + const BASE_RECONNECT_MS = 3000; + const MAX_RECONNECT_MS = 30000; + const HEARTBEAT_MS = 25000; + + // Guard state updates after unmount + const mountedRef = useRef(true); + useEffect(() => { + return () => { + mountedRef.current = false; + }; + }, []); + + const setStateSafe = useCallback((setter: (v: T) => void, val: T) => { + if (mountedRef.current) setter(val); + }, []); + + // Helper: background-safe, optional notification for errors/info (NEW) + const notifyInfo = useCallback(async (title: string, body: string) => { + try { + await notifee.displayNotification({ + title, + body, + android: { channelId: FGS_CHANNEL_ID }, + }); + } catch { + // ignore if not available + } + }, []); + + // Helper: send Wyoming protocol events (UNCHANGED logic) + const sendWyomingEvent = useCallback(async (event: WyomingEvent, payload?: Uint8Array) => { + if (!websocketRef.current || websocketRef.current.readyState !== WebSocket.OPEN) { + console.log('[AudioStreamer] WebSocket not ready for Wyoming event'); + return; + } + try { + event.version = '1.0.0'; + event.payload_length = payload ? 
payload.length : null; + + const jsonHeader = JSON.stringify(event) + '\n'; + websocketRef.current.send(jsonHeader); + if (payload?.length) websocketRef.current.send(payload); + } catch (e) { + const errorMessage = (e as any).message || 'Error sending Wyoming event.'; + console.error('[AudioStreamer] Error sending Wyoming event:', errorMessage); + setStateSafe(setError, errorMessage); + } + }, [setStateSafe]); + + // Stop (CHANGED): use explicit close code & reason; clear heartbeat; stop FGS + const stopStreaming = useCallback(async () => { + manuallyStoppedRef.current = true; + + if (reconnectTimeoutRef.current) { + clearTimeout(reconnectTimeoutRef.current); + reconnectTimeoutRef.current = null; + } + if (heartbeatRef.current) { + clearInterval(heartbeatRef.current); + heartbeatRef.current = null; + } + + if (websocketRef.current) { + try { + // Send audio-stop best-effort + if (websocketRef.current.readyState === WebSocket.OPEN) { + const audioStopEvent: WyomingEvent = { type: 'audio-stop', data: { timestamp: Date.now() } }; + await sendWyomingEvent(audioStopEvent); + } + } catch {} + try { + websocketRef.current.close(1000, 'manual-stop'); // <— explicit manual reason + } catch {} + websocketRef.current = null; + } + + setStateSafe(setIsStreaming, false); + setStateSafe(setIsConnecting, false); + await stopForegroundServiceNotification(); + }, [sendWyomingEvent, setStateSafe]); + + // Reconnect (CHANGED): exponential backoff + no Alerts + optional notification on max attempts + const attemptReconnect = useCallback(() => { + if (manuallyStoppedRef.current || !currentUrlRef.current) { + console.log('[AudioStreamer] Not reconnecting: manually stopped or missing URL'); + return; + } + if (reconnectAttemptsRef.current >= MAX_RECONNECT_ATTEMPTS) { + console.log('[AudioStreamer] Reconnect attempts exhausted'); + notifyInfo('Connection lost', 'Failed to reconnect after multiple attempts.'); + manuallyStoppedRef.current = true; + setStateSafe(setIsStreaming, false); + 
setStateSafe(setIsConnecting, false); + return; + } + + const attempt = reconnectAttemptsRef.current + 1; + const delay = Math.min(MAX_RECONNECT_MS, BASE_RECONNECT_MS * Math.pow(2, reconnectAttemptsRef.current)); + reconnectAttemptsRef.current = attempt; + + console.log(`[AudioStreamer] Reconnect attempt ${attempt}/${MAX_RECONNECT_ATTEMPTS} in ${delay}ms`); + + if (reconnectTimeoutRef.current) clearTimeout(reconnectTimeoutRef.current); + setStateSafe(setIsConnecting, true); + + reconnectTimeoutRef.current = setTimeout(() => { + if (!manuallyStoppedRef.current) { + startStreaming(currentUrlRef.current) + .catch(err => { + console.error('[AudioStreamer] Reconnection failed:', err?.message || err); + attemptReconnect(); + }); + } + }, delay); + }, [notifyInfo, setStateSafe]); + + // Start (CHANGED): start/refresh FGS before connecting; remove Alerts; set heartbeat + const startStreaming = useCallback(async (url: string): Promise => { + const trimmed = (url || '').trim(); + if (!trimmed) { + const errorMsg = 'WebSocket URL is required.'; + setStateSafe(setError, errorMsg); + return Promise.reject(new Error(errorMsg)); + } + + currentUrlRef.current = trimmed; + manuallyStoppedRef.current = false; + + // Network gate + const netState = await NetInfo.fetch(); + if (!netState.isConnected || !netState.isInternetReachable) { + const errorMsg = 'No internet connection.'; + setStateSafe(setError, errorMsg); + return Promise.reject(new Error(errorMsg)); + } + + // Ensure Foreground Service is up so the JS VM isn’t killed when backgrounded + await startForegroundServiceNotification('Streaming active', 'Keeping WebSocket connection alive'); + + console.log(`[AudioStreamer] Initializing WebSocket: ${trimmed}`); + if (websocketRef.current) await stopStreaming(); // close any existing + + setStateSafe(setIsConnecting, true); + setStateSafe(setError, null); + + return new Promise((resolve, reject) => { + try { + const ws = new WebSocket(trimmed); + + ws.onopen = async () => { + 
console.log('[AudioStreamer] WebSocket open'); + websocketRef.current = ws; + reconnectAttemptsRef.current = 0; + setStateSafe(setIsConnecting, false); + setStateSafe(setIsStreaming, true); + setStateSafe(setError, null); + + // Start heartbeat + if (heartbeatRef.current) clearInterval(heartbeatRef.current); + heartbeatRef.current = setInterval(() => { + try { + if (websocketRef.current?.readyState === WebSocket.OPEN) { + websocketRef.current.send(JSON.stringify({ type: 'ping', t: Date.now() })); + } + } catch {} + }, HEARTBEAT_MS); + + try { + const audioStartEvent: WyomingEvent = { type: 'audio-start', data: AUDIO_FORMAT }; + console.log('[AudioStreamer] Sending audio-start event'); + await sendWyomingEvent(audioStartEvent); + console.log('[AudioStreamer] ✅ audio-start sent successfully'); + } catch (e) { + console.error('[AudioStreamer] audio-start failed:', e); + } + + resolve(); + }; + + ws.onmessage = (event) => { + // Handle server messages if needed + console.log('[AudioStreamer] Message:', event.data); + }; + + ws.onerror = (e) => { + const msg = (e as any).message || 'WebSocket connection error.'; + console.error('[AudioStreamer] Error:', msg); + setStateSafe(setError, msg); + setStateSafe(setIsConnecting, false); + setStateSafe(setIsStreaming, false); + if (websocketRef.current === ws) websocketRef.current = null; + reject(new Error(msg)); + }; + + ws.onclose = (event) => { + console.log('[AudioStreamer] Closed. 
Code:', event.code, 'Reason:', event.reason); + const isManual = event.code === 1000 && event.reason === 'manual-stop'; + + setStateSafe(setIsConnecting, false); + setStateSafe(setIsStreaming, false); + + if (websocketRef.current === ws) websocketRef.current = null; + + if (!isManual && !manuallyStoppedRef.current) { + setStateSafe(setError, 'Connection closed; attempting to reconnect.'); + attemptReconnect(); + } + }; + } catch (e) { + const msg = (e as any).message || 'Failed to create WebSocket.'; + console.error('[AudioStreamer] Create WS error:', msg); + setStateSafe(setError, msg); + setStateSafe(setIsConnecting, false); + setStateSafe(setIsStreaming, false); + reject(new Error(msg)); + } + }); + }, [attemptReconnect, sendWyomingEvent, setStateSafe, stopStreaming]); + + const sendAudio = useCallback(async (audioBytes: Uint8Array) => { + if (websocketRef.current && websocketRef.current.readyState === WebSocket.OPEN && audioBytes.length > 0) { + try { + console.log(`[AudioStreamer] 📤 Sending audio chunk: ${audioBytes.length} bytes`); + const audioChunkEvent: WyomingEvent = { type: 'audio-chunk', data: AUDIO_FORMAT }; + await sendWyomingEvent(audioChunkEvent, audioBytes); + } catch (e) { + const msg = (e as any).message || 'Error sending audio data.'; + console.error('[AudioStreamer] sendAudio error:', msg); + setStateSafe(setError, msg); + } + } else { + console.log( + `[AudioStreamer] NOT sending audio. 
hasWS=${!!websocketRef.current + } ready=${websocketRef.current?.readyState === WebSocket.OPEN + } bytes=${audioBytes.length} actualReady=${websocketRef.current?.readyState}` + ); + } + }, [sendWyomingEvent, setStateSafe]); + + const getWebSocketReadyState = useCallback(() => websocketRef.current?.readyState, []); + + /** Connectivity-triggered reconnect (NEW) */ + useEffect(() => { + const sub = NetInfo.addEventListener(state => { + const online = !!state.isConnected && !!state.isInternetReachable; + if (online && !manuallyStoppedRef.current) { + // If socket isn’t open, try to reconnect with backoff + const ready = websocketRef.current?.readyState; + if (ready !== WebSocket.OPEN && currentUrlRef.current) { + console.log('[AudioStreamer] Network back; scheduling reconnect'); + attemptReconnect(); + } + } + }); + return () => sub(); + }, [attemptReconnect]); + + /** Cleanup on unmount (CHANGED): don’t auto-stop streaming; just clear timers */ + useEffect(() => { + return () => { + if (reconnectTimeoutRef.current) { + clearTimeout(reconnectTimeoutRef.current); + reconnectTimeoutRef.current = null; + } + if (heartbeatRef.current) { + clearInterval(heartbeatRef.current); + heartbeatRef.current = null; + } + // Intentionally NOT calling stopStreaming() to allow background persistence. + // The owner (screen/app) should call stopStreaming() explicitly when the session ends. 
+ }; + }, []); + + return { + isStreaming, + isConnecting, + error, + startStreaming, + getWebSocketReadyState, + stopStreaming, + sendAudio, + }; +}; diff --git a/friend-lite/app/hooks/useBluetoothManager.ts b/app/app/hooks/useBluetoothManager.ts similarity index 100% rename from friend-lite/app/hooks/useBluetoothManager.ts rename to app/app/hooks/useBluetoothManager.ts diff --git a/friend-lite/app/hooks/useDeviceConnection.ts b/app/app/hooks/useDeviceConnection.ts similarity index 98% rename from friend-lite/app/hooks/useDeviceConnection.ts rename to app/app/hooks/useDeviceConnection.ts index 65d0dd2f..e729169e 100644 --- a/friend-lite/app/hooks/useDeviceConnection.ts +++ b/app/app/hooks/useDeviceConnection.ts @@ -1,6 +1,6 @@ import { useState, useCallback } from 'react'; import { Alert } from 'react-native'; -import { OmiConnection, BleAudioCodec, OmiDevice } from '@omiai/omi-react-native'; +import { OmiConnection, BleAudioCodec, OmiDevice } from 'friend-lite-react-native'; interface UseDeviceConnection { connectedDevice: OmiDevice | null; diff --git a/friend-lite/app/hooks/useDeviceScanning.ts b/app/app/hooks/useDeviceScanning.ts similarity index 95% rename from friend-lite/app/hooks/useDeviceScanning.ts rename to app/app/hooks/useDeviceScanning.ts index 71a9c177..d7780266 100644 --- a/friend-lite/app/hooks/useDeviceScanning.ts +++ b/app/app/hooks/useDeviceScanning.ts @@ -1,7 +1,6 @@ import { useState, useEffect, useCallback, useRef } from 'react'; -import { Alert, Linking, Platform } from 'react-native'; -import { BleManager, Device, State as BluetoothState } from 'react-native-ble-plx'; -import { OmiConnection, OmiDevice } from '@omiai/omi-react-native'; // Assuming this is the correct import for Omi types +import { BleManager, State as BluetoothState } from 'react-native-ble-plx'; +import { OmiConnection, OmiDevice } from 'friend-lite-react-native'; // Assuming this is the correct import for Omi types interface UseDeviceScanning { devices: OmiDevice[]; diff 
--git a/app/app/hooks/usePhoneAudioRecorder.ts b/app/app/hooks/usePhoneAudioRecorder.ts new file mode 100644 index 00000000..9d422c38 --- /dev/null +++ b/app/app/hooks/usePhoneAudioRecorder.ts @@ -0,0 +1,254 @@ +// usePhoneAudioRecorder.ts +import { useState, useRef, useCallback, useEffect } from 'react'; +import { Alert, Platform } from 'react-native'; +import { + useAudioRecorder, + AudioRecording, + AudioAnalysis, + ExpoAudioStreamModule, +} from '@siteed/expo-audio-studio'; +import type { AudioDataEvent } from '@siteed/expo-audio-studio'; +import base64 from 'react-native-base64'; + + +interface UsePhoneAudioRecorder { + isRecording: boolean; + isInitializing: boolean; + error: string | null; + audioLevel: number; + startRecording: (onAudioData: (pcmBuffer: Uint8Array) => void) => Promise; + stopRecording: () => Promise; +} + +// Audio format constants matching backend expectations +const RECORDING_CONFIG = { + sampleRate: 16000 as const, // 16kHz for backend compatibility + channels: 1 as const, // Mono + encoding: 'pcm_16bit' as const, // 16-bit PCM + interval: 100, // Send audio every 100ms + intervalAnalysis: 100, // Analysis every 100ms +}; + +export const usePhoneAudioRecorder = (): UsePhoneAudioRecorder => { + const [isInitializing, setIsInitializing] = useState(false); + const [error, setError] = useState(null); + const [audioLevel, setAudioLevel] = useState(0); + + const onAudioDataRef = useRef<((pcmBuffer: Uint8Array) => void) | null>(null); + const mountedRef = useRef(true); + + // Use the expo-audio-studio hook + const { + startRecording: startRecorderInternal, + stopRecording: stopRecorderInternal, + isRecording, + pauseRecording, + resumeRecording, + analysisData, + } = useAudioRecorder(); + + // Convert AudioDataEvent to PCM buffer + const processAudioDataEvent = useCallback((event: AudioDataEvent): Uint8Array | null => { + try { + const audioData = event.data; + console.log('[PhoneAudioRecorder] processAudioDataEvent called, data type:', typeof 
audioData); + + if (typeof audioData === 'string') { + // Base64 encoded data (native platforms) - decode using react-native-base64 + console.log('[PhoneAudioRecorder] Decoding Base64 string, length:', audioData.length); + const binaryString = base64.decode(audioData); + console.log('[PhoneAudioRecorder] Decoded to binary string, length:', binaryString.length); + + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + console.log('[PhoneAudioRecorder] Created Uint8Array, length:', bytes.length); + return bytes; + } else if (audioData instanceof Float32Array) { + // Float32Array (web platform) - convert to 16-bit PCM + const int16Buffer = new Int16Array(audioData.length); + for (let i = 0; i < audioData.length; i++) { + // Convert float32 (-1 to 1) to int16 (-32768 to 32767) + const s = Math.max(-1, Math.min(1, audioData[i])); + int16Buffer[i] = s < 0 ? s * 0x8000 : s * 0x7FFF; + } + // Convert Int16Array to Uint8Array (little-endian) + const buffer = new ArrayBuffer(int16Buffer.length * 2); + const view = new DataView(buffer); + for (let i = 0; i < int16Buffer.length; i++) { + view.setInt16(i * 2, int16Buffer[i], true); // little-endian + } + return new Uint8Array(buffer); + } + return null; + } catch (error) { + console.error('[PhoneAudioRecorder] Audio conversion error:', error); + return null; + } + }, []); + + // Safe state setter + const setStateSafe = useCallback((setter: (v: T) => void, val: T) => { + if (mountedRef.current) setter(val); + }, []); + + // Check and request microphone permissions + const checkPermissions = useCallback(async (): Promise => { + try { + const { granted } = await ExpoAudioStreamModule.getPermissionsAsync(); + if (granted) { + return true; + } + + const { granted: newGranted } = await ExpoAudioStreamModule.requestPermissionsAsync(); + if (!newGranted) { + Alert.alert( + 'Microphone Permission Required', + 'Please enable microphone access 
in your device settings to use phone audio streaming.', + [{ text: 'OK' }] + ); + return false; + } + return true; + } catch (error) { + console.error('[PhoneAudioRecorder] Permission check error:', error); + return false; + } + }, []); + + // Start recording from phone microphone - EXACT 2025 guide pattern + const startRecording = useCallback(async (onAudioData: (pcmBuffer: Uint8Array) => void): Promise => { + if (isRecording) { + console.log('[PhoneAudioRecorder] Already recording, stopping first...'); + await stopRecording(); + } + + setStateSafe(setIsInitializing, true); + setStateSafe(setError, null); + onAudioDataRef.current = onAudioData; + + try { + // EXACT permission check from guide + const { granted } = await ExpoAudioStreamModule.requestPermissionsAsync(); + if (!granted) { + throw new Error('Microphone permission denied'); + } + + console.log('[PhoneAudioRecorder] Starting audio recording...'); + + // EXACT config from 2025 guide + processing for audio levels + const config = { + interval: 100, + sampleRate: 16000, + channels: 1, + encoding: "pcm_16bit" as const, + enableProcessing: true, // Enable audio analysis for live RMS + intervalAnalysis: 500, // Analysis every 500ms + onAudioStream: async (event: AudioDataEvent) => { + // EXACT payload handling from guide + const payload = typeof event.data === "string" + ? 
event.data + : Buffer.from(event.data as ArrayBuffer).toString("base64"); + + // Convert to our expected format + if (onAudioDataRef.current && mountedRef.current) { + const pcmBuffer = processAudioDataEvent(event); + if (pcmBuffer && pcmBuffer.length > 0) { + onAudioDataRef.current(pcmBuffer); + } + } + } + }; + + const result = await startRecorderInternal(config); + + if (!result) { + throw new Error('Failed to start recording'); + } + + setStateSafe(setIsInitializing, false); + console.log('[PhoneAudioRecorder] Recording started successfully'); + + } catch (error) { + const errorMessage = (error as any).message || 'Failed to start recording'; + console.error('[PhoneAudioRecorder] Start recording error:', errorMessage); + setStateSafe(setError, errorMessage); + setStateSafe(setIsInitializing, false); + onAudioDataRef.current = null; + + throw new Error(errorMessage); + } + }, [isRecording, startRecorderInternal, processAudioDataEvent, setStateSafe]); + + // Stop recording + const stopRecording = useCallback(async (): Promise => { + console.log('[PhoneAudioRecorder] Stopping recording...'); + + // Early return if not recording + if (!isRecording) { + console.log('[PhoneAudioRecorder] Not recording, nothing to stop'); + onAudioDataRef.current = null; + setStateSafe(setAudioLevel, 0); + setStateSafe(setIsInitializing, false); + return; + } + + onAudioDataRef.current = null; + setStateSafe(setAudioLevel, 0); + + try { + const result = await stopRecorderInternal(); + console.log('[PhoneAudioRecorder] Recording stopped'); + } catch (error) { + // Only log error if it's not about recording being inactive + const errorMessage = (error as any).message || ''; + if (!errorMessage.includes('Recording is not active') && !errorMessage.includes('not active')) { + console.error('[PhoneAudioRecorder] Stop recording error:', error); + setStateSafe(setError, 'Failed to stop recording'); + } else { + console.log('[PhoneAudioRecorder] Recording was already inactive'); + } + } + + 
setStateSafe(setIsInitializing, false); + }, [isRecording, stopRecorderInternal, setStateSafe]); + + // Update audio level from analysis data + useEffect(() => { + if (analysisData?.dataPoints && analysisData.dataPoints.length > 0 && mountedRef.current) { + const latestDataPoint = analysisData.dataPoints[analysisData.dataPoints.length - 1]; + const liveRMS = latestDataPoint.rms; + setStateSafe(setAudioLevel, liveRMS); + } + }, [analysisData, setStateSafe]); + + // Cleanup on unmount - NO dependencies so it only runs on true unmount + useEffect(() => { + return () => { + mountedRef.current = false; + console.log('[PhoneAudioRecorder] Component unmounting, setting mountedRef to false'); + }; + }, []); // Empty dependency array - only runs on mount/unmount + + // Separate effect for stopping recording when needed + useEffect(() => { + return () => { + // Stop recording if active when dependencies change + if (isRecording) { + stopRecorderInternal().catch(err => + console.error('[PhoneAudioRecorder] Cleanup stop error:', err) + ); + } + }; + }, [isRecording, stopRecorderInternal]); + + return { + isRecording, + isInitializing, + error, + audioLevel, + startRecording, + stopRecording, + }; +}; \ No newline at end of file diff --git a/app/app/index.tsx b/app/app/index.tsx new file mode 100644 index 00000000..8bb1234a --- /dev/null +++ b/app/app/index.tsx @@ -0,0 +1,817 @@ +import React, { useRef, useCallback, useEffect, useState } from 'react'; +import { StyleSheet, Text, View, SafeAreaView, ScrollView, Platform, FlatList, ActivityIndicator, Alert, Switch, Button, TouchableOpacity, KeyboardAvoidingView } from 'react-native'; +import { OmiConnection } from 'friend-lite-react-native'; // OmiDevice also comes from here +import { State as BluetoothState } from 'react-native-ble-plx'; // Import State from ble-plx + +// Hooks +import { useBluetoothManager } from './hooks/useBluetoothManager'; +import { useDeviceScanning } from './hooks/useDeviceScanning'; +import { 
useDeviceConnection } from './hooks/useDeviceConnection'; +import { + saveLastConnectedDeviceId, + getLastConnectedDeviceId, + saveWebSocketUrl, + getWebSocketUrl, + saveUserId, + getUserId, + getAuthEmail, + getJwtToken, +} from './utils/storage'; +import { useAudioListener } from './hooks/useAudioListener'; +import { useAudioStreamer } from './hooks/useAudioStreamer'; +import { usePhoneAudioRecorder } from './hooks/usePhoneAudioRecorder'; + +// Components +import BluetoothStatusBanner from './components/BluetoothStatusBanner'; +import ScanControls from './components/ScanControls'; +import DeviceListItem from './components/DeviceListItem'; +import DeviceDetails from './components/DeviceDetails'; +import AuthSection from './components/AuthSection'; +import BackendStatus from './components/BackendStatus'; +import PhoneAudioButton from './components/PhoneAudioButton'; + +export default function App() { + // Initialize OmiConnection + const omiConnection = useRef(new OmiConnection()).current; + + // Filter state + const [showOnlyOmi, setShowOnlyOmi] = useState(false); + + // State for remembering the last connected device + const [lastKnownDeviceId, setLastKnownDeviceId] = useState(null); + const [isAttemptingAutoReconnect, setIsAttemptingAutoReconnect] = useState(false); + const [triedAutoReconnectForCurrentId, setTriedAutoReconnectForCurrentId] = useState(false); + + // State for WebSocket URL for custom audio streaming + const [webSocketUrl, setWebSocketUrl] = useState(''); + + // State for User ID + const [userId, setUserId] = useState(''); + + // Authentication state + const [isAuthenticated, setIsAuthenticated] = useState(false); + const [currentUserEmail, setCurrentUserEmail] = useState(null); + const [jwtToken, setJwtToken] = useState(null); + + // Bluetooth Management Hook + const { + bleManager, + bluetoothState, + permissionGranted, + requestBluetoothPermission, + isPermissionsLoading, + } = useBluetoothManager(); + + // Custom Audio Streamer Hook + const 
audioStreamer = useAudioStreamer(); + + // Phone Audio Recorder Hook + const phoneAudioRecorder = usePhoneAudioRecorder(); + const [isPhoneAudioMode, setIsPhoneAudioMode] = useState(false); + + + const { + isListeningAudio: isOmiAudioListenerActive, + audioPacketsReceived, + startAudioListener: originalStartAudioListener, + stopAudioListener: originalStopAudioListener, + isRetrying: isAudioListenerRetrying, + retryAttempts: audioListenerRetryAttempts, + } = useAudioListener( + omiConnection, + () => !!deviceConnection.connectedDeviceId + ); + + // Refs to hold the current state for onDeviceDisconnect without causing re-memoization + const isOmiAudioListenerActiveRef = useRef(isOmiAudioListenerActive); + const isAudioStreamingRef = useRef(audioStreamer.isStreaming); + + useEffect(() => { + isOmiAudioListenerActiveRef.current = isOmiAudioListenerActive; + }, [isOmiAudioListenerActive]); + + useEffect(() => { + isAudioStreamingRef.current = audioStreamer.isStreaming; + }, [audioStreamer.isStreaming]); + + // Now define the stable onDeviceConnect and onDeviceDisconnect callbacks + const onDeviceConnect = useCallback(async () => { + console.log('[App.tsx] Device connected callback.'); + const deviceIdToSave = omiConnection.connectedDeviceId; // Corrected: Use property from OmiConnection instance + + if (deviceIdToSave) { + console.log('[App.tsx] Saving connected device ID to storage:', deviceIdToSave); + await saveLastConnectedDeviceId(deviceIdToSave); + setLastKnownDeviceId(deviceIdToSave); // Update state for consistency + setTriedAutoReconnectForCurrentId(false); // Reset if a new device connects successfully + } else { + console.warn('[App.tsx] onDeviceConnect: Could not determine connected device ID to save. 
omiConnection.connectedDeviceId was null/undefined.'); + } + // Actions on connect (e.g., auto-fetch codec/battery) + }, [omiConnection]); // saveLastConnectedDeviceId is stable, omiConnection is stable ref + + const onDeviceDisconnect = useCallback(async () => { + console.log('[App.tsx] Device disconnected callback.'); + if (isOmiAudioListenerActiveRef.current) { + console.log('[App.tsx] Disconnect: Stopping audio listener.'); + await originalStopAudioListener(); + } + if (isAudioStreamingRef.current) { + console.log('[App.tsx] Disconnect: Stopping custom audio streaming.'); + audioStreamer.stopStreaming(); + } + // Also stop phone audio if it's running + if (phoneAudioRecorder.isRecording) { + console.log('[App.tsx] Disconnect: Stopping phone audio recording.'); + await phoneAudioRecorder.stopRecording(); + setIsPhoneAudioMode(false); + } + }, [originalStopAudioListener, audioStreamer.stopStreaming, phoneAudioRecorder.stopRecording, phoneAudioRecorder.isRecording, setIsPhoneAudioMode]); + + // Initialize Device Connection hook, passing the memoized callbacks + const deviceConnection = useDeviceConnection( + omiConnection, + onDeviceDisconnect, + onDeviceConnect + ); + + // Effect to load settings on app startup + useEffect(() => { + const loadSettings = async () => { + const deviceId = await getLastConnectedDeviceId(); + if (deviceId) { + console.log('[App.tsx] Loaded last known device ID from storage:', deviceId); + setLastKnownDeviceId(deviceId); + setTriedAutoReconnectForCurrentId(false); + } else { + console.log('[App.tsx] No last known device ID found in storage. 
Auto-reconnect will not be attempted.'); + setLastKnownDeviceId(null); // Explicitly ensure it's null + setTriedAutoReconnectForCurrentId(true); // Mark that we shouldn't try (as no ID is known) + } + + const storedWsUrl = await getWebSocketUrl(); + if (storedWsUrl) { + console.log('[App.tsx] Loaded WebSocket URL from storage:', storedWsUrl); + setWebSocketUrl(storedWsUrl); + } else { + // Set default to simple backend + const defaultUrl = 'ws://localhost:8000/ws'; + console.log('[App.tsx] No stored WebSocket URL, setting default for simple backend:', defaultUrl); + setWebSocketUrl(defaultUrl); + await saveWebSocketUrl(defaultUrl); + } + + const storedUserId = await getUserId(); + if (storedUserId) { + console.log('[App.tsx] Loaded User ID from storage:', storedUserId); + setUserId(storedUserId); + } + + // Load authentication data + const storedEmail = await getAuthEmail(); + const storedToken = await getJwtToken(); + if (storedEmail && storedToken) { + console.log('[App.tsx] Loaded auth data from storage for:', storedEmail); + setCurrentUserEmail(storedEmail); + setJwtToken(storedToken); + setIsAuthenticated(true); + } + }; + loadSettings(); + }, []); + + + // Device Scanning Hook + const { + devices: scannedDevices, + scanning, + startScan, + stopScan: stopDeviceScanAction, + } = useDeviceScanning( + bleManager, // From useBluetoothManager + omiConnection, + permissionGranted, // From useBluetoothManager + bluetoothState === BluetoothState.PoweredOn, // Derived from useBluetoothManager + requestBluetoothPermission // From useBluetoothManager, should be stable + ); + + // Effect for attempting auto-reconnection + useEffect(() => { + if ( + bluetoothState === BluetoothState.PoweredOn && + permissionGranted && + lastKnownDeviceId && + !deviceConnection.connectedDeviceId && // Only if not already connected + !deviceConnection.isConnecting && // Only if not currently trying to connect by other means + !scanning && // Only if not currently scanning + 
!isAttemptingAutoReconnect && // Only if not already attempting auto-reconnect + !triedAutoReconnectForCurrentId // Only try once per loaded/set lastKnownDeviceId + ) { + const attemptAutoConnect = async () => { + console.log(`[App.tsx] Attempting to auto-reconnect to device: ${lastKnownDeviceId}`); + setIsAttemptingAutoReconnect(true); + setTriedAutoReconnectForCurrentId(true); // Mark that we've initiated an attempt for this ID + try { + // useDeviceConnection.connectToDevice can take a device ID string directly + await deviceConnection.connectToDevice(lastKnownDeviceId); + // If connectToDevice throws, catch block handles it. + // If it resolves, the connection attempt was made. + // The onDeviceConnect callback will be triggered if successful. + console.log(`[App.tsx] Auto-reconnect attempt initiated for ${lastKnownDeviceId}. Waiting for connection event.`); + // Removed the if(success) block as connectToDevice is void + } catch (error) { + console.error(`[App.tsx] Error auto-reconnecting to ${lastKnownDeviceId}:`, error); + // Clear the problematic device ID from storage and state + if (lastKnownDeviceId) { // Ensure we have an ID to clear + console.log(`[App.tsx] Clearing problematic device ID ${lastKnownDeviceId} from storage due to auto-reconnect failure.`); + await saveLastConnectedDeviceId(null); // Clears from AsyncStorage + setLastKnownDeviceId(null); // Clears from current app state + } + } finally { + setIsAttemptingAutoReconnect(false); + } + }; + attemptAutoConnect(); + } + }, [ + bluetoothState, + permissionGranted, + lastKnownDeviceId, + deviceConnection.connectedDeviceId, + deviceConnection.isConnecting, + scanning, + deviceConnection.connectToDevice, // Stable function from the hook + triedAutoReconnectForCurrentId, + isAttemptingAutoReconnect, // Added to prevent re-triggering while one is in progress + // Added saveLastConnectedDeviceId and setLastKnownDeviceId to dependency array if they were not already implicitly covered + // 
saveLastConnectedDeviceId is an import, setLastKnownDeviceId is a state setter - typically stable + ]); + + const handleStartAudioListeningAndStreaming = useCallback(async () => { + if (!webSocketUrl || webSocketUrl.trim() === '') { + Alert.alert('WebSocket URL Required', 'Please enter the WebSocket URL for streaming.'); + return; + } + if (!omiConnection.isConnected() || !deviceConnection.connectedDeviceId) { + Alert.alert('Device Not Connected', 'Please connect to an OMI device first.'); + return; + } + + try { + let finalWebSocketUrl = webSocketUrl.trim(); + + // Check if this is the advanced backend (requires authentication) or simple backend + const isAdvancedBackend = jwtToken && isAuthenticated; + + if (isAdvancedBackend) { + // Advanced backend: include JWT token and device parameters + const params = new URLSearchParams(); + params.append('token', jwtToken); + + if (userId && userId.trim() !== '') { + params.append('device_name', userId.trim()); + console.log('[App.tsx] Using advanced backend with token and device_name:', userId.trim()); + } else { + params.append('device_name', 'phone'); // Default device name + console.log('[App.tsx] Using advanced backend with token and default device_name'); + } + + const separator = webSocketUrl.includes('?') ? 
'&' : '?'; + finalWebSocketUrl = `${webSocketUrl}${separator}${params.toString()}`; + console.log('[App.tsx] Advanced backend WebSocket URL constructed (token hidden for security)'); + } else { + // Simple backend: use URL as-is without authentication + console.log('[App.tsx] Using simple backend without authentication:', finalWebSocketUrl); + } + + // Start custom WebSocket streaming first + await audioStreamer.startStreaming(finalWebSocketUrl); + + // Then start OMI audio listener + await originalStartAudioListener(async (audioBytes) => { + const wsReadyState = audioStreamer.getWebSocketReadyState(); + if (wsReadyState === WebSocket.OPEN && audioBytes.length > 0) { + await audioStreamer.sendAudio(audioBytes); + } + }); + } catch (error) { + console.error('[App.tsx] Error starting audio listening/streaming:', error); + Alert.alert('Error', 'Could not start audio listening or streaming.'); + // Ensure cleanup if one part started but the other failed + if (audioStreamer.isStreaming) audioStreamer.stopStreaming(); + } + }, [originalStartAudioListener, audioStreamer, webSocketUrl, userId, omiConnection, deviceConnection.connectedDeviceId, jwtToken, isAuthenticated]); + + const handleStopAudioListeningAndStreaming = useCallback(async () => { + console.log('[App.tsx] Stopping audio listening and streaming.'); + await originalStopAudioListener(); + audioStreamer.stopStreaming(); + }, [originalStopAudioListener, audioStreamer]); + + // Phone Audio Streaming Functions + const handleStartPhoneAudioStreaming = useCallback(async () => { + if (!webSocketUrl || webSocketUrl.trim() === '') { + Alert.alert('WebSocket URL Required', 'Please enter the WebSocket URL for streaming.'); + return; + } + + try { + let finalWebSocketUrl = webSocketUrl.trim(); + + // Convert HTTP/HTTPS to WS/WSS protocol + finalWebSocketUrl = finalWebSocketUrl.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); + + // Ensure /ws_pcm endpoint is included + if (!finalWebSocketUrl.includes('/ws_pcm')) { + // 
Remove trailing slash if present, then add /ws_pcm + finalWebSocketUrl = finalWebSocketUrl.replace(/\/$/, '') + '/ws_pcm'; + } + + // Check if this is the advanced backend (requires authentication) or simple backend + const isAdvancedBackend = jwtToken && isAuthenticated; + + if (isAdvancedBackend) { + // Advanced backend: include JWT token and device parameters + const params = new URLSearchParams(); + params.append('token', jwtToken); + + const deviceName = userId && userId.trim() !== '' ? userId.trim() : 'phone-mic'; + params.append('device_name', deviceName); + console.log('[App.tsx] Using advanced backend with token and device_name:', deviceName); + + const separator = finalWebSocketUrl.includes('?') ? '&' : '?'; + finalWebSocketUrl = `${finalWebSocketUrl}${separator}${params.toString()}`; + console.log('[App.tsx] Advanced backend WebSocket URL constructed for phone audio'); + } else { + // Simple backend: use URL as-is without authentication + console.log('[App.tsx] Using simple backend without authentication for phone audio'); + } + + // Start WebSocket streaming first + await audioStreamer.startStreaming(finalWebSocketUrl); + + // Start phone audio recording + await phoneAudioRecorder.startRecording(async (pcmBuffer) => { + const wsReadyState = audioStreamer.getWebSocketReadyState(); + if (wsReadyState === WebSocket.OPEN && pcmBuffer.length > 0) { + await audioStreamer.sendAudio(pcmBuffer); + } + }); + + setIsPhoneAudioMode(true); + console.log('[App.tsx] Phone audio streaming started successfully'); + } catch (error) { + console.error('[App.tsx] Error starting phone audio streaming:', error); + Alert.alert('Error', 'Could not start phone audio streaming.'); + // Ensure cleanup if one part started but the other failed + if (audioStreamer.isStreaming) audioStreamer.stopStreaming(); + if (phoneAudioRecorder.isRecording) await phoneAudioRecorder.stopRecording(); + setIsPhoneAudioMode(false); + } + }, [audioStreamer, phoneAudioRecorder, webSocketUrl, userId, 
jwtToken, isAuthenticated]); + + const handleStopPhoneAudioStreaming = useCallback(async () => { + console.log('[App.tsx] Stopping phone audio streaming.'); + await phoneAudioRecorder.stopRecording(); + audioStreamer.stopStreaming(); + setIsPhoneAudioMode(false); + }, [phoneAudioRecorder, audioStreamer]); + + const handleTogglePhoneAudio = useCallback(async () => { + if (isPhoneAudioMode || phoneAudioRecorder.isRecording) { + await handleStopPhoneAudioStreaming(); + } else { + await handleStartPhoneAudioStreaming(); + } + }, [isPhoneAudioMode, phoneAudioRecorder.isRecording, handleStartPhoneAudioStreaming, handleStopPhoneAudioStreaming]); + + // Store stable references for cleanup + const cleanupRefs = useRef({ + omiConnection, + bleManager, + disconnectFromDevice: deviceConnection.disconnectFromDevice, + stopAudioStreaming: audioStreamer.stopStreaming, + stopPhoneAudio: phoneAudioRecorder.stopRecording, + }); + + // Update refs when functions change + useEffect(() => { + cleanupRefs.current = { + omiConnection, + bleManager, + disconnectFromDevice: deviceConnection.disconnectFromDevice, + stopAudioStreaming: audioStreamer.stopStreaming, + stopPhoneAudio: phoneAudioRecorder.stopRecording, + }; + }); + + // Cleanup only on actual unmount (no dependencies to avoid re-runs) + useEffect(() => { + return () => { + console.log('App unmounting - cleaning up OmiConnection, BleManager, AudioStreamer, and PhoneAudioRecorder'); + const refs = cleanupRefs.current; + + if (refs.omiConnection.isConnected()) { + refs.disconnectFromDevice().catch(err => console.error("Error disconnecting in cleanup:", err)); + } + if (refs.bleManager) { + refs.bleManager.destroy(); + } + refs.stopAudioStreaming(); + // Phone audio stopRecording now handles inactive state gracefully + refs.stopPhoneAudio().catch(err => console.error("Error stopping phone audio in cleanup:", err)); + }; + }, []); // Empty dependency array - only run on mount/unmount + + const canScan = React.useMemo(() => ( + 
permissionGranted && + bluetoothState === BluetoothState.PoweredOn && + !isAttemptingAutoReconnect && + !deviceConnection.isConnecting && + !deviceConnection.connectedDeviceId && + (triedAutoReconnectForCurrentId || !lastKnownDeviceId) + // Removed authentication requirement for scanning + ), [ + permissionGranted, + bluetoothState, + isAttemptingAutoReconnect, + deviceConnection.isConnecting, + deviceConnection.connectedDeviceId, + triedAutoReconnectForCurrentId, + lastKnownDeviceId, + ]); + + const filteredDevices = React.useMemo(() => { + if (!showOnlyOmi) { + return scannedDevices; + } + return scannedDevices.filter(device => { + const name = device.name?.toLowerCase() || ''; + return name.includes('omi') || name.includes('friend'); + }); + }, [scannedDevices, showOnlyOmi]); + + const handleSetAndSaveWebSocketUrl = useCallback(async (url: string) => { + setWebSocketUrl(url); + await saveWebSocketUrl(url); + }, []); + + const handleSetAndSaveUserId = useCallback(async (id: string) => { + setUserId(id); + await saveUserId(id || null); + }, []); + + // Authentication status change handler + const handleAuthStatusChange = useCallback((authenticated: boolean, email: string | null, token: string | null) => { + setIsAuthenticated(authenticated); + setCurrentUserEmail(email); + setJwtToken(token); + console.log('[App.tsx] Auth status changed:', { authenticated, email: email ? 
'logged in' : 'logged out' }); + }, []); + + const handleCancelAutoReconnect = useCallback(async () => { + console.log('[App.tsx] Cancelling auto-reconnection attempt.'); + if (lastKnownDeviceId) { + // Clear the last known device ID to prevent further auto-reconnect attempts in this session + await saveLastConnectedDeviceId(null); + setLastKnownDeviceId(null); + setTriedAutoReconnectForCurrentId(true); // Mark as tried to prevent immediate re-trigger if conditions meet again + } + // Attempt to stop any ongoing connection process + // disconnectFromDevice also sets isConnecting to false internally. + await deviceConnection.disconnectFromDevice(); + setIsAttemptingAutoReconnect(false); // Explicitly set to false to hide the auto-reconnect screen + }, [deviceConnection, lastKnownDeviceId, saveLastConnectedDeviceId, setLastKnownDeviceId, setTriedAutoReconnectForCurrentId, setIsAttemptingAutoReconnect]); + + if (isPermissionsLoading && bluetoothState === BluetoothState.Unknown) { + return ( + + + + {isAttemptingAutoReconnect + ? `Attempting to reconnect to the last device (${lastKnownDeviceId ? lastKnownDeviceId.substring(0, 10) + '...' : ''})...` + : 'Initializing Bluetooth...'} + + + ); + } + + if (isAttemptingAutoReconnect) { + return ( + + + + + Attempting to reconnect to the last device ({lastKnownDeviceId ? lastKnownDeviceId.substring(0, 10) + '...' : ''})... + + + + {showTranscriptDropdown && versionHistory && ( +
e.stopPropagation()} + > +
+ {versionHistory.transcript_versions.map((version, index) => ( + + ))} +
+
+ )} + + )} + + {/* Memory Version Dropdown */} + {(versionInfo.memory_count || 0) > 1 && ( +
+ + + {showMemoryDropdown && versionHistory && ( +
e.stopPropagation()} + > +
+ {versionHistory.memory_versions.map((version, index) => ( + + ))} +
+
+ )} +
+ )} + + {loading && ( +
+ + Loading versions... +
+ )} + + ) +} \ No newline at end of file diff --git a/backends/advanced/webui/src/components/ConversationVersionHeader.tsx b/backends/advanced/webui/src/components/ConversationVersionHeader.tsx new file mode 100644 index 00000000..9e7c5e09 --- /dev/null +++ b/backends/advanced/webui/src/components/ConversationVersionHeader.tsx @@ -0,0 +1,111 @@ +import { useState } from 'react'; +import { RotateCcw } from 'lucide-react'; +import { conversationsApi } from '../services/api'; +import ConversationVersionDropdown from './ConversationVersionDropdown'; + +interface ConversationVersionHeaderProps { + conversationId: string; + versionInfo?: { + transcript_count: number; + memory_count: number; + active_transcript_version?: string; + active_memory_version?: string; + }; + onVersionChange?: () => void; +} + +export default function ConversationVersionHeader({ conversationId, versionInfo, onVersionChange }: ConversationVersionHeaderProps) { + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + const handleReprocessTranscript = async (event: React.MouseEvent) => { + event.preventDefault(); + event.stopPropagation(); + + try { + setLoading(true); + await conversationsApi.reprocessTranscript(conversationId); + onVersionChange?.(); + } catch (err) { + console.error('Failed to reprocess transcript:', err); + setError('Failed to reprocess transcript'); + } finally { + setLoading(false); + } + }; + + // If no version info provided, don't show anything + if (!versionInfo) return null; + + // Only show if there are multiple versions or reprocessing capability + if (versionInfo.transcript_count <= 1 && versionInfo.memory_count <= 1) { + return ( +
+
+
+ {versionInfo.transcript_count} transcript version, {versionInfo.memory_count} memory version +
+ +
+
+ ); + } + + // Show multiple version info with reprocess option and version selector + return ( +
+
+
+
+ {versionInfo.transcript_count} transcript versions, + {versionInfo.memory_count} memory versions + {error &&
{error}
} +
+ + {/* Version Selector Dropdowns */} + {})} + /> +
+ + +
+
+ ); +} \ No newline at end of file diff --git a/backends/advanced/webui/src/components/ErrorBoundary.tsx b/backends/advanced/webui/src/components/ErrorBoundary.tsx new file mode 100644 index 00000000..1b73a871 --- /dev/null +++ b/backends/advanced/webui/src/components/ErrorBoundary.tsx @@ -0,0 +1,138 @@ +import React, { Component, ReactNode } from 'react' +import { AlertTriangle, RefreshCw, Home } from 'lucide-react' + +interface Props { + children: ReactNode + fallback?: ReactNode +} + +interface State { + hasError: boolean + error?: Error + errorInfo?: React.ErrorInfo +} + +export class ErrorBoundary extends Component { + constructor(props: Props) { + super(props) + this.state = { hasError: false } + } + + static getDerivedStateFromError(error: Error): State { + return { hasError: true, error } + } + + componentDidCatch(error: Error, errorInfo: React.ErrorInfo) { + // Always log errors to console for debugging + console.error('❌ ErrorBoundary caught an error:', error) + console.error('📍 Component stack:', errorInfo.componentStack) + console.error('📝 Error stack:', error.stack) + + this.setState({ + error, + errorInfo + }) + } + + handleRetry = () => { + this.setState({ hasError: false, error: undefined, errorInfo: undefined }) + } + + handleGoHome = () => { + window.location.href = '/' + } + + render() { + if (this.state.hasError) { + if (this.props.fallback) { + return this.props.fallback + } + + return ( +
+
+
+ +
+ +

+ Something went wrong +

+ +

+ An unexpected error occurred. You can try refreshing the page or go back to the dashboard. +

+ + {this.state.error && ( +
+ + Error Details (Always Shown for Debugging) + +
+                  Error: {this.state.error.toString()}
+                  {this.state.error.stack && (
+                    <>
+                      {'\n\n'}Stack:
+                      {'\n'}{this.state.error.stack}
+                    
+                  )}
+                  {this.state.errorInfo?.componentStack && (
+                    <>
+                      {'\n\n'}Component Stack:
+                      {'\n'}{this.state.errorInfo.componentStack}
+                    
+                  )}
+                
+
+ )} + +
+ + + +
+
+
+ ) + } + + return this.props.children + } +} + +// Lightweight error boundary for individual components +export const PageErrorBoundary: React.FC<{ children: ReactNode }> = ({ children }) => ( + +
+ + Page Loading Error +
+

+ This page encountered an error. Check the browser console for details. +

+ + + } + > + {children} +
+) \ No newline at end of file diff --git a/backends/advanced/webui/src/components/MemorySettings.tsx b/backends/advanced/webui/src/components/MemorySettings.tsx new file mode 100644 index 00000000..d01a84ea --- /dev/null +++ b/backends/advanced/webui/src/components/MemorySettings.tsx @@ -0,0 +1,235 @@ +import { useState, useEffect } from 'react' +import { Brain, RefreshCw, CheckCircle, Trash2, Save, RotateCcw, AlertCircle } from 'lucide-react' +import { systemApi, memoriesApi } from '../services/api' + +interface MemorySettingsProps { + className?: string +} + +export default function MemorySettings({ className }: MemorySettingsProps) { + const [configYaml, setConfigYaml] = useState('') + const [loading, setLoading] = useState(false) + const [validating, setValidating] = useState(false) + const [saving, setSaving] = useState(false) + const [deleting, setDeleting] = useState(false) + const [message, setMessage] = useState('') + const [error, setError] = useState('') + + useEffect(() => { + loadMemoryConfig() + }, []) + + const loadMemoryConfig = async () => { + setLoading(true) + setError('') + setMessage('') + + try { + const response = await systemApi.getMemoryConfigRaw() + setConfigYaml(response.data.config_yaml) + setMessage('Configuration loaded successfully') + setTimeout(() => setMessage(''), 3000) + } catch (err: any) { + setError(err.response?.data?.error || 'Failed to load memory configuration') + } finally { + setLoading(false) + } + } + + const validateConfig = async () => { + if (!configYaml.trim()) { + setError('Configuration cannot be empty') + return + } + + setValidating(true) + setError('') + setMessage('') + + try { + await systemApi.validateMemoryConfig(configYaml) + setMessage('✅ Configuration is valid') + setTimeout(() => setMessage(''), 3000) + } catch (err: any) { + setError(err.response?.data?.error || 'Validation failed') + } finally { + setValidating(false) + } + } + + const saveConfig = async () => { + if (!configYaml.trim()) { + 
setError('Configuration cannot be empty') + return + } + + setSaving(true) + setError('') + setMessage('') + + try { + await systemApi.updateMemoryConfigRaw(configYaml) + setMessage('✅ Configuration saved and reloaded successfully') + setTimeout(() => setMessage(''), 5000) + } catch (err: any) { + setError(err.response?.data?.error || 'Failed to save configuration') + } finally { + setSaving(false) + } + } + + const reloadConfig = async () => { + await loadMemoryConfig() + } + + const resetConfig = () => { + loadMemoryConfig() + setMessage('Configuration reset to file version') + setTimeout(() => setMessage(''), 3000) + } + + const deleteAllMemories = async () => { + const confirmed = window.confirm( + '⚠️ WARNING: This will permanently delete ALL your memories. This action cannot be undone.\n\nAre you sure you want to continue?' + ) + + if (!confirmed) return + + const doubleConfirmed = window.confirm( + '🚨 FINAL CONFIRMATION: You are about to delete ALL memories permanently.\n\nType "DELETE" in the next dialog to confirm.' + ) + + if (!doubleConfirmed) return + + const userInput = window.prompt('Type "DELETE" to confirm memory deletion:') + if (userInput !== 'DELETE') { + setMessage('Deletion cancelled - confirmation text did not match') + return + } + + setDeleting(true) + setError('') + setMessage('') + + try { + const response = await memoriesApi.deleteAll() + setMessage(`✅ Successfully deleted ${response.data.deleted_count || 'all'} memories`) + setTimeout(() => setMessage(''), 5000) + } catch (err: any) { + setError(err.response?.data?.error || 'Failed to delete memories') + } finally { + setDeleting(false) + } + } + + return ( +
+
+ {/* Header */} +
+
+ +

+ Memory Configuration +

+
+
+ + +
+
+ +

+ Edit the memory extraction configuration. This controls how memories are extracted from conversations, quality + control settings, and processing parameters. +

+ + {/* Status Messages */} + {message && ( +
+

{message}

+
+ )} + + {error && ( +
+
+ +

{error}

+
+
+ )} + + {/* YAML Editor */} +
+
+ +