diff --git a/.github/workflows/advanced-docker-compose-build.yml b/.github/workflows/advanced-docker-compose-build.yml new file mode 100644 index 00000000..5acc717e --- /dev/null +++ b/.github/workflows/advanced-docker-compose-build.yml @@ -0,0 +1,251 @@ +name: Build and Deploy Advanced (Docker Compose) + +on: + workflow_dispatch: + inputs: + version: + description: Optional version tag override (e.g. v1.2.3) + required: false + push: + branches: [ "main" ] + paths: + - "*" + - "backends/advanced/**" + - "extras/asr-services/**" + - "extras/speaker-recognition/**" + - "extras/openmemory-mcp/**" + - ".github/workflows/advanced-docker-compose-build.yml" + tags: + - "v*" + + +permissions: + contents: read + packages: write + actions: read + +env: + REGISTRY: ghcr.io + +jobs: + build-default: + runs-on: ubuntu-latest + timeout-minutes: 60 + env: + ADVANCED_ENV: ${{ secrets.ADVANCED_ENV }} + RUNNER_FLAVOUR: ubuntu-latest + defaults: + run: + shell: bash + working-directory: backends/advanced + + steps: + - name: Show selected runner + run: echo "Workflow running on ${RUNNER_FLAVOUR} runner" + working-directory: . 
+ + - name: Checkout + uses: actions/checkout@v4 + + - name: Print commit details + run: | + echo "Event: $GITHUB_EVENT_NAME" + echo "Ref: $GITHUB_REF" + echo "Ref name: $GITHUB_REF_NAME" + echo "Repository: $GITHUB_REPOSITORY" + echo "Actor: $GITHUB_ACTOR" + echo "SHA: $GITHUB_SHA" + echo "Short SHA: ${GITHUB_SHA::7}" + echo "Commit info:" + git log -1 --pretty=format:'Author: %an <%ae>%nDate: %ad%nSubject: %s' || true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Copy .env.template to .env + run: | + set -euo pipefail + copy_env() { + local dir="$1" + local template="${dir}/.env.template" + local target="${dir}/.env" + if [ -f "$template" ]; then + echo "Copying $template to $target" + cp "$template" "$target" + else + echo "$template not found; skipping" + fi + } + + copy_env . + copy_env ../../extras/asr-services + copy_env ../../extras/speaker-recognition + copy_env ../../extras/openmemory-mcp + + - name: Create .env from secret (if provided) + if: env.ADVANCED_ENV != '' + run: | + echo "Writing .env from ADVANCED_ENV secret" + printf "%s\n" "${ADVANCED_ENV}" > .env + + - name: Source .env (if present) + run: | + if [ -f .env ]; then + set -a + # shellcheck disable=SC1091 + source .env + set +a + else + echo ".env not found; continuing" + fi + + - name: Determine version + id: version + # Pass the user-supplied input via env so it is never interpolated into the script text + env: + INPUT_VERSION: ${{ github.event.inputs.version }} + run: | + if [ -n "$INPUT_VERSION" ]; then + VERSION="$INPUT_VERSION" + elif [[ "${GITHUB_REF}" == refs/tags/* ]]; then + VERSION="${GITHUB_REF#refs/tags/}" + else + VERSION="sha-${GITHUB_SHA::7}" + fi + echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT" + + - name: Build, tag, and push services sequentially with version + env: + OWNER: ${{ github.repository_owner }} + VERSION: ${{ steps.version.outputs.VERSION }} + run: | + set -euo pipefail 
+ docker compose version + OWNER_LC=$(echo "$OWNER" | tr '[:upper:]' '[:lower:]') + + # CUDA variants from pyproject.toml + CUDA_VARIANTS=("cpu" "cu121" "cu126" "cu128") + + # Base services (no CUDA variants, no profiles) + base_service_specs=( + "friend-backend|advanced-friend-backend|docker-compose.yml|." + "workers|advanced-workers|docker-compose.yml|." + "webui|advanced-webui|docker-compose.yml|." + "openmemory-mcp|openmemory-mcp|../../extras/openmemory-mcp/docker-compose.yml|../../extras/openmemory-mcp" + ) + + # Build and push base services + for spec in "${base_service_specs[@]}"; do + IFS='|' read -r svc svc_repo compose_file project_dir <<< "$spec" + + echo "::group::Building and pushing $svc_repo" + if [ "$compose_file" = "docker-compose.yml" ] && [ "$project_dir" = "." ]; then + docker compose build --pull "$svc" + else + docker compose -f "$compose_file" --project-directory "$project_dir" build "$svc" + fi + # Resolve the built image ID via compose (avoids name mismatches) + if [ "$compose_file" = "docker-compose.yml" ] && [ "$project_dir" = "." 
]; then + img_id=$(docker compose images -q "$svc" | head -n1) + else + img_id=$(docker compose -f "$compose_file" --project-directory "$project_dir" images -q "$svc" | head -n1) + fi + if [ -z "${img_id:-}" ]; then + echo "Skipping $svc_repo (no built image found after build)" + echo "::endgroup::" + continue + fi + + # Tag and push with version + target_image="$REGISTRY/$OWNER_LC/$svc_repo:$VERSION" + latest_image="$REGISTRY/$OWNER_LC/$svc_repo:latest" + echo "Tagging $img_id as $target_image" + docker tag "$img_id" "$target_image" + echo "Tagging $img_id as $latest_image" + docker tag "$img_id" "$latest_image" + + echo "Pushing $target_image" + docker push "$target_image" + echo "Pushing $latest_image" + docker push "$latest_image" + + # Clean up local tags + docker image rm -f "$target_image" || true + docker image rm -f "$latest_image" || true + echo "::endgroup::" + done + + # Build and push parakeet-asr with CUDA variants (cu121, cu126, cu128) + echo "::group::Building and pushing parakeet-asr CUDA variants" + cd ../../extras/asr-services + for cuda_variant in cu121 cu126 cu128; do + echo "Building parakeet-asr-${cuda_variant}" + export CUDA_VERSION="${cuda_variant}" + docker compose build parakeet-asr + + img_id=$(docker compose images -q parakeet-asr | head -n1) + if [ -n "${img_id:-}" ]; then + target_image="$REGISTRY/$OWNER_LC/parakeet-asr-${cuda_variant}:$VERSION" + latest_image="$REGISTRY/$OWNER_LC/parakeet-asr-${cuda_variant}:latest" + echo "Tagging $img_id as $target_image" + docker tag "$img_id" "$target_image" + echo "Tagging $img_id as $latest_image" + docker tag "$img_id" "$latest_image" + + echo "Pushing $target_image" + docker push "$target_image" + echo "Pushing $latest_image" + docker push "$latest_image" + + # Clean up local tags + docker image rm -f "$target_image" || true + docker image rm -f "$latest_image" || true + fi + done + cd - > /dev/null + echo "::endgroup::" + + # Build and push speaker-recognition with all CUDA variants 
(including CPU) + # Note: speaker-service has profiles, but we can build it directly by setting PYTORCH_CUDA_VERSION + echo "::group::Building and pushing speaker-recognition variants" + cd ../../extras/speaker-recognition + for cuda_variant in "${CUDA_VARIANTS[@]}"; do + echo "Building speaker-recognition-${cuda_variant}" + export PYTORCH_CUDA_VERSION="${cuda_variant}" + # Build speaker-service directly (profiles only affect 'up', not 'build') + docker compose build speaker-service + + img_id=$(docker compose images -q speaker-service | head -n1) + if [ -n "${img_id:-}" ]; then + target_image="$REGISTRY/$OWNER_LC/speaker-recognition-${cuda_variant}:$VERSION" + latest_image="$REGISTRY/$OWNER_LC/speaker-recognition-${cuda_variant}:latest" + echo "Tagging $img_id as $target_image" + docker tag "$img_id" "$target_image" + echo "Tagging $img_id as $latest_image" + docker tag "$img_id" "$latest_image" + + echo "Pushing $target_image" + docker push "$target_image" + echo "Pushing $latest_image" + docker push "$latest_image" + + # Clean up local tags + docker image rm -f "$target_image" || true + docker image rm -f "$latest_image" || true + fi + done + cd - > /dev/null + echo "::endgroup::" + + # Summary + echo "::group::Build Summary" + echo "Built and pushed images with version tag: ${VERSION}" + echo "Images pushed to: $REGISTRY/$OWNER_LC/" + echo "::endgroup::" diff --git a/CLAUDE.md b/CLAUDE.md index 9c06c934..1efb4a2e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -38,6 +38,9 @@ uv run pytest tests/test_memory_service.py # Single test file # Environment setup cp .env.template .env # Configure environment variables +# Setup test environment (optional, for running integration tests) +uv run --with-requirements setup-requirements.txt python setup_test_env.py # Creates .env.test + # Reset data (development) sudo rm -rf backends/advanced/data/ ``` @@ -68,6 +71,10 @@ cd backends/advanced # Requires .env file with DEEPGRAM_API_KEY and OPENAI_API_KEY cp .env.template .env # 
Configure API keys +# Optional: Setup test environment with test-specific credentials +# (wizard.py prompts for this, or run manually) +uv run --with-requirements setup-requirements.txt python setup_test_env.py + # Run full integration test suite ./run-test.sh diff --git a/Docs/init-system.md b/Docs/init-system.md index 0688f8bf..fb9c1763 100644 --- a/Docs/init-system.md +++ b/Docs/init-system.md @@ -127,7 +127,7 @@ Note (Linux): If `host.docker.internal` is unavailable, add `extra_hosts: - "hos ### Container-to-Container Communication Services use `host.docker.internal` for inter-container communication: -- `http://host.docker.internal:8085` - Speaker Recognition +- `http://127.0.0.1:8085` - Speaker Recognition - `http://host.docker.internal:8767` - Parakeet ASR - `http://host.docker.internal:8765` - OpenMemory MCP diff --git a/backends/advanced/.dockerignore b/backends/advanced/.dockerignore index 2f10e879..38c6284e 100644 --- a/backends/advanced/.dockerignore +++ b/backends/advanced/.dockerignore @@ -16,4 +16,5 @@ !nginx.conf !nginx.conf.template !start.sh -!start-workers.sh \ No newline at end of file +!start-workers.sh +!Caddyfile \ No newline at end of file diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index 2dcc11bb..d9d58dca 100644 --- a/backends/advanced/docker-compose.yml +++ b/backends/advanced/docker-compose.yml @@ -17,6 +17,7 @@ services: - MISTRAL_API_KEY=${MISTRAL_API_KEY} - MISTRAL_MODEL=${MISTRAL_MODEL} - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER} + - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} - OFFLINE_ASR_TCP_URI=${OFFLINE_ASR_TCP_URI} - OLLAMA_BASE_URL=${OLLAMA_BASE_URL} - HF_TOKEN=${HF_TOKEN} @@ -31,7 +32,7 @@ services: - NEO4J_HOST=${NEO4J_HOST} - NEO4J_USER=${NEO4J_USER} - NEO4J_PASSWORD=${NEO4J_PASSWORD} - - CORS_ORIGINS=http://localhost:3010,http://localhost:8000,https://localhost:3010,https://localhost:8000,https://100.105.225.45,https://localhost + - 
CORS_ORIGINS=http://localhost:3010,http://localhost:8000,http://192.168.1.153:3010,http://192.168.1.153:8000,https://localhost:3010,https://localhost:8000,https://100.105.225.45,https://localhost - REDIS_URL=redis://redis:6379/0 depends_on: qdrant: @@ -51,6 +52,7 @@ services: restart: unless-stopped # Unified Worker Container + # No CUDA needed for friend-backend and workers, workers only orchestrate jobs and call external services # Runs all workers in a single container for efficiency: # - 3 RQ workers (transcription, memory, default queues) # - 1 Audio stream worker (Redis Streams consumer - must be single to maintain sequential chunks) @@ -63,6 +65,7 @@ services: - .env volumes: - ./src:/app/src + - ./start-workers.sh:/app/start-workers.sh - ./data/audio_chunks:/app/audio_chunks - ./data:/app/data environment: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 86e00ad3..4b852dae 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -137,10 +137,13 @@ async def get_conversations(user: User): # Convert conversations to API format conversations = [] for conv in user_conversations: - # Format conversation for list - use model_dump with exclusions + # Ensure legacy fields are populated from active transcript version + conv._update_legacy_transcript_fields() + + # Format conversation for list - include segments but exclude large nested fields conv_dict = conv.model_dump( mode='json', # Automatically converts datetime to ISO strings - exclude={'id', 'transcript', 'segments', 'transcript_versions', 'memory_versions'} # Exclude large fields for list view + exclude={'id', 'transcript_versions', 'memory_versions'} # Include segments for UI display ) # Add computed/external fields @@ -349,7 +352,7 @@ async def 
reprocess_transcript(conversation_id: str, user: User): from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL, redis_conn + from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL # Job 1: Transcribe audio to text transcript_job = transcription_queue.enqueue( @@ -400,10 +403,10 @@ async def reprocess_transcript(conversation_id: str, user: User): logger.info(f"šŸ“„ RQ: Enqueued audio cropping job {cropping_job.id} (depends on {speaker_job.id})") # Job 4: Extract memories (depends on cropping) + # Note: redis_client is injected by @async_job decorator, don't pass it directly memory_job = memory_queue.enqueue( process_memory_job, conversation_id, - redis_conn, depends_on=cropping_job, job_timeout=1800, result_ttl=JOB_RESULT_TTL, diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 98e96734..53a580a7 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -313,13 +313,30 @@ async def _initialize_streaming_session( client_state.stream_audio_format = audio_format application_logger.info(f"šŸ†” Created stream session: {client_state.stream_session_id}") + # Determine transcription provider from environment + transcription_provider = os.getenv("TRANSCRIPTION_PROVIDER", "").lower() + if transcription_provider in ["offline", "parakeet"]: + provider = "parakeet" + elif transcription_provider == "deepgram": + provider = "deepgram" + else: + # Auto-detect: prefer Parakeet 
if URL is set, otherwise Deepgram + parakeet_url = os.getenv("PARAKEET_ASR_URL") or os.getenv("OFFLINE_ASR_TCP_URI") + deepgram_key = os.getenv("DEEPGRAM_API_KEY") + if parakeet_url: + provider = "parakeet" + elif deepgram_key: + provider = "deepgram" + else: + raise ValueError("No transcription provider configured (DEEPGRAM_API_KEY or PARAKEET_ASR_URL required)") + # Initialize session tracking in Redis await audio_stream_producer.init_session( session_id=client_state.stream_session_id, user_id=user_id, client_id=client_id, mode="streaming", - provider="deepgram" + provider=provider ) # Enqueue streaming jobs (speech detection + audio persistence) diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py index cba23c41..367c0daf 100644 --- a/backends/advanced/src/advanced_omi_backend/models/conversation.py +++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py @@ -159,6 +159,19 @@ def clean_legacy_data(cls, data: Any) -> Any: elif not isinstance(segment['speaker'], str): segment['speaker'] = "unknown" + # Populate legacy fields from active transcript version if they're empty + active_version_id = data.get('active_transcript_version') + if active_version_id and 'transcript_versions' in data and isinstance(data['transcript_versions'], list): + for version in data['transcript_versions']: + if isinstance(version, dict) and version.get('version_id') == active_version_id: + # Populate transcript if missing + if not data.get('transcript') and version.get('transcript'): + data['transcript'] = version['transcript'] + # Populate segments if missing or empty + if (not data.get('segments') or len(data.get('segments', [])) == 0) and version.get('segments'): + data['segments'] = version['segments'] + break + return data @property diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py 
b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py index 10da0058..5b11e094 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py @@ -7,6 +7,7 @@ import asyncio import json import logging +import os import tempfile from typing import Dict, Optional diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py new file mode 100644 index 00000000..75974fed --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py @@ -0,0 +1,91 @@ +""" +Parakeet stream consumer for Redis Streams architecture. + +Reads from: audio:stream:* streams +Writes to: transcription:results:{session_id} +""" + +import logging +import os + +from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer +from advanced_omi_backend.services.transcription.parakeet import ParakeetProvider + +logger = logging.getLogger(__name__) + + +class ParakeetStreamConsumer: + """ + Parakeet consumer for Redis Streams architecture. + + Reads from: specified stream (client-specific or provider-specific) + Writes to: transcription:results:{session_id} + + This class wraps (does not inherit from) an internal BaseAudioStreamConsumer subclass that implements transcribe_audio(), and delegates start_consuming()/stop() to it. + """ + + def __init__(self, redis_client, service_url: str = None, buffer_chunks: int = 30): + """ + Initialize Parakeet consumer. + + Dynamically discovers all audio:stream:* streams and claims them using Redis locks. 
+ + Args: + redis_client: Connected Redis client + service_url: Parakeet service URL (defaults to PARAKEET_ASR_URL env var) + buffer_chunks: Number of chunks to buffer before transcribing (default: 30 = ~7.5s) + """ + self.service_url = service_url or os.getenv("PARAKEET_ASR_URL") or os.getenv("OFFLINE_ASR_TCP_URI") + if not self.service_url: + raise ValueError("PARAKEET_ASR_URL or OFFLINE_ASR_TCP_URI is required") + + # Initialize Parakeet provider + self.provider = ParakeetProvider(service_url=self.service_url) + + # Create a concrete subclass that implements transcribe_audio + class _ConcreteConsumer(BaseAudioStreamConsumer): + def __init__(inner_self, provider_name: str, redis_client, buffer_chunks: int): + super().__init__(provider_name, redis_client, buffer_chunks) + inner_self._parakeet_provider = self.provider + + async def transcribe_audio(inner_self, audio_data: bytes, sample_rate: int) -> dict: + """Transcribe using ParakeetProvider.""" + try: + result = await inner_self._parakeet_provider.transcribe( + audio_data=audio_data, + sample_rate=sample_rate + ) + + # Calculate confidence (Parakeet may not provide confidence, default to 0.9) + confidence = 0.9 + if result.get("words"): + confidences = [ + w.get("confidence", 0.9) + for w in result["words"] + if "confidence" in w + ] + if confidences: + confidence = sum(confidences) / len(confidences) + + return { + "text": result.get("text", ""), + "words": result.get("words", []), + "segments": result.get("segments", []), + "confidence": confidence + } + + except Exception as e: + logger.error(f"Parakeet transcription failed: {e}", exc_info=True) + raise + + # Instantiate the concrete consumer + self._consumer = _ConcreteConsumer("parakeet", redis_client, buffer_chunks) + + async def start_consuming(self): + """Delegate to base consumer.""" + return await self._consumer.start_consuming() + + async def stop(self): + """Delegate to base consumer.""" + return await self._consumer.stop() + diff --git 
a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py index 80203677..c8866eed 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py @@ -30,8 +30,9 @@ async def main(): # Get configuration from environment api_key = os.getenv("DEEPGRAM_API_KEY") if not api_key: - logger.error("DEEPGRAM_API_KEY environment variable is required") - sys.exit(1) + logger.warning("DEEPGRAM_API_KEY environment variable not set - Deepgram audio stream worker will not start") + logger.warning("Audio transcription will use alternative providers if configured") + return redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py new file mode 100644 index 00000000..1ffae49c --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +""" +Parakeet audio stream worker. + +Starts a consumer that reads from audio:stream:* and transcribes audio using Parakeet. 
+""" + +import asyncio +import logging +import os +import signal +import sys + +import redis.asyncio as redis + +from advanced_omi_backend.services.transcription.parakeet_stream_consumer import ParakeetStreamConsumer + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" +) + +logger = logging.getLogger(__name__) + + +async def main(): + """Main worker entry point.""" + logger.info("šŸš€ Starting Parakeet audio stream worker") + + # Get configuration from environment + service_url = os.getenv("PARAKEET_ASR_URL") or os.getenv("OFFLINE_ASR_TCP_URI") + if not service_url: + logger.warning("PARAKEET_ASR_URL or OFFLINE_ASR_TCP_URI environment variable not set - Parakeet audio stream worker will not start") + logger.warning("Audio transcription will use alternative providers if configured") + return + + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + + # Create Redis client + redis_client = await redis.from_url( + redis_url, + encoding="utf-8", + decode_responses=False + ) + logger.info("Connected to Redis") + + # Create consumer with balanced buffer size + # 20 chunks = ~5 seconds of audio + # Balance between transcription accuracy and latency + consumer = ParakeetStreamConsumer( + redis_client=redis_client, + service_url=service_url, + buffer_chunks=20 # 5 seconds - good context without excessive delay + ) + + # Setup signal handlers for graceful shutdown + shutdown_event = asyncio.Event() + + def signal_handler(signum, _frame): + logger.info(f"Received signal {signum}, shutting down...") + shutdown_event.set() + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + logger.info("āœ… Parakeet worker ready") + + # This blocks until consumer is stopped or shutdown signaled + consume_task = asyncio.create_task(consumer.start_consuming()) + shutdown_task = asyncio.create_task(shutdown_event.wait()) + + done, pending = await asyncio.wait( + [consume_task, 
shutdown_task], + return_when=asyncio.FIRST_COMPLETED + ) + + # Cancel pending tasks + for task in pending: + task.cancel() + + await consumer.stop() + + except Exception as e: + logger.error(f"Worker error: {e}", exc_info=True) + sys.exit(1) + finally: + await redis_client.aclose() + logger.info("šŸ‘‹ Parakeet worker stopped") + + +if __name__ == "__main__": + asyncio.run(main()) + diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index e081786a..664f621f 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -197,19 +197,52 @@ async def transcribe_full_audio_job( # Convert segments to SpeakerSegment objects speaker_segments = [] - for seg in segments: - # Use identified_as if available (from speaker recognition), otherwise use speaker label - speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") - + + if segments: + # Use provided segments + for seg in segments: + # Use identified_as if available (from speaker recognition), otherwise use speaker label + speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") + + speaker_segments.append( + Conversation.SpeakerSegment( + start=seg.get("start", 0), + end=seg.get("end", 0), + text=seg.get("text", ""), + speaker=speaker_name, + confidence=seg.get("confidence") + ) + ) + elif transcript_text: + # NOTE: Parakeet falls here. 
+ # If no segments but we have text, create a single segment from the full transcript + # Calculate duration from words if available, otherwise estimate from audio + start_time_seg = 0.0 + end_time_seg = 0.0 + + if words: + # Use word timestamps if available + start_times = [w.get("start", 0) for w in words if "start" in w] + end_times = [w.get("end", 0) for w in words if "end" in w] + if start_times: + start_time_seg = min(start_times) + if end_times: + end_time_seg = max(end_times) + else: + # Estimate duration: assume ~150 words per minute, or use audio file duration + # For now, use a default duration if we can't calculate it + end_time_seg = len(transcript_text.split()) * 0.4 # Rough estimate: 0.4s per word + speaker_segments.append( Conversation.SpeakerSegment( - start=seg.get("start", 0), - end=seg.get("end", 0), - text=seg.get("text", ""), - speaker=speaker_name, - confidence=seg.get("confidence") + start=start_time_seg, + end=end_time_seg if end_time_seg > start_time_seg else start_time_seg + 1.0, + text=transcript_text, + speaker="Unknown", + confidence=None ) ) + logger.info(f"šŸ“Š Created single segment from transcript text (no segments returned by provider)") logger.info(f"šŸ“Š Created {len(speaker_segments)} speaker segments") diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 0f39cb09..d9386d37 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -41,7 +41,8 @@ shutdown() { kill $RQ_WORKER_5_PID 2>/dev/null || true kill $RQ_WORKER_6_PID 2>/dev/null || true kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true - kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true + [ -n "$AUDIO_STREAM_WORKER_PID" ] && kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true + [ -n "$PARAKEET_STREAM_WORKER_PID" ] && kill $PARAKEET_STREAM_WORKER_PID 2>/dev/null || true wait echo "āœ… All workers stopped" exit 0 @@ -74,11 +75,28 @@ echo "šŸ’¾ Starting audio persistence worker (1 worker for audio 
queue)..." uv run python -m advanced_omi_backend.workers.rq_worker_entry audio & AUDIO_PERSISTENCE_WORKER_PID=$! -# Start 1 audio stream worker for Deepgram +# Start 1 audio stream worker for Deepgram (only if DEEPGRAM_API_KEY is set) # Single worker ensures sequential processing of audio chunks -echo "šŸŽµ Starting audio stream Deepgram worker (1 worker for sequential processing)..." -uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker & -AUDIO_STREAM_WORKER_PID=$! +if [ -n "$DEEPGRAM_API_KEY" ]; then + echo "šŸŽµ Starting audio stream Deepgram worker (1 worker for sequential processing)..." + uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker & + AUDIO_STREAM_WORKER_PID=$! +else + echo "ā­ļø Skipping Deepgram audio stream worker (DEEPGRAM_API_KEY not set)" + AUDIO_STREAM_WORKER_PID="" +fi + +# Start 1 audio stream worker for Parakeet (only if PARAKEET_ASR_URL or OFFLINE_ASR_TCP_URI is set) +# Single worker ensures sequential processing of audio chunks +PARAKEET_URL="${PARAKEET_ASR_URL:-${OFFLINE_ASR_TCP_URI:-}}" +if [ -n "$PARAKEET_URL" ]; then + echo "šŸŽ¤ Starting audio stream Parakeet worker (1 worker for sequential processing)..." + uv run python -m advanced_omi_backend.workers.audio_stream_parakeet_worker & + PARAKEET_STREAM_WORKER_PID=$! 
+else + echo "ā­ļø Skipping Parakeet audio stream worker (PARAKEET_ASR_URL or OFFLINE_ASR_TCP_URI not set)" + PARAKEET_STREAM_WORKER_PID="" +fi echo "āœ… All workers started:" echo " - RQ worker 1: PID $RQ_WORKER_1_PID (transcription, memory, default)" @@ -88,7 +106,12 @@ echo " - RQ worker 4: PID $RQ_WORKER_4_PID (transcription, memory, default)" echo " - RQ worker 5: PID $RQ_WORKER_5_PID (transcription, memory, default)" echo " - RQ worker 6: PID $RQ_WORKER_6_PID (transcription, memory, default)" echo " - Audio persistence worker: PID $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" -echo " - Audio stream worker: PID $AUDIO_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" +if [ -n "$AUDIO_STREAM_WORKER_PID" ]; then + echo " - Audio stream Deepgram worker: PID $AUDIO_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" +fi +if [ -n "$PARAKEET_STREAM_WORKER_PID" ]; then + echo " - Audio stream Parakeet worker: PID $PARAKEET_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" +fi # Wait for any process to exit wait -n @@ -102,7 +125,9 @@ kill $RQ_WORKER_4_PID 2>/dev/null || true kill $RQ_WORKER_5_PID 2>/dev/null || true kill $RQ_WORKER_6_PID 2>/dev/null || true kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true -kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true +[ -n "$AUDIO_STREAM_WORKER_PID" ] && kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true +# Mirror shutdown(): the optional Parakeet stream worker must be stopped here too, or the final wait blocks on it +[ -n "$PARAKEET_STREAM_WORKER_PID" ] && kill $PARAKEET_STREAM_WORKER_PID 2>/dev/null || true wait echo "šŸ”„ All workers stopped" diff --git a/extras/speaker-recognition/.env.template b/extras/speaker-recognition/.env.template index 2ba3a1c7..3d653c62 100644 --- a/extras/speaker-recognition/.env.template +++ b/extras/speaker-recognition/.env.template @@ -27,7 +27,9 @@ SIMILARITY_THRESHOLD=0.15 # SPEAKER_SERVICE_HOST: Interface to bind to (0.0.0.0 = all interfaces, allows cross-network access) SPEAKER_SERVICE_HOST=0.0.0.0 SPEAKER_SERVICE_PORT=8085 -SPEAKER_SERVICE_URL=http://speaker-service:8085 + +# should be 127.0.0.1 here because speaker-service 
and speaker-service gpu are different +SPEAKER_SERVICE_URL=http://127.0.0.1:8085 # React Web UI Configuration REACT_UI_HOST=0.0.0.0 diff --git a/extras/speaker-recognition/README.md b/extras/speaker-recognition/README.md index 3196e349..4bfbc810 100644 --- a/extras/speaker-recognition/README.md +++ b/extras/speaker-recognition/README.md @@ -50,7 +50,29 @@ For non-interactive setup: ./init.sh --hf-token YOUR_TOKEN --compute-mode gpu --enable-https --server-ip 100.83.66.30 ``` -### 4. Start the system +### 4. Generate SSL Certificates (Required for Nginx) + +**⚠️ Important**: The nginx proxy requires SSL certificates to start. Generating them is an optional step in `wizard.sh`, so if you skipped it during setup you must create them manually: + +```bash +cd extras/speaker-recognition +# Generate certificates for localhost (default) +bash ssl/generate-ssl.sh localhost + +# Or generate for a specific IP/domain (e.g., Tailscale IP) +bash ssl/generate-ssl.sh 100.83.66.30 +``` + +This creates: +- `ssl/server.crt` - SSL certificate +- `ssl/server.key` - Private key + +**Note**: If SSL certificates are missing, nginx will fail to start with errors like: +``` +cannot load certificate "/etc/nginx/ssl/server.crt": BIO_new_file() failed +``` + +### 5. Start the system ```bash # For CPU-only docker compose --profile cpu up --build -d @@ -73,7 +95,7 @@ docker compose --profile cpu down docker compose --profile gpu down ``` -### 5. Access the Web UI +### 6. Access the Web UI **HTTPS Mode (Recommended for microphone access):** - **Secure Access**: https://localhost:8444/ or https://your-ip:8444/ @@ -85,7 +107,7 @@ docker compose --profile gpu down **Microphone access requires HTTPS for network connections (not just localhost).** -### 6. Get Started +### 7. Get Started 1. **Create a user** using the sidebar 2. **Upload audio** in the "Audio Viewer" page 3. 
**Annotate segments** in the "Annotation" page @@ -393,9 +415,20 @@ The React UI is configured with HTTPS enabled by default (`REACT_UI_HTTPS=true`) ## 🚨 Troubleshooting +**Nginx failing to start with SSL certificate errors?** +- Error: `cannot load certificate "/etc/nginx/ssl/server.crt": BIO_new_file() failed` +- **Solution**: Generate SSL certificates (see step 4 in Quick Start): + ```bash + cd extras/speaker-recognition + bash ssl/generate-ssl.sh localhost + ``` +- Verify certificates exist: `ls -la ssl/server.crt ssl/server.key` +- Restart nginx: `docker compose --profile cpu restart nginx` (or `--profile gpu`) + **Can't access the web UI?** - Check if services are running: `docker compose --profile cpu ps` (or `--profile gpu`) - View logs: `docker compose --profile cpu logs web-ui` +- Check nginx logs: `docker compose --profile cpu logs nginx` **Speaker service not responding?** - Check backend logs: `docker compose --profile cpu logs speaker-service` diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index c0821de5..364e81c5 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -29,7 +29,7 @@ services: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://speaker-service:8085/health"] + test: ["CMD", "curl", "-f", "http://localhost:8085/health"] interval: 30s timeout: 10s retries: 3 @@ -96,7 +96,8 @@ services: - ./nginx.conf:/etc/nginx/nginx.conf:ro - ./ssl:/etc/nginx/ssl:ro depends_on: - - web-ui + web-ui: + condition: service_healthy restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "-k", "https://localhost/health"] diff --git a/extras/speaker-recognition/nginx.conf.template b/extras/speaker-recognition/nginx.conf.template index 38a8604a..da22ab28 100644 --- a/extras/speaker-recognition/nginx.conf.template +++ b/extras/speaker-recognition/nginx.conf.template @@ -51,7 
+51,8 @@ http {
 
     # HTTPS Server
     server {
-        listen 443 ssl http2;
+        listen 443 ssl;
+        http2 on;
         server_name localhost TAILSCALE_IP;
 
         # SSL Configuration
diff --git a/setup-requirements.txt b/setup-requirements.txt
index de5610c1..284ce5bb 100644
--- a/setup-requirements.txt
+++ b/setup-requirements.txt
@@ -1,3 +1,4 @@
 # Dependencies for Friend-Lite root setup orchestrator
 rich>=13.0.0
-python-dotenv
\ No newline at end of file
+python-dotenv
+requests>=2.31.0
\ No newline at end of file
diff --git a/status.py b/status.py
new file mode 100644
index 00000000..1ae9a353
--- /dev/null
+++ b/status.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+"""
+Friend-Lite Health Status Checker
+Show runtime health status of all services
+"""
+
+import argparse
+import subprocess
+import sys
+import json
+import requests
+from pathlib import Path
+from typing import Dict, List, Any, Optional
+
+from rich import print as rprint
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.live import Live
+from rich.layout import Layout
+
+# Import service definitions from services.py
+from services import SERVICES, check_service_configured
+
+console = Console()
+
+# Health check endpoints
+HEALTH_ENDPOINTS = {
+    'backend': 'http://localhost:8000/health',
+    'speaker-recognition': 'http://localhost:8085/health',
+    'openmemory-mcp': 'http://localhost:8765/docs',  # No health endpoint, check docs
+}
+
+
+def _parse_compose_ps_output(stdout: str) -> List[Dict[str, Any]]:
+    """Normalise the output of `docker compose ps --format json`.
+
+    The output is accepted either as a single JSON document (one object or
+    an array of objects) or as one JSON object per line. Lines that are not
+    valid JSON and entries that are not JSON objects are skipped.
+
+    Returns:
+        One dict per container with 'name', 'state', 'status' and 'health'.
+    """
+    text = stdout.strip()
+    if not text:
+        return []
+
+    try:
+        # Single document: a lone object or an array of objects
+        parsed = json.loads(text)
+        entries = parsed if isinstance(parsed, list) else [parsed]
+    except json.JSONDecodeError:
+        # One JSON object per line; undecodable lines are skipped
+        entries = []
+        for line in text.split('\n'):
+            if not line:
+                continue
+            try:
+                entries.append(json.loads(line))
+            except json.JSONDecodeError:
+                continue
+
+    containers = []
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        containers.append({
+            'name': entry.get('Name', 'unknown'),
+            'state': entry.get('State', 'unknown'),
+            'status': entry.get('Status', 'unknown'),
+            # An empty Health value is treated like a missing one
+            'health': entry.get('Health') or 'none'
+        })
+    return containers
+
+
+def get_container_status(service_name: str) -> Dict[str, Any]:
+    """Get Docker container status for a service.
+
+    Args:
+        service_name: Key into SERVICES.
+
+    Returns:
+        Dict with 'status' ('running', 'partial', 'stopped', 'not_found',
+        'timeout' or 'error') and 'containers'; an 'error' status may also
+        carry an 'error' message.
+    """
+    service = SERVICES[service_name]
+    service_path = Path(service['path'])
+
+    if not service_path.exists():
+        return {'status': 'not_found', 'containers': []}
+
+    try:
+        profile = None
+
+        # Handle special profiles for backend (HTTPS)
+        if service_name == 'backend' and (service_path / 'Caddyfile').exists():
+            profile = 'https'
+
+        # Handle speaker-recognition profiles
+        if service_name == 'speaker-recognition':
+            from dotenv import dotenv_values
+            env_file = service_path / '.env'
+            if env_file.exists():
+                compute_mode = dotenv_values(env_file).get('COMPUTE_MODE', 'cpu')
+                profile = 'gpu' if compute_mode == 'gpu' else 'cpu'
+
+        # Get container status using docker compose ps
+        cmd = ['docker', 'compose']
+        if profile:
+            cmd += ['--profile', profile]
+        cmd += ['ps', '--format', 'json']
+
+        result = subprocess.run(
+            cmd,
+            cwd=service_path,
+            capture_output=True,
+            text=True,
+            timeout=10
+        )
+
+        if result.returncode != 0:
+            return {'status': 'error', 'containers': [], 'error': result.stderr}
+
+        containers = _parse_compose_ps_output(result.stdout)
+
+        if not containers:
+            return {'status': 'stopped', 'containers': []}
+
+        # Determine overall status
+        running = [c['state'] == 'running' for c in containers]
+        if all(running):
+            status = 'running'
+        elif any(running):
+            status = 'partial'
+        else:
+            status = 'stopped'
+
+        return {'status': status, 'containers': containers}
+
+    except subprocess.TimeoutExpired:
+        return {'status': 'timeout', 'containers': []}
+    except Exception as e:
+        return {'status': 'error', 'containers': [], 'error': str(e)}
+
+
+def check_http_health(url: str, timeout: int = 5) -> Dict[str, Any]:
+    """Check HTTP health endpoint.
+
+    Args:
+        url: Endpoint to request with GET.
+        timeout: Timeout passed to requests.get.
+
+    Returns:
+        Dict with 'healthy' and either 'status_code' plus 'data' (the parsed
+        JSON body, or None) or an 'error' message.
+    """
+    try:
+        response = requests.get(url, timeout=timeout)
+
+        if response.status_code != 200:
+            return {'healthy': False, 'status_code': response.status_code, 'data': None}
+
+        # Try to parse JSON response
+        try:
+            data = response.json()
+        except ValueError:
+            # Non-JSON 200 responses (e.g. a docs page) still count as healthy
+            data = None
+        return {'healthy': True, 'status_code': 200, 'data': data}
+
+    except requests.exceptions.Timeout:
+        # Checked before ConnectionError because ConnectTimeout subclasses both
+        return {'healthy': False, 'error': 'Timeout'}
+    except requests.exceptions.ConnectionError:
+        return {'healthy': False, 'error': 'Connection refused'}
+    except Exception as e:
+        return {'healthy': False, 'error': str(e)}
+
+
+def get_service_health(service_name: str) -> Dict[str, Any]:
+    """Get comprehensive health status for a service.
+
+    Returns:
+        Dict with 'configured', 'container_status' and 'health' (None when the
+        service has no entry in HEALTH_ENDPOINTS), plus 'containers' when the
+        service is configured.
+    """
+    # Check if configured
+    if not check_service_configured(service_name):
+        return {
+            'configured': False,
+            'container_status': 'not_configured',
+            'health': None
+        }
+
+    # Get container status
+    container_info = get_container_status(service_name)
+
+    # Check HTTP health endpoint if available
+    health_check = None
+    if service_name in HEALTH_ENDPOINTS:
+        url = HEALTH_ENDPOINTS[service_name]
+        health_check = check_http_health(url)
+
+    return {
+        'configured': True,
+        'container_status': container_info['status'],
+        'containers': container_info.get('containers', []),
+        'health': health_check
+    }
+
+
+def show_quick_status():
+    """Show quick status overview"""
+    console.print("\n🏥 [bold]Friend-Lite Health Status[/bold]\n")
+
+    table = Table(title="Service Status Overview")
+    table.add_column("Service", style="cyan", no_wrap=True)
+    table.add_column("Config", justify="center")
+    table.add_column("Containers", justify="center")
+    table.add_column("Health", justify="center")
+    table.add_column("Description", style="dim")
+
+    # Any other container status (error, timeout, not_found) maps to ⚫
+    container_icons = {'running': "🟢", 'partial': "🟡", 'stopped': "🔴"}
+
+    for service_name, service_info in SERVICES.items():
+        status = get_service_health(service_name)
+
+        # Config status
+        config_icon = "✅" if status['configured'] else "❌"
+
+        # Container status
+        if not status['configured']:
+            container_icon = "⚪"
+        else:
+            container_icon = container_icons.get(status['container_status'], "⚫")
+
+        # Health status
+        if status['health'] is None:
+            health_icon = "⚪"
+        elif status['health'].get('healthy'):
+            health_icon = "✅"
+        else:
+            health_icon = "❌"
+
+        table.add_row(
+            service_name,
+            config_icon,
+            container_icon,
+            health_icon,
+            service_info['description']
+        )
+
+    console.print(table)
+
+    # Legend
+    console.print("\n[dim]Legend:[/dim]")
+    console.print("[dim] Containers: 🟢 Running | 🟡 Partial | 🔴 Stopped | ⚪ Not Configured | ⚫ Error[/dim]")
+    console.print("[dim] Health: ✅ Healthy | ❌ Unhealthy | ⚪ No Endpoint[/dim]")
+
+
+def show_detailed_status():
+    """Show detailed status with backend health breakdown"""
+    console.print("\n🏥 [bold]Friend-Lite Detailed Health Status[/bold]\n")
+
+    # Get all service statuses
+    for service_name, service_info in SERVICES.items():
+        status = get_service_health(service_name)
+
+        # Service header
+        if status['configured']:
+            header = f"📦 {service_name.upper()}"
+        else:
+            header = f"📦 {service_name.upper()} (Not Configured)"
+
+        console.print(f"\n[bold cyan]{header}[/bold cyan]")
+        console.print(f"[dim]{service_info['description']}[/dim]")
+
+        if not status['configured']:
+            console.print("[yellow] ⚠️ Not configured (no .env file)[/yellow]")
+            continue
+
+        # Container status
+        console.print("\n [bold]Containers:[/bold]")
+        if status['container_status'] == 'running':
+            console.print(" [green]🟢 All containers running[/green]")
+        elif status['container_status'] == 'partial':
+            console.print(" [yellow]🟡 Some containers running[/yellow]")
+        elif status['container_status'] == 'stopped':
+            console.print(" [red]🔴 All containers stopped[/red]")
+        else:
+            console.print(" [red]⚫ Error checking containers[/red]")
+
+        # Show container details
+        for container in status.get('containers', []):
+            state_icon = "🟢" if container['state'] == 'running' else "🔴"
+            health_status = f" ({container['health']})" if container['health'] != 'none' else ""
+            console.print(f" {state_icon} {container['name']}: {container['status']}{health_status}")
+
+        # HTTP Health check
+        health = status['health']
+        if health is not None:
+            console.print("\n [bold]HTTP Health:[/bold]")
+
+            if health.get('healthy'):
+                console.print(" [green]✅ Healthy[/green]")
+
+                # For backend, show detailed health data
+                health_data = health.get('data')
+                if service_name == 'backend' and health_data and isinstance(health_data, dict):
+                    # Overall status
+                    overall_status = health_data.get('status', 'unknown')
+                    if overall_status == 'healthy':
+                        console.print(f" Overall: [green]{overall_status}[/green]")
+                    elif overall_status == 'degraded':
+                        console.print(f" Overall: [yellow]{overall_status}[/yellow]")
+                    else:
+                        console.print(f" Overall: [red]{overall_status}[/red]")
+
+                    # Critical services
+                    services = health_data.get('services', {})
+                    console.print("\n [bold]Critical Services:[/bold]")
+
+                    for svc_name in ['mongodb', 'redis']:
+                        if svc_name in services:
+                            svc = services[svc_name]
+                            if svc.get('healthy'):
+                                console.print(f" [green]✅ {svc_name}: {svc.get('status', 'ok')}[/green]")
+                            else:
+                                console.print(f" [red]❌ {svc_name}: {svc.get('status', 'error')}[/red]")
+
+                    # Optional services
+                    console.print("\n [bold]Optional Services:[/bold]")
+                    optional_services = ['audioai', 'memory_service', 'speech_to_text', 'speaker_recognition', 'openmemory_mcp']
+                    for svc_name in optional_services:
+                        if svc_name in services:
+                            svc = services[svc_name]
+                            if svc.get('healthy'):
+                                console.print(f" [green]✅ {svc_name}: {svc.get('status', 'ok')}[/green]")
+                            else:
+                                console.print(f" [yellow]⚠️ {svc_name}: {svc.get('status', 'degraded')}[/yellow]")
+
+                    # Configuration info
+                    config = health_data.get('config', {})
+                    if config:
+                        console.print("\n [bold]Configuration:[/bold]")
+                        console.print(f" LLM: {config.get('llm_provider', 'unknown')} ({config.get('llm_model', 'unknown')})")
+                        console.print(f" Transcription: {config.get('transcription_service', 'unknown')}")
+                        console.print(f" Active Clients: {config.get('active_clients', 0)}")
+            else:
+                # Connection failures carry 'error'; non-200 responses only carry 'status_code'
+                if 'error' in health:
+                    error = health['error']
+                elif health.get('status_code') is not None:
+                    error = f"HTTP {health['status_code']}"
+                else:
+                    error = 'Unknown error'
+                console.print(f" [red]❌ Unhealthy: {error}[/red]")
+
+        console.print("")  # Spacing
+
+
+def show_json_status():
+    """Show status in JSON format for programmatic consumption"""
+    status_data = {}
+
+    for service_name in SERVICES.keys():
+        status_data[service_name] = get_service_health(service_name)
+
+    print(json.dumps(status_data, indent=2))
+
+
+def main():
+    """Parse command-line arguments and show the requested status view"""
+    parser = argparse.ArgumentParser(
+        description="Friend-Lite Health Status Checker",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  ./status.sh Show quick status overview
+  ./status.sh --detailed Show detailed health information
+  ./status.sh --json Output status in JSON format
+        """
+    )
+
+    parser.add_argument(
+        '--detailed', '-d',
+        action='store_true',
+        help='Show detailed health information including backend service breakdown'
+    )
+
+    parser.add_argument(
+        '--json', '-j',
+        action='store_true',
+        help='Output status in JSON format'
+    )
+
+    args = parser.parse_args()
+
+    if args.json:
+        show_json_status()
+    elif args.detailed:
+        show_detailed_status()
+    else:
+        show_quick_status()
+
+        # Tip is shown in quick mode only, so --json output stays parseable
+        console.print("\n💡 [dim]Tip: Use './status.sh --detailed' for comprehensive health checks[/dim]\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/status.sh b/status.sh
new file mode 100755
index 00000000..a66fe459
--- /dev/null
+++ b/status.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+uv run --with-requirements setup-requirements.txt python status.py "$@"
diff --git a/wizard.py b/wizard.py
index cfc5b861..a1875028 100755
--- a/wizard.py
+++ b/wizard.py
@@ -158,7 +158,7 @@ def run_service_setup(service_name, selected_services, https_enabled=False, serv
         # For advanced backend, pass URLs of other selected services and HTTPS config
         cmd =
service['cmd'].copy() if 'speaker-recognition' in selected_services: - cmd.extend(['--speaker-service-url', 'http://host.docker.internal:8085']) + cmd.extend(['--speaker-service-url', 'http://speaker-service:8085']) if 'asr-services' in selected_services: cmd.extend(['--parakeet-asr-url', 'http://host.docker.internal:8767']) @@ -174,8 +174,34 @@ def run_service_setup(service_name, selected_services, https_enabled=False, serv if service_name == 'speaker-recognition' and https_enabled and server_ip: cmd.extend(['--enable-https', '--server-ip', server_ip]) - # For speaker-recognition, try to pass API keys and config if available + # For speaker-recognition, validate HF_TOKEN is required if service_name == 'speaker-recognition': + # HF_TOKEN is required for speaker-recognition + speaker_env_path = 'extras/speaker-recognition/.env' + hf_token = read_env_value(speaker_env_path, 'HF_TOKEN') + + # Check if HF_TOKEN is missing or is a placeholder + if not hf_token or is_placeholder(hf_token, 'your_huggingface_token_here', 'your-huggingface-token-here', 'hf_xxxxx'): + console.print("\n[red][ERROR][/red] HF_TOKEN is required for speaker-recognition service") + console.print("[yellow]Speaker recognition requires a Hugging Face token to download models[/yellow]") + console.print("Get your token from: https://huggingface.co/settings/tokens") + console.print() + + # Prompt for HF_TOKEN + try: + hf_token_input = console.input("[cyan]Enter your HF_TOKEN[/cyan]: ").strip() + if not hf_token_input or is_placeholder(hf_token_input, 'your_huggingface_token_here', 'your-huggingface-token-here', 'hf_xxxxx'): + console.print("[red][ERROR][/red] Invalid HF_TOKEN provided. Speaker-recognition setup cancelled.") + return False + hf_token = hf_token_input + except EOFError: + console.print("[red][ERROR][/red] HF_TOKEN is required. 
Speaker-recognition setup cancelled.") + return False + + # Pass HF Token to init script + cmd.extend(['--hf-token', hf_token]) + console.print("[green][SUCCESS][/green] HF_TOKEN configured") + # Pass Deepgram API key from backend if available backend_env_path = 'backends/advanced/.env' deepgram_key = read_env_value(backend_env_path, 'DEEPGRAM_API_KEY') @@ -183,13 +209,6 @@ def run_service_setup(service_name, selected_services, https_enabled=False, serv cmd.extend(['--deepgram-api-key', deepgram_key]) console.print("[blue][INFO][/blue] Found existing DEEPGRAM_API_KEY from backend config, reusing") - # Pass HF Token from existing speaker recognition .env if available - speaker_env_path = 'extras/speaker-recognition/.env' - hf_token = read_env_value(speaker_env_path, 'HF_TOKEN') - if hf_token and not is_placeholder(hf_token, 'your_huggingface_token_here', 'your-huggingface-token-here'): - cmd.extend(['--hf-token', hf_token]) - console.print("[blue][INFO][/blue] Found existing HF_TOKEN, reusing") - # Pass compute mode from existing .env if available compute_mode = read_env_value(speaker_env_path, 'COMPUTE_MODE') if compute_mode in ['cpu', 'gpu']: