diff --git a/Makefile b/Makefile index 4a2a3d96..4f470f94 100644 --- a/Makefile +++ b/Makefile @@ -185,9 +185,8 @@ config-docker: ## Generate Docker Compose configuration files @CONFIG_FILE=config.env.dev python3 scripts/generate-docker-configs.py @echo "βœ… Docker Compose configuration files generated" -config-k8s: ## Generate Kubernetes configuration files (Skaffold env + ConfigMap/Secret) +config-k8s: ## Generate Kubernetes configuration files (ConfigMap/Secret only - no .env files) @echo "☸️ Generating Kubernetes configuration files..." - @python3 scripts/generate-docker-configs.py @python3 scripts/generate-k8s-configs.py @echo "πŸ“¦ Applying ConfigMap and Secret to Kubernetes..." @kubectl apply -f k8s-manifests/configmap.yaml -n $(APPLICATION_NAMESPACE) 2>/dev/null || echo "⚠️ ConfigMap not applied (cluster not available?)" diff --git a/backends/advanced/Dockerfile.k8s b/backends/advanced/Dockerfile.k8s index f285b34d..097f5d7f 100644 --- a/backends/advanced/Dockerfile.k8s +++ b/backends/advanced/Dockerfile.k8s @@ -40,6 +40,9 @@ COPY memory_config.yaml ./ COPY start-k8s.sh start-workers.sh ./ RUN chmod +x start-k8s.sh start-workers.sh +# Activate virtual environment in PATH +ENV PATH="/app/.venv/bin:$PATH" + # Run the application with workers -# K8s startup script starts both FastAPI backend and RQ workers with --no-sync +# K8s startup script starts both FastAPI backend and RQ workers CMD ["./start-k8s.sh"] diff --git a/backends/advanced/Docs/architecture.md b/backends/advanced/Docs/architecture.md index 67919ae9..8211cb32 100644 --- a/backends/advanced/Docs/architecture.md +++ b/backends/advanced/Docs/architecture.md @@ -1005,7 +1005,7 @@ src/advanced_omi_backend/ - `GET /api/conversations/{conversation_id}/versions` - Get version history - `POST /api/conversations/{conversation_id}/activate-transcript` - Switch transcript version - `POST /api/conversations/{conversation_id}/activate-memory` - Switch memory version -- `POST /api/process-audio-files` - Batch 
audio file processing +- `POST /api/audio/upload` - Batch audio file upload and processing - WebSocket `/ws_omi` - Real-time Opus audio streaming with Wyoming protocol (OMI devices) - WebSocket `/ws_pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps) diff --git a/backends/advanced/Docs/quickstart.md b/backends/advanced/Docs/quickstart.md index 272388f5..523218bc 100644 --- a/backends/advanced/Docs/quickstart.md +++ b/backends/advanced/Docs/quickstart.md @@ -260,13 +260,13 @@ The system supports processing existing audio files through the file upload API. export USER_TOKEN="your-jwt-token" # Upload single WAV file -curl -X POST "http://localhost:8000/api/process-audio-files" \ +curl -X POST "http://localhost:8000/api/audio/upload" \ -H "Authorization: Bearer $USER_TOKEN" \ -F "files=@/path/to/audio.wav" \ -F "device_name=file_upload" # Upload multiple WAV files -curl -X POST "http://localhost:8000/api/process-audio-files" \ +curl -X POST "http://localhost:8000/api/audio/upload" \ -H "Authorization: Bearer $USER_TOKEN" \ -F "files=@/path/to/recording1.wav" \ -F "files=@/path/to/recording2.wav" \ diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 625b49be..7f2bb942 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -95,7 +95,7 @@ services: # Use test database name to ensure isolation command: mongod --dbpath /data/db --bind_ip_all healthcheck: - test: ["CMD", "mongo", "--eval", "db.runCommand('ping').ok", "--quiet"] + test: ["CMD", "mongosh", "--eval", "db.runCommand('ping').ok", "--quiet"] interval: 5s timeout: 5s retries: 10 @@ -114,6 +114,47 @@ services: timeout: 3s retries: 5 + workers-test: + build: + context: . 
+ dockerfile: Dockerfile + command: ./start-workers.sh + volumes: + - ./src:/app/src + - ./data/test_audio_chunks:/app/audio_chunks + - ./data/test_debug_dir:/app/debug_dir + - ./data/test_data:/app/data + environment: + # Same environment as backend + - MONGODB_URI=mongodb://mongo-test:27017/test_db + - QDRANT_BASE_URL=qdrant-test + - QDRANT_PORT=6333 + - REDIS_URL=redis://redis-test:6379/0 + - DEBUG_DIR=/app/debug_dir + - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} + - OPENAI_API_KEY=${OPENAI_API_KEY} + - LLM_PROVIDER=${LLM_PROVIDER:-openai} + - OPENAI_MODEL=${OPENAI_MODEL:-gpt-4o-mini} + - AUTH_SECRET_KEY=test-jwt-signing-key-for-integration-tests + - ADMIN_PASSWORD=test-admin-password-123 + - ADMIN_EMAIL=test-admin@example.com + - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} + - MEMORY_PROVIDER=${MEMORY_PROVIDER:-friend_lite} + - OPENMEMORY_MCP_URL=${OPENMEMORY_MCP_URL:-http://host.docker.internal:8765} + - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} + - DISABLE_SPEAKER_RECOGNITION=false + - SPEAKER_SERVICE_URL=https://localhost:8085 + depends_on: + friend-backend-test: + condition: service_healthy + mongo-test: + condition: service_healthy + redis-test: + condition: service_started + qdrant-test: + condition: service_started + restart: unless-stopped + # caddy: # image: caddy:2-alpine # ports: diff --git a/backends/advanced/init.py b/backends/advanced/init.py index cd13cbb4..667f5209 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -394,23 +394,37 @@ def setup_https(self): # Generate Caddyfile from template self.console.print("[blue][INFO][/blue] Creating Caddyfile configuration...") caddyfile_template = script_dir / "Caddyfile.template" + caddyfile_path = script_dir / "Caddyfile" + if caddyfile_template.exists(): try: - with open(caddyfile_template, 'r') as f: - caddyfile_content = f.read() + # Check if Caddyfile exists as a directory (common issue) + if caddyfile_path.exists() and caddyfile_path.is_dir(): + 
self.console.print("[red]❌ ERROR: 'Caddyfile' exists as a directory![/red]") + self.console.print("[yellow] Please remove it manually:[/yellow]") + self.console.print(f"[yellow] rm -rf {caddyfile_path}[/yellow]") + self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.config["HTTPS_ENABLED"] = "false" + else: + with open(caddyfile_template, 'r') as f: + caddyfile_content = f.read() - # Replace TAILSCALE_IP with server_ip - caddyfile_content = caddyfile_content.replace('TAILSCALE_IP', server_ip) + # Replace TAILSCALE_IP with server_ip + caddyfile_content = caddyfile_content.replace('TAILSCALE_IP', server_ip) - with open('Caddyfile', 'w') as f: - f.write(caddyfile_content) + with open(caddyfile_path, 'w') as f: + f.write(caddyfile_content) - self.console.print(f"[green][SUCCESS][/green] Caddyfile created for: {server_ip}") + self.console.print(f"[green][SUCCESS][/green] Caddyfile created for: {server_ip}") except Exception as e: - self.console.print(f"[yellow][WARNING][/yellow] Caddyfile generation failed: {e}") + self.console.print(f"[red]❌ ERROR: Caddyfile generation failed: {e}[/red]") + self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.config["HTTPS_ENABLED"] = "false" else: - self.console.print("[yellow][WARNING][/yellow] Caddyfile.template not found") + self.console.print("[red]❌ ERROR: Caddyfile.template not found[/red]") + self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.config["HTTPS_ENABLED"] = "false" else: self.config["HTTPS_ENABLED"] = "false" diff --git a/backends/advanced/src/advanced_omi_backend/auth.py b/backends/advanced/src/advanced_omi_backend/auth.py index fbb334a7..e47a3b9e 100644 --- a/backends/advanced/src/advanced_omi_backend/auth.py +++ b/backends/advanced/src/advanced_omi_backend/auth.py @@ -183,7 +183,7 @@ async def websocket_auth(websocket, token: Optional[str] = None) -> Optional[Use # Try JWT token from query parameter first 
if token: - logger.debug("Attempting WebSocket auth with query token.") + logger.info(f"Attempting WebSocket auth with query token (first 20 chars): {token[:20]}...") try: user_db_gen = get_user_db() user_db = await user_db_gen.__anext__() @@ -192,8 +192,10 @@ async def websocket_auth(websocket, token: Optional[str] = None) -> Optional[Use if user and user.is_active: logger.info(f"WebSocket auth successful for user {user.user_id} using query token.") return user + else: + logger.warning(f"Token validated but user inactive or not found: user={user}") except Exception as e: - logger.warning(f"WebSocket auth with query token failed: {e}") + logger.error(f"WebSocket auth with query token failed: {type(e).__name__}: {e}", exc_info=True) # Try cookie authentication logger.debug("Attempting WebSocket auth with cookie.") diff --git a/backends/advanced/src/advanced_omi_backend/client.py b/backends/advanced/src/advanced_omi_backend/client.py index 3c43a43a..30b3cc62 100644 --- a/backends/advanced/src/advanced_omi_backend/client.py +++ b/backends/advanced/src/advanced_omi_backend/client.py @@ -12,7 +12,6 @@ from pathlib import Path from typing import Dict, List, Optional, Tuple -from advanced_omi_backend.conversation_manager import get_conversation_manager from advanced_omi_backend.database import AudioChunksRepository from advanced_omi_backend.task_manager import get_task_manager from wyoming.audio import AudioChunk @@ -133,33 +132,19 @@ async def close_current_conversation(self): audio_logger.info(f"πŸ”’ No active conversation to close for client {self.client_id}") return - # Debug logging for memory processing investigation - audio_logger.info(f"πŸ” ClientState close_current_conversation debug for {self.client_id}:") - audio_logger.info(f" - current_audio_uuid: {self.current_audio_uuid}") - audio_logger.info(f" - user_id: {self.user_id}") - audio_logger.info(f" - user_email: {self.user_email}") - audio_logger.info(f" - client_id: {self.client_id}") - - # Use 
ConversationManager for clean separation of concerns - conversation_manager = get_conversation_manager() - success = await conversation_manager.close_conversation( - client_id=self.client_id, - audio_uuid=self.current_audio_uuid, - user_id=self.user_id, - user_email=self.user_email, - conversation_start_time=self.conversation_start_time, - speech_segments=self.speech_segments, - chunk_dir=self.chunk_dir, - ) + # NOTE: ClientState is legacy V1 code. In V2 architecture, conversation closure + # is handled by the websocket controllers using RQ jobs directly. + # This method is kept minimal for backward compatibility. - if success: - # Clean up speech segments for this conversation - if self.current_audio_uuid in self.speech_segments: - del self.speech_segments[self.current_audio_uuid] - if self.current_audio_uuid in self.current_speech_start: - del self.current_speech_start[self.current_audio_uuid] - else: - audio_logger.warning(f"⚠️ Conversation closure had issues for {self.current_audio_uuid}") + audio_logger.info(f"πŸ”’ Closing conversation for client {self.client_id}, audio_uuid: {self.current_audio_uuid}") + + # Clean up speech segments for this conversation + if self.current_audio_uuid in self.speech_segments: + del self.speech_segments[self.current_audio_uuid] + if self.current_audio_uuid in self.current_speech_start: + del self.current_speech_start[self.current_audio_uuid] + + audio_logger.info(f"βœ… Cleaned up state for {self.current_audio_uuid}") async def start_new_conversation(self): """Start a new conversation by closing current and resetting state.""" diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py index a805a6f0..51d0a2a1 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py @@ -3,19 +3,28 @@ Handles audio file uploads and 
processes them directly. Simplified to write files immediately and enqueue transcription. + +Also includes audio cropping operations that work with the audio_chunks collection. """ import logging import time import uuid +from pathlib import Path from fastapi import UploadFile from fastapi.responses import JSONResponse -from advanced_omi_backend.audio_utils import AudioValidationError, write_audio_file +from advanced_omi_backend.utils.audio_utils import ( + AudioValidationError, + write_audio_file, + _process_audio_cropping_with_relative_timestamps, +) from advanced_omi_backend.models.job import JobPriority from advanced_omi_backend.models.user import User from advanced_omi_backend.models.conversation import create_conversation +from advanced_omi_backend.database import AudioChunksRepository, chunks_col +from advanced_omi_backend.client_manager import client_belongs_to_user logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") @@ -46,7 +55,6 @@ async def upload_and_process_audio_files( return JSONResponse(status_code=400, content={"error": "No files provided"}) processed_files = [] - enqueued_jobs = [] client_id = generate_client_id(user, device_name) for file_index, file in enumerate(files): @@ -94,15 +102,13 @@ async def upload_and_process_audio_files( f"πŸ“Š {file.filename}: {duration:.1f}s β†’ {wav_filename}" ) - # Create conversation immediately for uploaded files - conversation_id = str(uuid.uuid4()) + # Create conversation immediately for uploaded files (conversation_id auto-generated) version_id = str(uuid.uuid4()) # Generate title from filename title = file.filename.rsplit('.', 1)[0][:50] if file.filename else "Uploaded Audio" conversation = create_conversation( - conversation_id=conversation_id, audio_uuid=audio_uuid, user_id=user.user_id, client_id=client_id, @@ -110,18 +116,19 @@ async def upload_and_process_audio_files( summary="Processing uploaded audio file..." 
) await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID audio_logger.info(f"πŸ“ Created conversation {conversation_id} for uploaded file") - # Enqueue complete batch processing job chain - from advanced_omi_backend.controllers.queue_controller import start_batch_processing_jobs + # Enqueue post-conversation processing job chain + from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs - job_ids = start_batch_processing_jobs( + job_ids = start_post_conversation_jobs( conversation_id=conversation_id, audio_uuid=audio_uuid, + audio_file_path=file_path, user_id=user.user_id, - user_email=user.email, - audio_file_path=file_path + post_transcription=True # Run batch transcription for uploads ) processed_files.append({ @@ -135,42 +142,193 @@ async def upload_and_process_audio_files( "duration_seconds": round(duration, 2), }) - enqueued_jobs.append({ - "transcript_job_id": job_ids['transcription'], - "speaker_job_id": job_ids['speaker_recognition'], - "memory_job_id": job_ids['memory'], - "conversation_id": conversation_id, - "audio_uuid": audio_uuid, - "filename": file.filename, - }) - audio_logger.info( f"βœ… Processed {file.filename} β†’ conversation {conversation_id}, " f"jobs: {job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" ) + except (OSError, IOError) as e: + # File I/O errors during audio processing + audio_logger.exception(f"File I/O error processing {file.filename}") + processed_files.append({ + "filename": file.filename or "unknown", + "status": "error", + "error": str(e), + }) except Exception as e: - audio_logger.error(f"Error processing file {file.filename}: {e}") + # Unexpected errors during file processing + audio_logger.exception(f"Unexpected error processing file {file.filename}") processed_files.append({ "filename": file.filename or "unknown", "status": "error", "error": str(e), }) + successful_files = [f for f in 
processed_files if f.get("status") == "processing"] + failed_files = [f for f in processed_files if f.get("status") == "error"] + return { - "message": f"Uploaded and processing {len(enqueued_jobs)} file(s)", + "message": f"Uploaded and processing {len(successful_files)} file(s)", "client_id": client_id, "files": processed_files, - "jobs": enqueued_jobs, "summary": { "total": len(files), - "processing": len(enqueued_jobs), - "failed": len([f for f in processed_files if f.get("status") == "error"]), + "processing": len(successful_files), + "failed": len(failed_files), }, } + except (OSError, IOError) as e: + # File system errors during upload handling + audio_logger.exception("File I/O error in upload_and_process_audio_files") + return JSONResponse( + status_code=500, content={"error": f"File upload failed: {str(e)}"} + ) except Exception as e: - audio_logger.error(f"Error in upload_and_process_audio_files: {e}") + # Unexpected errors in upload handler + audio_logger.exception("Unexpected error in upload_and_process_audio_files") return JSONResponse( status_code=500, content={"error": f"File upload failed: {str(e)}"} ) + + +async def get_cropped_audio_info(audio_uuid: str, user: User): + """ + Get audio cropping metadata from the audio_chunks collection. + + This is an audio service operation that retrieves cropping-related metadata + such as speech segments, cropped audio path, and cropping timestamps. + + Used for: Checking cropping status and retrieving audio processing details. + Works with: audio_chunks collection (audio service operations). 
+ """ + try: + # Find the audio chunk + chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) + if not chunk: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser: + if not client_belongs_to_user(chunk["client_id"], user.user_id): + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + return { + "audio_uuid": audio_uuid, + "cropped_audio_path": chunk.get("cropped_audio_path"), + "speech_segments": chunk.get("speech_segments", []), + "cropped_duration": chunk.get("cropped_duration"), + "cropped_at": chunk.get("cropped_at"), + "original_audio_path": chunk.get("audio_path"), + } + + except Exception as e: + # Database or unexpected errors when fetching audio metadata + audio_logger.exception("Error fetching cropped audio info") + return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) + + +async def reprocess_audio_cropping(audio_uuid: str, user: User): + """ + Re-process audio cropping operation for an audio file. + + This is an audio service operation that re-runs the audio cropping process + to extract only speech segments from the full audio file. + + Used for: Re-processing audio when cropping failed or needs updating. + Works with: audio_chunks collection and audio_utils cropping functions. 
+ """ + try: + # Find the audio chunk + chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) + if not chunk: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser: + if not client_belongs_to_user(chunk["client_id"], user.user_id): + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + audio_path = chunk.get("audio_path") + if not audio_path: + return JSONResponse( + status_code=400, content={"error": "No audio file found for this conversation"} + ) + + # Check if file exists - try multiple possible locations + possible_paths = [ + Path("/app/audio_chunks") / audio_path, + Path(audio_path), # fallback to relative path + ] + + full_audio_path = None + for path in possible_paths: + if path.exists(): + full_audio_path = path + break + + if not full_audio_path: + return JSONResponse( + status_code=422, + content={ + "error": "Audio file not found on disk", + "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", + "searched_paths": [str(p) for p in possible_paths] + } + ) + + # Get speech segments from the chunk + speech_segments = chunk.get("speech_segments", []) + if not speech_segments: + return JSONResponse( + status_code=400, + content={"error": "No speech segments found for this conversation"} + ) + + # Generate output path for cropped audio + cropped_filename = f"cropped_{audio_uuid}.wav" + output_path = Path("/app/audio_chunks") / cropped_filename + + # Get repository for database updates + chunk_repo = AudioChunksRepository(chunks_col) + + # Reprocess the audio cropping + try: + result = await _process_audio_cropping_with_relative_timestamps( + str(full_audio_path), + speech_segments, + str(output_path), + audio_uuid, + chunk_repo + ) + + if result: + audio_logger.info(f"Successfully reprocessed audio cropping for {audio_uuid}") + return JSONResponse( + content={"message": 
f"Audio cropping reprocessed for {audio_uuid}"} + ) + else: + audio_logger.error(f"Failed to reprocess audio cropping for {audio_uuid}") + return JSONResponse( + status_code=500, content={"error": "Failed to reprocess audio cropping"} + ) + + except (OSError, IOError) as processing_error: + # File I/O errors during audio cropping + audio_logger.exception("File I/O error during audio cropping reprocessing") + return JSONResponse( + status_code=500, + content={"error": f"Audio processing failed: {str(processing_error)}"}, + ) + except Exception as processing_error: + # Unexpected errors during cropping operation + audio_logger.exception("Unexpected error during audio cropping reprocessing") + return JSONResponse( + status_code=500, + content={"error": f"Audio processing failed: {str(processing_error)}"}, + ) + + except Exception as e: + # Database or unexpected errors in reprocessing handler + audio_logger.exception("Error reprocessing audio cropping") + return JSONResponse(status_code=500, content={"error": "Error reprocessing audio cropping"}) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index c9233dc7..86e00ad3 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -2,23 +2,16 @@ Conversation controller for handling conversation-related business logic. 
""" -import asyncio -import hashlib import logging import time -from datetime import datetime, timezone from pathlib import Path from typing import Optional -from advanced_omi_backend.audio_utils import ( - _process_audio_cropping_with_relative_timestamps, -) from advanced_omi_backend.client_manager import ( ClientManager, client_belongs_to_user, - get_user_clients_all, ) -from advanced_omi_backend.database import AudioChunksRepository, chunks_col +from advanced_omi_backend.database import chunks_col from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.users import User from fastapi.responses import JSONResponse @@ -26,9 +19,9 @@ logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") -# Initialize repositories (legacy collections only) -chunk_repo = AudioChunksRepository(chunks_col) -# ProcessingRunsRepository removed - using RQ job tracking instead +# Legacy audio_chunks collection is still used by some endpoints (speaker assignment, segment updates) +# But conversation queries now use the Conversation model directly +# Audio cropping operations are handled in audio_controller.py async def close_current_conversation(client_id: str, user: User, client_manager: ClientManager): @@ -103,23 +96,16 @@ async def get_conversation(conversation_id: str, user: User): if not user.is_superuser and conversation.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden"}) - # Get audio file paths from audio_chunks collection - audio_chunk = await chunk_repo.get_chunk_by_audio_uuid(conversation.audio_uuid) - audio_path = audio_chunk.get("audio_path") if audio_chunk else None - cropped_audio_path = audio_chunk.get("cropped_audio_path") if audio_chunk else None - # Format conversation for API response - use model_dump and add computed fields formatted_conversation = conversation.model_dump( mode='json', # Automatically converts datetime to ISO strings, handles 
nested models exclude={'id'} # Exclude MongoDB internal _id ) - # Add computed/external fields not in the model + # Add computed fields not in the model formatted_conversation.update({ "timestamp": 0, # Legacy field - using created_at instead "has_memory": bool(conversation.memories), - "audio_path": audio_path, - "cropped_audio_path": cropped_audio_path, "version_info": { "transcript_count": len(conversation.transcript_versions), "memory_count": len(conversation.memory_versions), @@ -148,27 +134,13 @@ async def get_conversations(user: User): # Admins see all conversations user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list() - # Batch fetch all audio chunks in one query to avoid N+1 problem - audio_uuids = [conv.audio_uuid for conv in user_conversations] - audio_chunks_dict = {} - if audio_uuids: - # Fetch all audio chunks at once - chunks_cursor = chunk_repo.col.find({"audio_uuid": {"$in": audio_uuids}}) - async for chunk in chunks_cursor: - audio_chunks_dict[chunk["audio_uuid"]] = chunk - # Convert conversations to API format conversations = [] for conv in user_conversations: - # Get audio file paths from pre-fetched chunks - audio_chunk = audio_chunks_dict.get(conv.audio_uuid) - audio_path = audio_chunk.get("audio_path") if audio_chunk else None - cropped_audio_path = audio_chunk.get("cropped_audio_path") if audio_chunk else None - # Format conversation for list - use model_dump with exclusions conv_dict = conv.model_dump( mode='json', # Automatically converts datetime to ISO strings - exclude={'id', 'transcript', 'segments'} # Exclude large fields for list view + exclude={'id', 'transcript', 'segments', 'transcript_versions', 'memory_versions'} # Exclude large fields for list view ) # Add computed/external fields @@ -176,8 +148,6 @@ async def get_conversations(user: User): "timestamp": 0, # Legacy field - using created_at instead "segment_count": len(conv.segments) if conv.segments else 0, "has_memory": 
bool(conv.memories), - "audio_path": audio_path, - "cropped_audio_path": cropped_audio_path, "version_info": { "transcript_count": len(conv.transcript_versions), "memory_count": len(conv.memory_versions), @@ -195,257 +165,6 @@ async def get_conversations(user: User): return JSONResponse(status_code=500, content={"error": "Error fetching conversations"}) -async def get_conversation_by_id(conversation_id: str, user: User): - """Get a specific conversation by conversation_id (speech-driven architecture).""" - try: - # Get the conversation using Beanie - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation_model: - return JSONResponse( - status_code=404, - content={"error": "Conversation not found"} - ) - - # Check if user owns this conversation - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={"error": "Access forbidden. You can only access your own conversations."} - ) - - # Get audio file paths from audio_chunks collection - audio_chunk = await chunk_repo.get_chunk_by_audio_uuid(conversation_model.audio_uuid) - audio_path = audio_chunk.get("audio_path") if audio_chunk else None - cropped_audio_path = audio_chunk.get("cropped_audio_path") if audio_chunk else None - - # Format conversation for API response - use model_dump and add computed fields - formatted_conversation = conversation_model.model_dump( - mode='json', # Automatically converts datetime to ISO strings, handles nested models - exclude={'id'} # Exclude MongoDB internal _id - ) - - # Add computed/external fields not in the model - formatted_conversation.update({ - "timestamp": 0, # Legacy field - using created_at instead - "has_memory": bool(conversation_model.memories), - "audio_path": audio_path, - "cropped_audio_path": cropped_audio_path, - "version_info": { - "transcript_count": len(conversation_model.transcript_versions), - "memory_count": 
len(conversation_model.memory_versions), - "active_transcript_version": conversation_model.active_transcript_version, - "active_memory_version": conversation_model.active_memory_version - } - }) - - return {"conversation": formatted_conversation} - - except Exception as e: - logger.error(f"Error fetching conversation {conversation_id}: {e}") - return JSONResponse(status_code=500, content={"error": "Error fetching conversation"}) - - -async def get_cropped_audio_info(audio_uuid: str, user: User): - """Get cropped audio information for a conversation. Users can only access their own conversations.""" - try: - # Find the conversation - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - return { - "audio_uuid": audio_uuid, - "cropped_audio_path": chunk.get("cropped_audio_path"), - "speech_segments": chunk.get("speech_segments", []), - "cropped_duration": chunk.get("cropped_duration"), - "cropped_at": chunk.get("cropped_at"), - "original_audio_path": chunk.get("audio_path"), - } - - except Exception as e: - logger.error(f"Error fetching cropped audio info: {e}") - return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) - - -async def reprocess_audio_cropping(audio_uuid: str, user: User): - """Reprocess audio cropping for a conversation. 
Users can only reprocess their own conversations.""" - try: - # Find the conversation - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - audio_path = chunk.get("audio_path") - if not audio_path: - return JSONResponse( - status_code=400, content={"error": "No audio file found for this conversation"} - ) - - # Check if file exists - try multiple possible locations - possible_paths = [ - Path("/app/audio_chunks") / audio_path, - Path(audio_path), # fallback to relative path - ] - - full_audio_path = None - for path in possible_paths: - if path.exists(): - full_audio_path = path - break - - if not full_audio_path: - return JSONResponse( - status_code=422, - content={ - "error": "Audio file not found on disk", - "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", - "searched_paths": [str(p) for p in possible_paths] - } - ) - - # Get speech segments from the chunk - speech_segments = chunk.get("speech_segments", []) - if not speech_segments: - return JSONResponse( - status_code=400, - content={"error": "No speech segments found for this conversation"} - ) - - # Generate output path for cropped audio - cropped_filename = f"cropped_{audio_uuid}.wav" - output_path = Path("/app/audio_chunks") / cropped_filename - - # Get repository for database updates - chunk_repo = AudioChunksRepository(chunks_col) - - # Reprocess the audio cropping - try: - result = await _process_audio_cropping_with_relative_timestamps( - str(full_audio_path), - speech_segments, - str(output_path), - audio_uuid, - chunk_repo - ) - - if result: - audio_logger.info(f"Successfully reprocessed audio cropping for {audio_uuid}") - 
return JSONResponse( - content={"message": f"Audio cropping reprocessed for {audio_uuid}"} - ) - else: - audio_logger.error(f"Failed to reprocess audio cropping for {audio_uuid}") - return JSONResponse( - status_code=500, content={"error": "Failed to reprocess audio cropping"} - ) - - except Exception as processing_error: - audio_logger.error(f"Error during audio cropping reprocessing: {processing_error}") - return JSONResponse( - status_code=500, - content={"error": f"Audio processing failed: {str(processing_error)}"}, - ) - - except Exception as e: - logger.error(f"Error reprocessing audio cropping: {e}") - return JSONResponse(status_code=500, content={"error": "Error reprocessing audio cropping"}) - - -async def add_speaker_to_conversation(audio_uuid: str, speaker_id: str, user: User): - """Add a speaker to the speakers_identified list for a conversation. Users can only modify their own conversations.""" - try: - # Find the conversation first - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Update the speakers_identified list - speakers = chunk.get("speakers_identified", []) - if speaker_id not in speakers: - speakers.append(speaker_id) - await chunks_col.update_one( - {"audio_uuid": audio_uuid}, {"$set": {"speakers_identified": speakers}} - ) - - return { - "message": f"Speaker {speaker_id} added to conversation", - "speakers_identified": speakers, - } - - except Exception as e: - logger.error(f"Error adding speaker to conversation: {e}") - return JSONResponse( - status_code=500, content={"error": "Error adding speaker to conversation"} - ) - - -async def update_transcript_segment( - audio_uuid: str, - segment_index: int, - 
user: User, - speaker_id: Optional[str] = None, - start_time: Optional[float] = None, - end_time: Optional[float] = None, -): - """Update a specific transcript segment with speaker or timing information. Users can only modify their own conversations.""" - try: - # Find the conversation first - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if not client_belongs_to_user(chunk["client_id"], user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - update_doc = {} - - if speaker_id is not None: - update_doc[f"transcript.{segment_index}.speaker"] = speaker_id - # Add to speakers_identified if not already present - speakers = chunk.get("speakers_identified", []) - if speaker_id not in speakers: - speakers.append(speaker_id) - await chunks_col.update_one( - {"audio_uuid": audio_uuid}, {"$set": {"speakers_identified": speakers}} - ) - - if start_time is not None: - update_doc[f"transcript.{segment_index}.start"] = start_time - - if end_time is not None: - update_doc[f"transcript.{segment_index}.end"] = end_time - - if not update_doc: - return JSONResponse(status_code=400, content={"error": "No update parameters provided"}) - - result = await chunks_col.update_one({"audio_uuid": audio_uuid}, {"$set": update_doc}) - - if result.modified_count == 0: - return JSONResponse(status_code=400, content={"error": "No changes were made"}) - - return JSONResponse(content={"message": "Transcript segment updated successfully"}) - - except Exception as e: - audio_logger.error(f"Error updating transcript segment: {e}") - return JSONResponse(status_code=500, content={"error": "Internal server error"}) - async def delete_conversation(audio_uuid: str, user: User): """Delete a conversation and its associated audio file. 
Users can only delete their own conversations.""" try: @@ -590,15 +309,10 @@ async def reprocess_transcript(conversation_id: str, user: User): if not user.is_superuser and conversation_model.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) - # Get audio_uuid for file access + # Get audio_uuid and file path from conversation audio_uuid = conversation_model.audio_uuid + audio_path = conversation_model.audio_path - # Get audio file path from audio_chunks collection - chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) - if not chunk: - return JSONResponse(status_code=404, content={"error": "Audio session not found"}) - - audio_path = chunk.get("audio_path") if not audio_path: return JSONResponse( status_code=400, content={"error": "No audio file found for this conversation"} @@ -630,10 +344,12 @@ async def reprocess_transcript(conversation_id: str, user: User): import uuid version_id = str(uuid.uuid4()) - # Enqueue job chain with RQ (transcription -> speaker recognition -> memory) - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job, recognise_speakers_job + # Enqueue job chain with RQ (transcription -> speaker recognition -> cropping -> memory) + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job + from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, JOB_RESULT_TTL + from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL, redis_conn # Job 1: Transcribe audio to text transcript_job = transcription_queue.enqueue( @@ -648,7 +364,7 @@ async def 
reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"reprocess_{conversation_id[:8]}", description=f"Transcribe audio for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid} + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcript_job.id}") @@ -658,7 +374,6 @@ async def reprocess_transcript(conversation_id: str, user: User): conversation_id, version_id, str(full_audio_path), - str(user.user_id), "", # transcript_text - will be read from DB [], # words - will be read from DB depends_on=transcript_job, @@ -666,25 +381,37 @@ async def reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"speaker_{conversation_id[:8]}", description=f"Recognize speakers for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid} + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") - # Job 3: Extract memories (depends on speaker recognition) + # Job 3: Audio cropping (depends on speaker recognition) + cropping_job = default_queue.enqueue( + process_cropping_job, + conversation_id, + str(full_audio_path), + depends_on=speaker_job, + job_timeout=300, + result_ttl=JOB_RESULT_TTL, + job_id=f"crop_{conversation_id[:8]}", + description=f"Crop audio for {conversation_id[:8]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued audio cropping job {cropping_job.id} (depends on {speaker_job.id})") + + # Job 4: Extract memories (depends on cropping) memory_job = memory_queue.enqueue( process_memory_job, - None, # client_id - will be read from conversation in DB - str(user.user_id), - "", # user_email - will be read from user in DB conversation_id, - depends_on=speaker_job, + redis_conn, + depends_on=cropping_job, job_timeout=1800, result_ttl=JOB_RESULT_TTL, 
job_id=f"memory_{conversation_id[:8]}", description=f"Extract memories for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid} + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {speaker_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {cropping_job.id})") job = transcript_job # For backward compatibility with return value logger.info(f"Created transcript reprocessing job {job.id} (version: {version_id}) for conversation {conversation_id}") @@ -713,9 +440,6 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use if not user.is_superuser and conversation_model.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) - # Get audio_uuid for processing run tracking - audio_uuid = conversation_model.audio_uuid - # Resolve transcript version ID # Handle special "active" version ID if transcript_version_id == "active": diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index dcd657dc..a6a406c8 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -8,21 +8,23 @@ - Beanie initialization for workers """ +import asyncio import os import logging +import uuid +from datetime import datetime from typing import Dict, Any, Optional import redis from rq import Queue, Worker from rq.job import Job +from rq.registry import ScheduledJobRegistry, DeferredJobRegistry from advanced_omi_backend.models.job import JobPriority +from advanced_omi_backend.models.conversation import Conversation logger = logging.getLogger(__name__) -# Global flag to track if Beanie is initialized in this process 
-_beanie_initialized = False - # Redis connection configuration REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") redis_conn = redis.from_url(REDIS_URL) @@ -30,14 +32,19 @@ # Queue name constants TRANSCRIPTION_QUEUE = "transcription" MEMORY_QUEUE = "memory" +AUDIO_QUEUE = "audio" DEFAULT_QUEUE = "default" +# Centralized list of all queue names +QUEUE_NAMES = [DEFAULT_QUEUE, TRANSCRIPTION_QUEUE, MEMORY_QUEUE, AUDIO_QUEUE] + # Job retention configuration JOB_RESULT_TTL = int(os.getenv("RQ_RESULT_TTL", 3600)) # 1 hour default # Create queues with custom result TTL transcription_queue = Queue(TRANSCRIPTION_QUEUE, connection=redis_conn, default_timeout=300) memory_queue = Queue(MEMORY_QUEUE, connection=redis_conn, default_timeout=300) +audio_queue = Queue(AUDIO_QUEUE, connection=redis_conn, default_timeout=3600) # 1 hour timeout for long sessions default_queue = Queue(DEFAULT_QUEUE, connection=redis_conn, default_timeout=300) @@ -46,50 +53,14 @@ def get_queue(queue_name: str = DEFAULT_QUEUE) -> Queue: queues = { TRANSCRIPTION_QUEUE: transcription_queue, MEMORY_QUEUE: memory_queue, + AUDIO_QUEUE: audio_queue, DEFAULT_QUEUE: default_queue, } return queues.get(queue_name, default_queue) -async def _ensure_beanie_initialized(): - """Ensure Beanie is initialized in the current process (for RQ workers).""" - global _beanie_initialized - - if _beanie_initialized: - return - - try: - from motor.motor_asyncio import AsyncIOMotorClient - from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.models.audio_file import AudioFile - from advanced_omi_backend.models.user import User - - # Get MongoDB URI from environment - mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") - - # Create MongoDB client - client = AsyncIOMotorClient(mongodb_uri) - database = client.get_default_database("friend-lite") - - # Initialize Beanie - await init_beanie( - database=database, - 
document_models=[User, Conversation, AudioFile], - ) - - _beanie_initialized = True - logger.info("βœ… Beanie initialized in RQ worker process") - - except Exception as e: - logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}") - raise - - def get_job_stats() -> Dict[str, Any]: """Get statistics about jobs in all queues matching frontend expectations.""" - from datetime import datetime - total_jobs = 0 queued_jobs = 0 processing_jobs = 0 @@ -98,7 +69,7 @@ def get_job_stats() -> Dict[str, Any]: cancelled_jobs = 0 deferred_jobs = 0 # Jobs waiting for dependencies (depends_on) - for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE]: + for queue_name in QUEUE_NAMES: queue = get_queue(queue_name) queued_jobs += len(queue) @@ -136,7 +107,7 @@ def get_jobs(limit: int = 20, offset: int = 0, queue_name: str = None) -> Dict[s """ all_jobs = [] - queues_to_check = [queue_name] if queue_name else [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE] + queues_to_check = [queue_name] if queue_name else QUEUE_NAMES for qname in queues_to_check: queue = get_queue(qname) @@ -172,6 +143,7 @@ def get_jobs(limit: int = 20, offset: int = 0, queue_name: str = None) -> Dict[s "queue": qname, }, "result": job.result if hasattr(job, 'result') else None, + "meta": job.meta if job.meta else {}, # Include job metadata "error_message": str(job.exc_info) if job.exc_info else None, "created_at": job.created_at.isoformat() if job.created_at else None, "started_at": job.started_at.isoformat() if job.started_at else None, @@ -207,12 +179,8 @@ def all_jobs_complete_for_session(session_id: str) -> bool: """ Check if all jobs associated with a session are in terminal states. - A session is considered complete only when all its jobs are in terminal states - (completed, failed, or cancelled). Jobs that are queued or processing keep the - session in active state. 
- - This function now traverses dependency chains to find dependent jobs that may - not be in any registry yet (they're stored via job.dependent_ids). + Only checks jobs with audio_uuid in job.meta (no backward compatibility). + Traverses dependency chains to include dependent jobs. Args: session_id: The audio_uuid (session ID) to check jobs for @@ -220,135 +188,79 @@ def all_jobs_complete_for_session(session_id: str) -> bool: Returns: True if all jobs are complete (or no jobs found), False if any job is still processing """ - from rq.registry import ScheduledJobRegistry, DeferredJobRegistry - from advanced_omi_backend.models.conversation import Conversation - import asyncio - - # First, get conversation_id(s) for this session (for memory jobs) - conversation_ids = set() - try: - # Run async query in sync context - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - conversations = loop.run_until_complete( - Conversation.find(Conversation.audio_uuid == session_id).to_list() - ) - conversation_ids = {conv.conversation_id for conv in conversations} - loop.close() - except Exception as e: - logger.debug(f"Error fetching conversations for session {session_id}: {e}") + processed_job_ids = set() - processed_job_ids = set() # Track which jobs we've already checked - session_jobs_found = [] # Track all jobs found for this session - - def check_job_and_dependents(job): - """ - Recursively check a job and all its dependents. - Returns True if all are terminal, False if any are non-terminal. 
- """ + def is_job_complete(job): + """Recursively check if job and all its dependents are terminal.""" if job.id in processed_job_ids: return True - processed_job_ids.add(job.id) - # Check if this job is in a terminal state - is_terminal = job.is_finished or job.is_failed or job.is_canceled - - if not is_terminal: - # Job is still queued, processing, or scheduled - session not complete - logger.debug(f"Job {job.id} ({job.func_name}) is not terminal (queued/processing/scheduled)") + # Check if this job is terminal + if not (job.is_finished or job.is_failed or job.is_canceled): + logger.debug(f"Job {job.id} ({job.func_name}) is not terminal") return False - # Check dependent jobs (jobs that depend on this one) - try: - dependent_ids = job.dependent_ids - if dependent_ids: - logger.debug(f"Job {job.id} has {len(dependent_ids)} dependents") - for dep_id in dependent_ids: - try: - dep_job = Job.fetch(dep_id, connection=redis_conn) - # Recursively check dependent job - if not check_job_and_dependents(dep_job): - return False - except Exception as e: - logger.debug(f"Error fetching dependent job {dep_id}: {e}") - except Exception as e: - logger.debug(f"Error checking dependents for job {job.id}: {e}") + # Check dependent jobs + for dep_id in (job.dependent_ids or []): + try: + dep_job = Job.fetch(dep_id, connection=redis_conn) + if not is_job_complete(dep_job): + return False + except Exception as e: + logger.debug(f"Error fetching dependent job {dep_id}: {e}") return True - # Check all queues and registries - for queue in [transcription_queue, memory_queue, default_queue]: - # Check all job registries for this queue (including scheduled/deferred) + # Find all jobs for this session + all_queues = [transcription_queue, memory_queue, audio_queue, default_queue] + for queue in all_queues: registries = [ - queue.job_ids, # Queued jobs - queue.started_job_registry.get_job_ids(), # Processing jobs - queue.finished_job_registry.get_job_ids(), # Completed - 
queue.failed_job_registry.get_job_ids(), # Failed - queue.canceled_job_registry.get_job_ids(), # Cancelled - ScheduledJobRegistry(queue=queue).get_job_ids(), # Scheduled (dependent jobs) - DeferredJobRegistry(queue=queue).get_job_ids(), # Deferred (retrying) + queue.job_ids, + queue.started_job_registry.get_job_ids(), + queue.finished_job_registry.get_job_ids(), + queue.failed_job_registry.get_job_ids(), + queue.canceled_job_registry.get_job_ids(), + ScheduledJobRegistry(queue=queue).get_job_ids(), + DeferredJobRegistry(queue=queue).get_job_ids(), ] for job_ids in registries: for job_id in job_ids: try: job = Job.fetch(job_id, connection=redis_conn) - matches_session = False - - # Check job.meta first (preferred method for all new jobs) - if job.meta and 'audio_uuid' in job.meta: - if job.meta['audio_uuid'] == session_id: - matches_session = True - # FALLBACK: Check args for backward compatibility - elif job.args and len(job.args) > 0: - # Check args[0] first (most common for streaming jobs) - if job.args[0] == session_id: - matches_session = True - # Check args[1] for transcription jobs - elif len(job.args) > 1 and job.args[1] == session_id: - matches_session = True - # Check args[3] for memory jobs (conversation_id) - elif len(job.args) > 3 and job.args[3] in conversation_ids: - matches_session = True - - if matches_session: - session_jobs_found.append(job.id) - # Check this job and all its dependents - if not check_job_and_dependents(job): - logger.debug(f"Session {session_id} has incomplete jobs (found {len(session_jobs_found)} jobs)") - return False + # Only check jobs with audio_uuid in meta + if job.meta and job.meta.get('audio_uuid') == session_id: + if not is_job_complete(job): + return False except Exception as e: logger.debug(f"Error checking job {job_id}: {e}") - continue - # All jobs are in terminal states (or no jobs found) - logger.debug(f"Session {session_id} all jobs complete ({len(session_jobs_found)} jobs checked)") return True def 
start_streaming_jobs( session_id: str, user_id: str, - user_email: str, client_id: str ) -> Dict[str, str]: """ - Enqueue jobs for streaming audio session. + Enqueue jobs for streaming audio session (initial session setup). - This starts the parallel job processing for a streaming session: + This starts the parallel job processing for a NEW streaming session: 1. Speech detection job - monitors transcription results for speech - 2. Audio persistence job - writes audio chunks to WAV file + 2. Audio persistence job - writes audio chunks to WAV file (file rotation per conversation) Args: session_id: Stream session ID (audio_uuid) user_id: User identifier - user_email: User email client_id: Client identifier Returns: Dict with job IDs: {'speech_detection': job_id, 'audio_persistence': job_id} + + Note: user_email is fetched from the database when needed. """ from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job from advanced_omi_backend.workers.audio_jobs import audio_streaming_persistence_job @@ -358,30 +270,37 @@ def start_streaming_jobs( stream_speech_detection_job, session_id, user_id, - user_email, client_id, job_timeout=3600, # 1 hour for long recordings result_ttl=JOB_RESULT_TTL, job_id=f"speech-detect_{session_id[:12]}", - description=f"Stream speech detection for {session_id[:12]}", - meta={'audio_uuid': session_id} + description=f"Listening for speech...", + meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} ) logger.info(f"πŸ“₯ RQ: Enqueued speech detection job {speech_job.id}") - # Enqueue audio persistence job in parallel - audio_job = transcription_queue.enqueue( + # Store job ID for cleanup (keyed by client_id for easy WebSocket cleanup) + try: + redis_conn.set(f"speech_detection_job:{client_id}", speech_job.id, ex=3600) # 1 hour TTL + logger.info(f"πŸ“Œ Stored speech detection job ID for client {client_id}") + except Exception as e: + logger.warning(f"⚠️ Failed to store job ID for {client_id}: 
{e}") + + # Enqueue audio persistence job on dedicated audio queue + # NOTE: This job handles file rotation for multiple conversations automatically + # Runs for entire session, not tied to individual conversations + audio_job = audio_queue.enqueue( audio_streaming_persistence_job, session_id, user_id, - user_email, client_id, job_timeout=3600, # 1 hour for long recordings result_ttl=JOB_RESULT_TTL, job_id=f"audio-persist_{session_id[:12]}", - description=f"Audio persistence for {session_id[:12]}", - meta={'audio_uuid': session_id} + description=f"Audio persistence for session {session_id[:12]}", + meta={'audio_uuid': session_id, 'session_level': True} # Mark as session-level job ) - logger.info(f"πŸ“₯ RQ: Enqueued audio persistence job {audio_job.id}") + logger.info(f"πŸ“₯ RQ: Enqueued audio persistence job {audio_job.id} on audio queue") return { 'speech_detection': speech_job.id, @@ -389,104 +308,153 @@ def start_streaming_jobs( } -def start_batch_processing_jobs( +def start_post_conversation_jobs( conversation_id: str, audio_uuid: str, + audio_file_path: str, user_id: str, - user_email: str, - audio_file_path: str + post_transcription: bool = True, + transcript_version_id: Optional[str] = None, + depends_on_job = None ) -> Dict[str, str]: """ - Enqueue complete batch processing job chain with dependencies. + Start post-conversation processing jobs after conversation is created. - This creates the full processing pipeline: - 1. Transcription job (transcribe audio file) - 2. Speaker recognition job (depends on transcription) - 3. Memory extraction job (depends on speaker recognition) + This creates the standard processing chain after a conversation is created: + 1. Audio cropping job - Removes silence from audio + 2. [Optional] Transcription job - Batch transcription (if post_transcription=True) + 3. Speaker recognition job - Identifies speakers in audio + 4. Memory extraction job - Extracts memories from conversation (parallel) + 5. 
Title/summary generation job - Generates title and summary (parallel) Args: conversation_id: Conversation identifier - audio_uuid: Audio file UUID - user_id: User identifier - user_email: User email + audio_uuid: Audio UUID for job tracking audio_file_path: Path to audio file + user_id: User identifier + post_transcription: If True, run batch transcription step (for uploads) + If False, skip transcription (streaming already has it) + transcript_version_id: Transcript version ID (auto-generated if None) + depends_on_job: Optional job dependency for cropping job Returns: - Dict with job IDs: { - 'transcription': job_id, - 'speaker_recognition': job_id, - 'memory': job_id - } + Dict with job IDs (transcription will be None if post_transcription=False) """ - import uuid from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job - from advanced_omi_backend.workers.transcription_jobs import recognise_speakers_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job + from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job + from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job + + version_id = transcript_version_id or str(uuid.uuid4()) - # Generate version IDs for transcript and speaker processing - transcript_version_id = str(uuid.uuid4()) + # Step 1: Audio cropping job + crop_job_id = f"crop_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating crop job with job_id={crop_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - # Step 1: Transcription job (no dependencies) - # Signature: transcribe_full_audio_job(conversation_id, audio_uuid, audio_path, version_id, user_id, trigger, redis_client) - transcription_job = transcription_queue.enqueue( - transcribe_full_audio_job, + cropping_job = default_queue.enqueue( + process_cropping_job, conversation_id, - audio_uuid, 
audio_file_path, - transcript_version_id, - user_id, - "batch", # trigger - job_timeout=getattr(transcribe_full_audio_job, 'job_timeout', 1800), # Use decorator default or 30 min - result_ttl=getattr(transcribe_full_audio_job, 'result_ttl', JOB_RESULT_TTL), - job_id=f"transcribe_{audio_uuid[:12]}", - description=f"Transcribe audio {audio_uuid[:12]}", + job_timeout=300, # 5 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=depends_on_job, + job_id=crop_job_id, + description=f"Crop audio for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id}") + logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}, meta={cropping_job.meta}") + + # Step 2: Transcription job (conditional) + transcription_job = None + if post_transcription: + transcribe_job_id = f"transcribe_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating transcribe job with job_id={transcribe_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + + transcription_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + audio_uuid, + audio_file_path, + version_id, + "batch", # trigger + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=cropping_job, + job_id=transcribe_job_id, + description=f"Transcribe conversation {conversation_id[:8]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id}, meta={transcription_job.meta} (depends on {cropping_job.id})") + speaker_depends_on = transcription_job + else: + logger.info(f"⏭️ RQ: Skipping transcription (streaming already has transcript)") + speaker_depends_on = cropping_job + + # Step 3: Speaker recognition job + speaker_job_id = f"speaker_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating speaker job with job_id={speaker_job_id}, 
conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - # Step 2: Speaker recognition job (depends on transcription) - # Signature: recognise_speakers_job(conversation_id, version_id, audio_path, user_id, transcript_text, words, redis_client) speaker_job = transcription_queue.enqueue( recognise_speakers_job, conversation_id, - transcript_version_id, + version_id, audio_file_path, - user_id, "", # transcript_text - will be read from DB [], # words - will be read from DB - job_timeout=getattr(recognise_speakers_job, 'job_timeout', 1200), # Use decorator default or 20 min - result_ttl=getattr(recognise_speakers_job, 'result_ttl', JOB_RESULT_TTL), - depends_on=transcription_job, - job_id=f"speaker_{audio_uuid[:12]}", - description=f"Speaker recognition for {audio_uuid[:12]}", + job_timeout=1200, # 20 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_depends_on, + job_id=speaker_job_id, + description=f"Speaker recognition for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcription_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {speaker_depends_on.id})") + + # Step 4: Memory extraction job (parallel with title/summary) + memory_job_id = f"memory_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - # Step 3: Memory extraction job (depends on speaker recognition) - # Signature: process_memory_job(client_id, user_id, user_email, conversation_id, redis_client) memory_job = memory_queue.enqueue( process_memory_job, - None, # client_id - will be read from conversation in DB - user_id, - user_email, conversation_id, - job_timeout=getattr(process_memory_job, 'job_timeout', 900), # Use decorator default or 15 
min - result_ttl=getattr(process_memory_job, 'result_ttl', JOB_RESULT_TTL), + job_timeout=900, # 15 minutes + result_ttl=JOB_RESULT_TTL, depends_on=speaker_job, - job_id=f"memory_{audio_uuid[:12]}", - description=f"Memory extraction for {audio_uuid[:12]}", + job_id=memory_job_id, + description=f"Memory extraction for conversation {conversation_id[:8]}", + meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + ) + logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {speaker_job.id})") + + # Step 5: Title/summary generation job (parallel with memory, independent) + # This ensures conversations always get titles/summaries even if memory job fails + title_job_id = f"title_summary_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + + title_summary_job = default_queue.enqueue( + generate_title_summary_job, + conversation_id, + job_timeout=300, # 5 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_job, # Depends on speaker job, NOT memory job + job_id=title_job_id, + description=f"Generate title and summary for conversation {conversation_id[:8]}", meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id} (depends on {speaker_job.id})") + logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {speaker_job.id})") return { - 'transcription': transcription_job.id, + 'cropping': cropping_job.id, + 'transcription': transcription_job.id if transcription_job else None, 'speaker_recognition': speaker_job.id, - 'memory': memory_job.id + 'memory': memory_job.id, + 'title_summary': title_summary_job.id } + + def get_queue_health() -> Dict[str, Any]: """Get health status of all queues and workers.""" health = { @@ -507,7 +475,7 @@ def 
get_queue_health() -> Dict[str, Any]: return health # Check each queue - for queue_name in [TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE]: + for queue_name in QUEUE_NAMES: queue = get_queue(queue_name) health["queues"][queue_name] = { "count": len(queue), @@ -538,3 +506,214 @@ def get_queue_health() -> Dict[str, Any]: }) return health + +# needs tidying but works for now +async def cleanup_stuck_stream_workers(request): + """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" + import time + from fastapi.responses import JSONResponse + + try: + # Get Redis client from request.app.state (initialized during startup) + redis_client = request.app.state.redis_audio_stream + + if not redis_client: + return JSONResponse( + status_code=503, + content={"error": "Redis client for audio streaming not initialized"} + ) + + cleanup_results = {} + total_cleaned = 0 + total_deleted_consumers = 0 + total_deleted_streams = 0 + current_time = time.time() + + # Discover all audio streams (per-client streams) + stream_keys = await redis_client.keys("audio:stream:*") + + for stream_key in stream_keys: + stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key + + try: + # First check stream age - delete old streams (>1 hour) immediately + stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + + # Parse stream info + info_dict = {} + for i in range(0, len(stream_info), 2): + key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) + info_dict[key_name] = stream_info[i+1] + + stream_length = int(info_dict.get("length", 0)) + last_entry = info_dict.get("last-entry") + + # Check if stream is old + should_delete_stream = False + stream_age = 0 + + if stream_length == 0: + should_delete_stream = True + stream_age = 0 + elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: + try: + last_id = last_entry[0] + if isinstance(last_id, bytes): + 
last_id = last_id.decode() + last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_s = last_timestamp_ms / 1000 + stream_age = current_time - last_timestamp_s + + # Delete streams older than 1 hour (3600 seconds) + if stream_age > 3600: + should_delete_stream = True + except (ValueError, IndexError): + pass + + if should_delete_stream: + await redis_client.delete(stream_name) + total_deleted_streams += 1 + cleanup_results[stream_name] = { + "message": f"Deleted old stream (age: {stream_age:.0f}s, length: {stream_length})", + "cleaned": 0, + "deleted_consumers": 0, + "deleted_stream": True, + "stream_age": stream_age + } + continue + + # Get consumer groups + groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + + if not groups: + cleanup_results[stream_name] = {"message": "No consumer groups found", "cleaned": 0, "deleted_stream": False} + continue + + # Parse first group + group_dict = {} + group = groups[0] + for i in range(0, len(group), 2): + key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) + value = group[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = str(value) + group_dict[key] = value + + group_name = group_dict.get("name", "unknown") + if isinstance(group_name, bytes): + group_name = group_name.decode() + + pending_count = int(group_dict.get("pending", 0)) + + # Get consumers for this group to check per-consumer pending + consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + + cleaned_count = 0 + total_consumer_pending = 0 + + # Clean up pending messages for each consumer AND delete dead consumers + deleted_consumers = 0 + for consumer in consumers: + consumer_dict = {} + for i in range(0, len(consumer), 2): + key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) + value = consumer[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except 
UnicodeDecodeError: + value = str(value) + consumer_dict[key] = value + + consumer_name = consumer_dict.get("name", "unknown") + if isinstance(consumer_name, bytes): + consumer_name = consumer_name.decode() + + consumer_pending = int(consumer_dict.get("pending", 0)) + consumer_idle_ms = int(consumer_dict.get("idle", 0)) + total_consumer_pending += consumer_pending + + # Check if consumer is dead (idle > 5 minutes = 300000ms) + is_dead = consumer_idle_ms > 300000 + + if consumer_pending > 0: + logger.info(f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + + # Get pending messages for this specific consumer + try: + pending_messages = await redis_client.execute_command( + 'XPENDING', stream_name, group_name, '-', '+', str(consumer_pending), consumer_name + ) + + # XPENDING returns flat list: [msg_id, consumer, idle_ms, delivery_count, msg_id, ...] + # Parse in groups of 4 + for i in range(0, len(pending_messages), 4): + if i < len(pending_messages): + msg_id = pending_messages[i] + if isinstance(msg_id, bytes): + msg_id = msg_id.decode() + + # Claim the message to a cleanup worker + try: + await redis_client.execute_command( + 'XCLAIM', stream_name, group_name, 'cleanup-worker', '0', msg_id + ) + + # Acknowledge it immediately + await redis_client.xack(stream_name, group_name, msg_id) + cleaned_count += 1 + except Exception as claim_error: + logger.warning(f"Failed to claim/ack message {msg_id}: {claim_error}") + + except Exception as consumer_error: + logger.error(f"Error processing consumer {consumer_name}: {consumer_error}") + + # Delete dead consumers (idle > 5 minutes with no pending messages) + if is_dead and consumer_pending == 0: + try: + await redis_client.execute_command( + 'XGROUP', 'DELCONSUMER', stream_name, group_name, consumer_name + ) + deleted_consumers += 1 + logger.info(f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + except Exception as delete_error: + 
logger.warning(f"Failed to delete consumer {consumer_name}: {delete_error}") + + if total_consumer_pending == 0 and deleted_consumers == 0: + cleanup_results[stream_name] = {"message": "No pending messages or dead consumers", "cleaned": 0, "deleted_consumers": 0, "deleted_stream": False} + continue + + total_cleaned += cleaned_count + total_deleted_consumers += deleted_consumers + cleanup_results[stream_name] = { + "message": f"Cleaned {cleaned_count} pending messages, deleted {deleted_consumers} dead consumers", + "cleaned": cleaned_count, + "deleted_consumers": deleted_consumers, + "deleted_stream": False, + "original_pending": pending_count + } + + except Exception as e: + cleanup_results[stream_name] = { + "error": str(e), + "cleaned": 0 + } + + return { + "success": True, + "total_cleaned": total_cleaned, + "total_deleted_consumers": total_deleted_consumers, + "total_deleted_streams": total_deleted_streams, + "streams": cleanup_results, # New key for per-stream results + "providers": cleanup_results, # Keep for backward compatibility with frontend + "timestamp": time.time() + } + + except Exception as e: + logger.error(f"Error cleaning up stuck workers: {e}", exc_info=True) + return JSONResponse( + status_code=500, content={"error": f"Failed to cleanup stuck workers: {str(e)}"} + ) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py new file mode 100644 index 00000000..a3836898 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -0,0 +1,586 @@ +""" +Session controller for handling audio session-related business logic. 
+ +This module manages Redis-based audio streaming sessions, including: +- Session metadata and status +- Conversation counts per session +- Session lifecycle tracking +""" + +import logging +import time +from typing import Dict, List, Optional + +from fastapi.responses import JSONResponse + +logger = logging.getLogger(__name__) + + +async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: + """ + Get detailed information about a specific session. + + Args: + redis_client: Redis async client + session_id: Session UUID + + Returns: + Dict with session information or None if not found + """ + try: + session_key = f"audio:session:{session_id}" + session_data = await redis_client.hgetall(session_key) + + if not session_data: + return None + + # Get conversation count for this session + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + conversation_count = int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + + started_at = float(session_data.get(b"started_at", b"0")) + last_chunk_at = float(session_data.get(b"last_chunk_at", b"0")) + + return { + "session_id": session_id, + "user_id": session_data.get(b"user_id", b"").decode(), + "client_id": session_data.get(b"client_id", b"").decode(), + "provider": session_data.get(b"provider", b"").decode(), + "mode": session_data.get(b"mode", b"").decode(), + "status": session_data.get(b"status", b"").decode(), + "chunks_published": int(session_data.get(b"chunks_published", b"0")), + "started_at": started_at, + "last_chunk_at": last_chunk_at, + "age_seconds": time.time() - started_at, + "idle_seconds": time.time() - last_chunk_at, + "conversation_count": conversation_count, + # Speech detection events + "last_event": session_data.get(b"last_event", b"").decode(), + "speech_detected_at": session_data.get(b"speech_detected_at", b"").decode(), + "speaker_check_status": 
session_data.get(b"speaker_check_status", b"").decode(), + "identified_speakers": session_data.get(b"identified_speakers", b"").decode() + } + + except Exception as e: + logger.error(f"Error getting session info for {session_id}: {e}") + return None + + +async def get_all_sessions(redis_client, limit: int = 100) -> List[Dict]: + """ + Get information about all active sessions. + + Args: + redis_client: Redis async client + limit: Maximum number of sessions to return + + Returns: + List of session info dictionaries + """ + try: + # Get all session keys + session_keys = [] + cursor = b"0" + while cursor and len(session_keys) < limit: + cursor, keys = await redis_client.scan( + cursor, match="audio:session:*", count=limit + ) + session_keys.extend(keys[:limit - len(session_keys)]) + + # Get info for each session + sessions = [] + for key in session_keys: + session_id = key.decode().replace("audio:session:", "") + session_info = await get_session_info(redis_client, session_id) + if session_info: + sessions.append(session_info) + + return sessions + + except Exception as e: + logger.error(f"Error getting all sessions: {e}") + return [] + + +async def get_session_conversation_count(redis_client, session_id: str) -> int: + """ + Get the conversation count for a specific session. + + Args: + redis_client: Redis async client + session_id: Session UUID + + Returns: + Number of conversations created in this session + """ + try: + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + return int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + except Exception as e: + logger.error(f"Error getting conversation count for session {session_id}: {e}") + return 0 + + +async def increment_session_conversation_count(redis_client, session_id: str) -> int: + """ + Increment and return the conversation count for a session. 
+ + Args: + redis_client: Redis async client + session_id: Session UUID + + Returns: + New conversation count + """ + try: + conversation_count_key = f"session:conversation_count:{session_id}" + count = await redis_client.incr(conversation_count_key) + await redis_client.expire(conversation_count_key, 3600) # 1 hour TTL + logger.info(f"πŸ“Š Conversation count for session {session_id}: {count}") + return count + except Exception as e: + logger.error(f"Error incrementing conversation count for session {session_id}: {e}") + return 0 + + +async def get_streaming_status(request): + """Get status of active streaming sessions and Redis Streams health.""" + from advanced_omi_backend.controllers.queue_controller import ( + transcription_queue, + memory_queue, + default_queue, + all_jobs_complete_for_session + ) + + try: + # Get Redis client from request.app.state (initialized during startup) + redis_client = request.app.state.redis_audio_stream + + if not redis_client: + return JSONResponse( + status_code=503, + content={"error": "Redis client for audio streaming not initialized"} + ) + + # Get all sessions (both active and completed) + session_keys = await redis_client.keys("audio:session:*") + active_sessions = [] + completed_sessions_from_redis = [] + + for key in session_keys: + session_id = key.decode().split(":")[-1] + + # Use session_controller to get complete session info including conversation_count + session_obj = await get_session_info(redis_client, session_id) + if not session_obj: + continue + + status = session_obj.get("status", "") + + # Separate active and completed sessions + # Check if all jobs are complete (including failed jobs) + all_jobs_done = all_jobs_complete_for_session(session_id) + + # Session is completed if: + # 1. Redis status says complete/finalized AND all jobs done, OR + # 2. 
All jobs are done (even if status isn't complete yet) + # This ensures sessions with failed jobs move to completed + if status in ["complete", "completed", "finalized"] or all_jobs_done: + if all_jobs_done: + # All jobs complete - this is truly a completed session + # Update Redis status if it wasn't already marked complete + if status not in ["complete", "completed", "finalized"]: + await redis_client.hset(key, "status", "complete") + logger.info(f"βœ… Marked session {session_id} as complete (all jobs terminal)") + + # Get additional session data for completed sessions + session_key = f"audio:session:{session_id}" + session_data = await redis_client.hgetall(session_key) + + completed_sessions_from_redis.append({ + "session_id": session_id, + "client_id": session_obj.get("client_id", ""), + "conversation_id": session_data.get(b"conversation_id", b"").decode() if session_data and b"conversation_id" in session_data else None, + "has_conversation": bool(session_data and session_data.get(b"conversation_id", b"")), + "action": session_data.get(b"action", b"complete").decode() if session_data and b"action" in session_data else "complete", + "reason": session_data.get(b"reason", b"").decode() if session_data and b"reason" in session_data else "", + "completed_at": session_obj.get("last_chunk_at", 0), + "audio_file": session_data.get(b"audio_file", b"").decode() if session_data and b"audio_file" in session_data else "", + "conversation_count": session_obj.get("conversation_count", 0) + }) + else: + # Status says complete but jobs still processing - keep in active + active_sessions.append(session_obj) + else: + # This is an active session + active_sessions.append(session_obj) + + # Get stream health for all streams (per-client streams) + # Categorize as active or completed based on consumer activity + active_streams = {} + completed_streams = {} + + # Create a map of client_id to session for quick lookup + client_to_session = {} + for session in active_sessions + 
completed_sessions_from_redis: + client_id = session.get("client_id") + if client_id: + client_to_session[client_id] = session + + # Discover all audio streams + stream_keys = await redis_client.keys("audio:stream:*") + current_time = time.time() + + for stream_key in stream_keys: + stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key + try: + # Check if stream exists + stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + + # Parse stream info (returns flat list of key-value pairs) + info_dict = {} + for i in range(0, len(stream_info), 2): + key = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) + value = stream_info[i+1] + + # Skip complex binary structures like first-entry and last-entry + # which contain message data that can't be JSON serialized + if key in ["first-entry", "last-entry"]: + # Just extract the message ID (first element) + if isinstance(value, list) and len(value) > 0: + msg_id = value[0] + if isinstance(msg_id, bytes): + msg_id = msg_id.decode() + value = msg_id + else: + value = None + elif isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + # Binary data that can't be decoded, skip it + value = "" + + info_dict[key] = value + + # Calculate stream age from last entry (for determining if stream is stale) + stream_age_seconds = 0 + last_entry_id = info_dict.get("last-entry") + if last_entry_id: + try: + # Redis Stream IDs format: "milliseconds-sequence" + last_timestamp_ms = int(last_entry_id.split('-')[0]) + last_timestamp_s = last_timestamp_ms / 1000 + stream_age_seconds = current_time - last_timestamp_s + except (ValueError, IndexError, AttributeError): + stream_age_seconds = 0 + + # Extract client_id from stream name (audio:stream:{client_id}) + client_id = stream_name.split(":")[-1] if ":" in stream_name else "" + + # Get session age from associated session (more meaningful than stream age) + 
session_age_seconds = 0 + session_idle_seconds = 0 + if client_id and client_id in client_to_session: + session_data = client_to_session[client_id] + session_age_seconds = session_data.get("age_seconds", 0) + session_idle_seconds = session_data.get("idle_seconds", 0) + + # Get consumer groups + groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + + stream_data = { + "stream_length": info_dict.get("length", 0), + "first_entry_id": info_dict.get("first-entry"), + "last_entry_id": last_entry_id, + "session_age_seconds": session_age_seconds, # Age since session started + "session_idle_seconds": session_idle_seconds, # Time since last audio chunk + "client_id": client_id, # Include client_id for reference + "consumer_groups": [] + } + + # Track if stream has any active consumers + has_active_consumer = False + min_consumer_idle_ms = float('inf') + + # Parse consumer groups + for group in groups: + group_dict = {} + for i in range(0, len(group), 2): + key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) + value = group[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = "" + group_dict[key] = value + + group_name = group_dict.get("name", "unknown") + if isinstance(group_name, bytes): + group_name = group_name.decode() + + # Get consumers for this group + consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + consumer_list = [] + consumer_pending_total = 0 + + for consumer in consumers: + consumer_dict = {} + for i in range(0, len(consumer), 2): + key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) + value = consumer[i+1] + if isinstance(value, bytes): + try: + value = value.decode() + except UnicodeDecodeError: + value = "" + consumer_dict[key] = value + + consumer_name = consumer_dict.get("name", "unknown") + if isinstance(consumer_name, bytes): + consumer_name = consumer_name.decode() + + 
consumer_pending = int(consumer_dict.get("pending", 0)) + consumer_idle_ms = int(consumer_dict.get("idle", 0)) + consumer_pending_total += consumer_pending + + # Track minimum idle time + min_consumer_idle_ms = min(min_consumer_idle_ms, consumer_idle_ms) + + # Consumer is active if idle < 5 minutes (300000ms) + if consumer_idle_ms < 300000: + has_active_consumer = True + + consumer_list.append({ + "name": consumer_name, + "pending": consumer_pending, + "idle_ms": consumer_idle_ms + }) + + # Get group-level pending count (may be 0 even if consumers have pending) + try: + pending = await redis_client.xpending(stream_name, group_name) + group_pending_count = int(pending[0]) if pending else 0 + except Exception: + group_pending_count = 0 + + # Use the maximum of group-level pending or sum of consumer pending + # (Sometimes group pending is 0 but consumers still have pending messages) + effective_pending = max(group_pending_count, consumer_pending_total) + + stream_data["consumer_groups"].append({ + "name": str(group_name), + "consumers": consumer_list, + "pending": int(effective_pending) + }) + + # Determine if stream is active or completed + # Active: has active consumers OR pending messages OR recent activity (< 5 min) + # Completed: no active consumers and idle > 5 minutes but < 1 hour + total_pending = sum(group["pending"] for group in stream_data["consumer_groups"]) + is_active = ( + has_active_consumer or + total_pending > 0 or + stream_age_seconds < 300 # Less than 5 minutes old + ) + + if is_active: + active_streams[stream_name] = stream_data + else: + # Mark as completed (will be cleaned up when > 1 hour old) + stream_data["idle_seconds"] = stream_age_seconds + completed_streams[stream_name] = stream_data + + except Exception as e: + # Stream doesn't exist or error getting info + logger.debug(f"Error processing stream {stream_name}: {e}") + continue + + # Get RQ queue stats - include all registries + rq_stats = { + "transcription_queue": { + "queued": 
transcription_queue.count, + "processing": len(transcription_queue.started_job_registry), + "completed": len(transcription_queue.finished_job_registry), + "failed": len(transcription_queue.failed_job_registry), + "cancelled": len(transcription_queue.canceled_job_registry), + "deferred": len(transcription_queue.deferred_job_registry) + }, + "memory_queue": { + "queued": memory_queue.count, + "processing": len(memory_queue.started_job_registry), + "completed": len(memory_queue.finished_job_registry), + "failed": len(memory_queue.failed_job_registry), + "cancelled": len(memory_queue.canceled_job_registry), + "deferred": len(memory_queue.deferred_job_registry) + }, + "default_queue": { + "queued": default_queue.count, + "processing": len(default_queue.started_job_registry), + "completed": len(default_queue.finished_job_registry), + "failed": len(default_queue.failed_job_registry), + "cancelled": len(default_queue.canceled_job_registry), + "deferred": len(default_queue.deferred_job_registry) + } + } + + return { + "active_sessions": active_sessions, + "completed_sessions": completed_sessions_from_redis, + "active_streams": active_streams, + "completed_streams": completed_streams, + "stream_health": active_streams, # Backward compatibility - use active_streams + "rq_queues": rq_stats, + "timestamp": time.time() + } + + except Exception as e: + logger.error(f"Error getting streaming status: {e}", exc_info=True) + return JSONResponse( + status_code=500, + content={"error": f"Failed to get streaming status: {str(e)}"} + ) + + +async def cleanup_old_sessions(request, max_age_seconds: int = 3600): + """Clean up old session tracking metadata and old audio streams from Redis.""" + import time + from fastapi.responses import JSONResponse + + try: + # Get Redis client from request.app.state (initialized during startup) + redis_client = request.app.state.redis_audio_stream + + if not redis_client: + return JSONResponse( + status_code=503, + content={"error": "Redis client for 
audio streaming not initialized"} + ) + + # Get all session keys + session_keys = await redis_client.keys("audio:session:*") + cleaned_sessions = 0 + old_sessions = [] + + current_time = time.time() + + for key in session_keys: + session_data = await redis_client.hgetall(key) + if not session_data: + continue + + session_id = key.decode().split(":")[-1] + started_at = float(session_data.get(b"started_at", b"0")) + status = session_data.get(b"status", b"").decode() + + age_seconds = current_time - started_at + + # Clean up sessions older than max_age or stuck in "finalizing" + should_clean = ( + age_seconds > max_age_seconds or + (status == "finalizing" and age_seconds > 300) # Finalizing for more than 5 minutes + ) + + if should_clean: + old_sessions.append({ + "session_id": session_id, + "age_seconds": age_seconds, + "status": status + }) + await redis_client.delete(key) + cleaned_sessions += 1 + + # Also clean up old audio streams (per-client streams that are inactive) + stream_keys = await redis_client.keys("audio:stream:*") + cleaned_streams = 0 + old_streams = [] + + for stream_key in stream_keys: + stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key + + try: + # Check stream info to get last activity + stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + + # Parse stream info + info_dict = {} + for i in range(0, len(stream_info), 2): + key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) + info_dict[key_name] = stream_info[i+1] + + stream_length = int(info_dict.get("length", 0)) + last_entry = info_dict.get("last-entry") + + # Check stream age via last entry ID (Redis Stream IDs are timestamps) + should_delete = False + age_seconds = 0 + + if stream_length == 0: + # Empty stream - safe to delete + should_delete = True + reason = "empty" + elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: + # Extract timestamp from last entry ID + 
last_id = last_entry[0] + if isinstance(last_id, bytes): + last_id = last_id.decode() + + # Redis Stream IDs format: "milliseconds-sequence" + try: + last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_s = last_timestamp_ms / 1000 + age_seconds = current_time - last_timestamp_s + + # Delete streams older than max_age regardless of size + if age_seconds > max_age_seconds: + should_delete = True + reason = "old" + except (ValueError, IndexError): + # If we can't parse timestamp, check if first entry is old + first_entry = info_dict.get("first-entry") + if first_entry and isinstance(first_entry, list) and len(first_entry) > 0: + try: + first_id = first_entry[0] + if isinstance(first_id, bytes): + first_id = first_id.decode() + first_timestamp_ms = int(first_id.split('-')[0]) + first_timestamp_s = first_timestamp_ms / 1000 + age_seconds = current_time - first_timestamp_s + + if age_seconds > max_age_seconds: + should_delete = True + reason = "old_unparseable" + except (ValueError, IndexError): + pass + + if should_delete: + await redis_client.delete(stream_name) + cleaned_streams += 1 + old_streams.append({ + "stream_name": stream_name, + "reason": reason, + "age_seconds": age_seconds, + "length": stream_length + }) + + except Exception as e: + logger.debug(f"Error checking stream {stream_name}: {e}") + continue + + return { + "success": True, + "cleaned_sessions": cleaned_sessions, + "cleaned_streams": cleaned_streams, + "cleaned_session_details": old_sessions, + "cleaned_stream_details": old_streams, + "timestamp": time.time() + } + + except Exception as e: + logger.error(f"Error cleaning up old sessions: {e}", exc_info=True) + return JSONResponse( + status_code=500, content={"error": f"Failed to cleanup old sessions: {str(e)}"} + ) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 045a7007..3a4e5163 100644 --- 
a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -560,643 +560,3 @@ async def delete_all_user_memories(user: User): ) -async def get_streaming_status(request): - """Get status of active streaming sessions and Redis Streams health.""" - import time - from advanced_omi_backend.controllers.queue_controller import ( - transcription_queue, - memory_queue, - default_queue, - all_jobs_complete_for_session - ) - - try: - # Get Redis client from request.app.state (initialized during startup) - redis_client = request.app.state.redis_audio_stream - - if not redis_client: - return JSONResponse( - status_code=503, - content={"error": "Redis client for audio streaming not initialized"} - ) - - # Get all sessions (both active and completed) - session_keys = await redis_client.keys("audio:session:*") - active_sessions = [] - completed_sessions_from_redis = [] - - for key in session_keys: - session_data = await redis_client.hgetall(key) - if not session_data: - continue - - session_id = key.decode().split(":")[-1] - started_at = float(session_data.get(b"started_at", b"0")) - last_chunk_at = float(session_data.get(b"last_chunk_at", b"0")) - status = session_data.get(b"status", b"").decode() - - session_obj = { - "session_id": session_id, - "user_id": session_data.get(b"user_id", b"").decode(), - "client_id": session_data.get(b"client_id", b"").decode(), - "provider": session_data.get(b"provider", b"").decode(), - "mode": session_data.get(b"mode", b"").decode(), - "status": status, - "chunks_published": int(session_data.get(b"chunks_published", b"0")), - "started_at": started_at, - "last_chunk_at": last_chunk_at, - "age_seconds": time.time() - started_at, - "idle_seconds": time.time() - last_chunk_at - } - - # Separate active and completed sessions - # Check if all jobs are complete (including failed jobs) - all_jobs_done = all_jobs_complete_for_session(session_id) - - # 
Session is completed if: - # 1. Redis status says complete/finalized AND all jobs done, OR - # 2. All jobs are done (even if status isn't complete yet) - # This ensures sessions with failed jobs move to completed - if status in ["complete", "completed", "finalized"] or all_jobs_done: - if all_jobs_done: - # All jobs complete - this is truly a completed session - # Update Redis status if it wasn't already marked complete - if status not in ["complete", "completed", "finalized"]: - await redis_client.hset(key, "status", "complete") - logger.info(f"βœ… Marked session {session_id} as complete (all jobs terminal)") - - completed_sessions_from_redis.append({ - "session_id": session_id, - "client_id": session_data.get(b"client_id", b"").decode(), - "conversation_id": session_data.get(b"conversation_id", b"").decode() if b"conversation_id" in session_data else None, - "has_conversation": bool(session_data.get(b"conversation_id", b"")), - "action": session_data.get(b"action", b"complete").decode(), - "reason": session_data.get(b"reason", b"").decode() if b"reason" in session_data else "", - "completed_at": last_chunk_at, - "audio_file": session_data.get(b"audio_file", b"").decode() if b"audio_file" in session_data else "" - }) - else: - # Status says complete but jobs still processing - keep in active - active_sessions.append(session_obj) - else: - # This is an active session - active_sessions.append(session_obj) - - # Get stream health for all streams (per-client streams) - # Categorize as active or completed based on consumer activity - active_streams = {} - completed_streams = {} - - # Discover all audio streams - stream_keys = await redis_client.keys("audio:stream:*") - current_time = time.time() - - for stream_key in stream_keys: - stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key - try: - # Check if stream exists - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) - - # Parse stream info (returns flat 
list of key-value pairs) - info_dict = {} - for i in range(0, len(stream_info), 2): - key = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - value = stream_info[i+1] - - # Skip complex binary structures like first-entry and last-entry - # which contain message data that can't be JSON serialized - if key in ["first-entry", "last-entry"]: - # Just extract the message ID (first element) - if isinstance(value, list) and len(value) > 0: - msg_id = value[0] - if isinstance(msg_id, bytes): - msg_id = msg_id.decode() - value = msg_id - else: - value = None - elif isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - # Binary data that can't be decoded, skip it - value = "" - - info_dict[key] = value - - # Calculate stream age from last entry - stream_age_seconds = 0 - last_entry_id = info_dict.get("last-entry") - if last_entry_id: - try: - # Redis Stream IDs format: "milliseconds-sequence" - last_timestamp_ms = int(last_entry_id.split('-')[0]) - last_timestamp_s = last_timestamp_ms / 1000 - stream_age_seconds = current_time - last_timestamp_s - except (ValueError, IndexError, AttributeError): - stream_age_seconds = 0 - - # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) - - stream_data = { - "stream_length": info_dict.get("length", 0), - "first_entry_id": info_dict.get("first-entry"), - "last_entry_id": last_entry_id, - "stream_age_seconds": stream_age_seconds, - "consumer_groups": [], - "total_pending": 0 - } - - # Track if stream has any active consumers - has_active_consumer = False - min_consumer_idle_ms = float('inf') - - # Parse consumer groups - for group in groups: - group_dict = {} - for i in range(0, len(group), 2): - key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = "" - group_dict[key] = value - 
- group_name = group_dict.get("name", "unknown") - if isinstance(group_name, bytes): - group_name = group_name.decode() - - # Get consumers for this group - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) - consumer_list = [] - consumer_pending_total = 0 - - for consumer in consumers: - consumer_dict = {} - for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = "" - consumer_dict[key] = value - - consumer_name = consumer_dict.get("name", "unknown") - if isinstance(consumer_name, bytes): - consumer_name = consumer_name.decode() - - consumer_pending = int(consumer_dict.get("pending", 0)) - consumer_idle_ms = int(consumer_dict.get("idle", 0)) - consumer_pending_total += consumer_pending - - # Track minimum idle time - min_consumer_idle_ms = min(min_consumer_idle_ms, consumer_idle_ms) - - # Consumer is active if idle < 5 minutes (300000ms) - if consumer_idle_ms < 300000: - has_active_consumer = True - - consumer_list.append({ - "name": consumer_name, - "pending": consumer_pending, - "idle_ms": consumer_idle_ms - }) - - # Get group-level pending count (may be 0 even if consumers have pending) - try: - pending = await redis_client.xpending(stream_name, group_name) - group_pending_count = int(pending[0]) if pending else 0 - except Exception: - group_pending_count = 0 - - # Use the maximum of group-level pending or sum of consumer pending - # (Sometimes group pending is 0 but consumers still have pending messages) - effective_pending = max(group_pending_count, consumer_pending_total) - - stream_data["consumer_groups"].append({ - "name": str(group_name), - "consumers": consumer_list, - "pending": int(effective_pending) - }) - - stream_data["total_pending"] += int(effective_pending) - - # Determine if stream is active or completed - # 
Active: has active consumers OR pending messages OR recent activity (< 5 min) - # Completed: no active consumers and idle > 5 minutes but < 1 hour - is_active = ( - has_active_consumer or - stream_data["total_pending"] > 0 or - stream_age_seconds < 300 # Less than 5 minutes old - ) - - if is_active: - active_streams[stream_name] = stream_data - else: - # Mark as completed (will be cleaned up when > 1 hour old) - stream_data["idle_seconds"] = stream_age_seconds - completed_streams[stream_name] = stream_data - - except Exception as e: - # Stream doesn't exist or error getting info - logger.debug(f"Error processing stream {stream_name}: {e}") - continue - - # Get RQ queue stats - include all registries - rq_stats = { - "transcription_queue": { - "queued": transcription_queue.count, - "processing": len(transcription_queue.started_job_registry), - "completed": len(transcription_queue.finished_job_registry), - "failed": len(transcription_queue.failed_job_registry), - "cancelled": len(transcription_queue.canceled_job_registry), - "deferred": len(transcription_queue.deferred_job_registry) - }, - "memory_queue": { - "queued": memory_queue.count, - "processing": len(memory_queue.started_job_registry), - "completed": len(memory_queue.finished_job_registry), - "failed": len(memory_queue.failed_job_registry), - "cancelled": len(memory_queue.canceled_job_registry), - "deferred": len(memory_queue.deferred_job_registry) - }, - "default_queue": { - "queued": default_queue.count, - "processing": len(default_queue.started_job_registry), - "completed": len(default_queue.finished_job_registry), - "failed": len(default_queue.failed_job_registry), - "cancelled": len(default_queue.canceled_job_registry), - "deferred": len(default_queue.deferred_job_registry) - } - } - - return { - "active_sessions": active_sessions, - "completed_sessions": completed_sessions_from_redis, - "active_streams": active_streams, - "completed_streams": completed_streams, - "stream_health": active_streams, # 
Backward compatibility - use active_streams - "rq_queues": rq_stats, - "timestamp": time.time() - } - - except Exception as e: - logger.error(f"Error getting streaming status: {e}", exc_info=True) - return JSONResponse( - status_code=500, - content={"error": f"Failed to get streaming status: {str(e)}"} - ) - - -async def cleanup_stuck_stream_workers(request): - """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" - import time - - try: - # Get Redis client from request.app.state (initialized during startup) - redis_client = request.app.state.redis_audio_stream - - if not redis_client: - return JSONResponse( - status_code=503, - content={"error": "Redis client for audio streaming not initialized"} - ) - - cleanup_results = {} - total_cleaned = 0 - total_deleted_consumers = 0 - total_deleted_streams = 0 - current_time = time.time() - - # Discover all audio streams (per-client streams) - stream_keys = await redis_client.keys("audio:stream:*") - - for stream_key in stream_keys: - stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key - - try: - # First check stream age - delete old streams (>1 hour) immediately - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) - - # Parse stream info - info_dict = {} - for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] - - stream_length = int(info_dict.get("length", 0)) - last_entry = info_dict.get("last-entry") - - # Check if stream is old - should_delete_stream = False - stream_age = 0 - - if stream_length == 0: - should_delete_stream = True - stream_age = 0 - elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: - try: - last_id = last_entry[0] - if isinstance(last_id, bytes): - last_id = last_id.decode() - last_timestamp_ms = int(last_id.split('-')[0]) - last_timestamp_s = 
last_timestamp_ms / 1000 - stream_age = current_time - last_timestamp_s - - # Delete streams older than 1 hour (3600 seconds) - if stream_age > 3600: - should_delete_stream = True - except (ValueError, IndexError): - pass - - if should_delete_stream: - await redis_client.delete(stream_name) - total_deleted_streams += 1 - cleanup_results[stream_name] = { - "message": f"Deleted old stream (age: {stream_age:.0f}s, length: {stream_length})", - "cleaned": 0, - "deleted_consumers": 0, - "deleted_stream": True, - "stream_age": stream_age - } - continue - - # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) - - if not groups: - cleanup_results[stream_name] = {"message": "No consumer groups found", "cleaned": 0, "deleted_stream": False} - continue - - # Parse first group - group_dict = {} - group = groups[0] - for i in range(0, len(group), 2): - key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = str(value) - group_dict[key] = value - - group_name = group_dict.get("name", "unknown") - if isinstance(group_name, bytes): - group_name = group_name.decode() - - pending_count = int(group_dict.get("pending", 0)) - - # Get consumers for this group to check per-consumer pending - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) - - cleaned_count = 0 - total_consumer_pending = 0 - - # Clean up pending messages for each consumer AND delete dead consumers - deleted_consumers = 0 - for consumer in consumers: - consumer_dict = {} - for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - value = str(value) - consumer_dict[key] = value - - consumer_name = 
consumer_dict.get("name", "unknown") - if isinstance(consumer_name, bytes): - consumer_name = consumer_name.decode() - - consumer_pending = int(consumer_dict.get("pending", 0)) - consumer_idle_ms = int(consumer_dict.get("idle", 0)) - total_consumer_pending += consumer_pending - - # Check if consumer is dead (idle > 5 minutes = 300000ms) - is_dead = consumer_idle_ms > 300000 - - if consumer_pending > 0: - logger.info(f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)") - - # Get pending messages for this specific consumer - try: - pending_messages = await redis_client.execute_command( - 'XPENDING', stream_name, group_name, '-', '+', str(consumer_pending), consumer_name - ) - - # XPENDING returns flat list: [msg_id, consumer, idle_ms, delivery_count, msg_id, ...] - # Parse in groups of 4 - for i in range(0, len(pending_messages), 4): - if i < len(pending_messages): - msg_id = pending_messages[i] - if isinstance(msg_id, bytes): - msg_id = msg_id.decode() - - # Claim the message to a cleanup worker - try: - await redis_client.execute_command( - 'XCLAIM', stream_name, group_name, 'cleanup-worker', '0', msg_id - ) - - # Acknowledge it immediately - await redis_client.xack(stream_name, group_name, msg_id) - cleaned_count += 1 - except Exception as claim_error: - logger.warning(f"Failed to claim/ack message {msg_id}: {claim_error}") - - except Exception as consumer_error: - logger.error(f"Error processing consumer {consumer_name}: {consumer_error}") - - # Delete dead consumers (idle > 5 minutes with no pending messages) - if is_dead and consumer_pending == 0: - try: - await redis_client.execute_command( - 'XGROUP', 'DELCONSUMER', stream_name, group_name, consumer_name - ) - deleted_consumers += 1 - logger.info(f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)") - except Exception as delete_error: - logger.warning(f"Failed to delete consumer {consumer_name}: {delete_error}") - - if 
total_consumer_pending == 0 and deleted_consumers == 0: - cleanup_results[stream_name] = {"message": "No pending messages or dead consumers", "cleaned": 0, "deleted_consumers": 0, "deleted_stream": False} - continue - - total_cleaned += cleaned_count - total_deleted_consumers += deleted_consumers - cleanup_results[stream_name] = { - "message": f"Cleaned {cleaned_count} pending messages, deleted {deleted_consumers} dead consumers", - "cleaned": cleaned_count, - "deleted_consumers": deleted_consumers, - "deleted_stream": False, - "original_pending": pending_count - } - - except Exception as e: - cleanup_results[stream_name] = { - "error": str(e), - "cleaned": 0 - } - - return { - "success": True, - "total_cleaned": total_cleaned, - "total_deleted_consumers": total_deleted_consumers, - "total_deleted_streams": total_deleted_streams, - "streams": cleanup_results, # New key for per-stream results - "providers": cleanup_results, # Keep for backward compatibility with frontend - "timestamp": time.time() - } - - except Exception as e: - logger.error(f"Error cleaning up stuck workers: {e}", exc_info=True) - return JSONResponse( - status_code=500, content={"error": f"Failed to cleanup stuck workers: {str(e)}"} - ) - - -async def cleanup_old_sessions(request, max_age_seconds: int = 3600): - """Clean up old session tracking metadata and old audio streams from Redis.""" - import time - - try: - # Get Redis client from request.app.state (initialized during startup) - redis_client = request.app.state.redis_audio_stream - - if not redis_client: - return JSONResponse( - status_code=503, - content={"error": "Redis client for audio streaming not initialized"} - ) - - # Get all session keys - session_keys = await redis_client.keys("audio:session:*") - cleaned_sessions = 0 - old_sessions = [] - - current_time = time.time() - - for key in session_keys: - session_data = await redis_client.hgetall(key) - if not session_data: - continue - - session_id = key.decode().split(":")[-1] - 
started_at = float(session_data.get(b"started_at", b"0")) - status = session_data.get(b"status", b"").decode() - - age_seconds = current_time - started_at - - # Clean up sessions older than max_age or stuck in "finalizing" - should_clean = ( - age_seconds > max_age_seconds or - (status == "finalizing" and age_seconds > 300) # Finalizing for more than 5 minutes - ) - - if should_clean: - old_sessions.append({ - "session_id": session_id, - "age_seconds": age_seconds, - "status": status - }) - await redis_client.delete(key) - cleaned_sessions += 1 - - # Also clean up old audio streams (per-client streams that are inactive) - stream_keys = await redis_client.keys("audio:stream:*") - cleaned_streams = 0 - old_streams = [] - - for stream_key in stream_keys: - stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key - - try: - # Check stream info to get last activity - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) - - # Parse stream info - info_dict = {} - for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] - - stream_length = int(info_dict.get("length", 0)) - last_entry = info_dict.get("last-entry") - - # Check stream age via last entry ID (Redis Stream IDs are timestamps) - should_delete = False - age_seconds = 0 - - if stream_length == 0: - # Empty stream - safe to delete - should_delete = True - reason = "empty" - elif last_entry and isinstance(last_entry, list) and len(last_entry) > 0: - # Extract timestamp from last entry ID - last_id = last_entry[0] - if isinstance(last_id, bytes): - last_id = last_id.decode() - - # Redis Stream IDs format: "milliseconds-sequence" - try: - last_timestamp_ms = int(last_id.split('-')[0]) - last_timestamp_s = last_timestamp_ms / 1000 - age_seconds = current_time - last_timestamp_s - - # Delete streams older than max_age regardless of size - if 
age_seconds > max_age_seconds: - should_delete = True - reason = "old" - except (ValueError, IndexError): - # If we can't parse timestamp, check if first entry is old - first_entry = info_dict.get("first-entry") - if first_entry and isinstance(first_entry, list) and len(first_entry) > 0: - try: - first_id = first_entry[0] - if isinstance(first_id, bytes): - first_id = first_id.decode() - first_timestamp_ms = int(first_id.split('-')[0]) - first_timestamp_s = first_timestamp_ms / 1000 - age_seconds = current_time - first_timestamp_s - - if age_seconds > max_age_seconds: - should_delete = True - reason = "old_unparseable" - except (ValueError, IndexError): - pass - - if should_delete: - await redis_client.delete(stream_name) - cleaned_streams += 1 - old_streams.append({ - "stream_name": stream_name, - "reason": reason, - "age_seconds": age_seconds, - "length": stream_length - }) - - except Exception as e: - logger.debug(f"Error checking stream {stream_name}: {e}") - continue - - return { - "success": True, - "cleaned_sessions": cleaned_sessions, - "cleaned_streams": cleaned_streams, - "cleaned_session_details": old_sessions, - "cleaned_stream_details": old_streams, - "timestamp": time.time() - } - - except Exception as e: - logger.error(f"Error cleaning up old sessions: {e}", exc_info=True) - return JSONResponse( - status_code=500, content={"error": f"Failed to cleanup old sessions: {str(e)}"} - ) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 919daa1b..98e96734 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -21,7 +21,7 @@ from advanced_omi_backend.auth import websocket_auth from advanced_omi_backend.client_manager import generate_client_id, get_client_manager from advanced_omi_backend.constants import 
OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH -from advanced_omi_backend.audio_utils import process_audio_chunk +from advanced_omi_backend.utils.audio_utils import process_audio_chunk from advanced_omi_backend.services.audio_stream import AudioStreamProducer from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer @@ -128,7 +128,84 @@ async def create_client_state(client_id: str, user, device_name: Optional[str] = async def cleanup_client_state(client_id: str): - """Clean up and remove client state.""" + """Clean up and remove client state, including cancelling speech detection job and marking session complete.""" + # Cancel the speech detection job for this client + from advanced_omi_backend.controllers.queue_controller import redis_conn + from rq.job import Job + import redis.asyncio as redis + + try: + job_id_key = f"speech_detection_job:{client_id}" + job_id_bytes = redis_conn.get(job_id_key) + + if job_id_bytes: + job_id = job_id_bytes.decode() + logger.info(f"πŸ›‘ Cancelling speech detection job {job_id} for client {client_id}") + + try: + # Fetch and cancel the job + job = Job.fetch(job_id, connection=redis_conn) + job.cancel() + logger.info(f"βœ… Successfully cancelled speech detection job {job_id}") + except Exception as job_error: + logger.warning(f"⚠️ Failed to cancel job {job_id}: {job_error}") + + # Clean up the tracking key + redis_conn.delete(job_id_key) + logger.info(f"🧹 Cleaned up job tracking key for client {client_id}") + else: + logger.debug(f"No speech detection job found for client {client_id}") + except Exception as e: + logger.warning(f"⚠️ Error during job cancellation for client {client_id}: {e}") + + # Mark all active sessions for this client as complete AND delete Redis streams + try: + # Get async Redis client + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + async_redis = redis.from_url(redis_url, decode_responses=False) + + # Find all session keys for this client and mark them 
complete + pattern = f"audio:session:*" + cursor = 0 + sessions_closed = 0 + + while True: + cursor, keys = await async_redis.scan(cursor, match=pattern, count=100) + + for key in keys: + # Check if this session belongs to this client + client_id_bytes = await async_redis.hget(key, "client_id") + if client_id_bytes and client_id_bytes.decode() == client_id: + # Mark session as complete (WebSocket disconnected) + await async_redis.hset(key, mapping={ + "status": "complete", + "completed_at": str(time.time()), + "completion_reason": "websocket_disconnect" + }) + session_id = key.decode().replace("audio:session:", "") + logger.info(f"πŸ“Š Marked session {session_id[:12]} as complete (WebSocket disconnect)") + sessions_closed += 1 + + if cursor == 0: + break + + if sessions_closed > 0: + logger.info(f"βœ… Closed {sessions_closed} active session(s) for client {client_id}") + + # Delete Redis Streams for this client + stream_pattern = f"audio:stream:{client_id}" + stream_key = await async_redis.exists(stream_pattern) + if stream_key: + await async_redis.delete(stream_pattern) + logger.info(f"🧹 Deleted Redis stream: {stream_pattern}") + else: + logger.debug(f"No Redis stream found for client {client_id}") + + await async_redis.close() + + except Exception as session_error: + logger.warning(f"⚠️ Error marking sessions complete for client {client_id}: {session_error}") + # Use ClientManager for atomic client removal with cleanup client_manager = get_client_manager() removed = await client_manager.remove_client_with_cleanup(client_id) @@ -251,7 +328,6 @@ async def _initialize_streaming_session( job_ids = start_streaming_jobs( session_id=client_state.stream_session_id, user_id=user_id, - user_email=user_email, client_id=client_id ) @@ -620,7 +696,7 @@ async def _process_batch_audio_complete( return try: - from advanced_omi_backend.audio_utils import write_audio_file + from advanced_omi_backend.utils.audio_utils import write_audio_file from 
advanced_omi_backend.models.conversation import create_conversation # Combine all chunks @@ -648,12 +724,10 @@ async def _process_batch_audio_complete( f"βœ… Batch mode: Wrote audio file {wav_filename} ({duration:.1f}s)" ) - # Create conversation immediately for batch audio - conversation_id = str(uuid.uuid4()) + # Create conversation immediately for batch audio (conversation_id auto-generated) version_id = str(uuid.uuid4()) conversation = create_conversation( - conversation_id=conversation_id, audio_uuid=audio_uuid, user_id=user_id, client_id=client_id, @@ -661,18 +735,19 @@ async def _process_batch_audio_complete( summary="Processing batch audio..." ) await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID application_logger.info(f"πŸ“ Batch mode: Created conversation {conversation_id}") - # Enqueue complete batch processing job chain - from advanced_omi_backend.controllers.queue_controller import start_batch_processing_jobs + # Enqueue post-conversation processing job chain + from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs - job_ids = start_batch_processing_jobs( + job_ids = start_post_conversation_jobs( conversation_id=conversation_id, audio_uuid=audio_uuid, - user_id=user_id, - user_email=user_email, - audio_file_path=file_path + audio_file_path=file_path, + user_id=None, # Will be read from conversation in DB by jobs + post_transcription=True # Run batch transcription for uploads ) application_logger.info( diff --git a/backends/advanced/src/advanced_omi_backend/conversation_manager.py b/backends/advanced/src/advanced_omi_backend/conversation_manager.py deleted file mode 100644 index a240dd99..00000000 --- a/backends/advanced/src/advanced_omi_backend/conversation_manager.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Conversation Manager for handling conversation lifecycle and processing coordination. 
- -This module separates conversation management concerns from ClientState to follow -the Single Responsibility Principle. It handles conversation closure, memory processing -queuing, and audio cropping coordination. -""" - -import logging -from typing import Optional - -audio_logger = logging.getLogger("audio") - - -class ConversationManager: - """Manages conversation lifecycle and processing coordination. - - This class handles the responsibilities previously mixed into ClientState, - providing a clean separation of concerns for conversation management. - - V2 Architecture: Uses RQ jobs for all transcription and memory processing. - """ - - def __init__(self): - audio_logger.info("ConversationManager initialized") - - async def close_conversation( - self, - client_id: str, - audio_uuid: str, - user_id: str, - user_email: Optional[str], - conversation_start_time: float, - speech_segments: dict, - chunk_dir, # Can be Path or str - ) -> bool: - """Close a conversation and coordinate all necessary processing. 
- - Args: - client_id: Client identifier - audio_uuid: Unique audio conversation identifier - user_id: User identifier - user_email: User email - db_helper: Database helper instance - conversation_start_time: When conversation started - speech_segments: Speech segments for cropping - chunk_dir: Directory for audio chunks - - Returns: - True if conversation was closed successfully - """ - audio_logger.info(f"πŸ”’ Closing conversation {audio_uuid} for client {client_id}") - - try: - # V2 Architecture: All processing handled by RQ jobs - # Step 1: Enqueue final high-quality transcription via RQ - # This will add a new transcript version and trigger memory processing - from advanced_omi_backend.database import AudioChunksRepository - - repo = AudioChunksRepository() - audio_session = await repo.get_chunk(audio_uuid) - - if audio_session and audio_session.get("conversation_id"): - # Only enqueue if conversation was created (speech detected) - import uuid - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, JOB_RESULT_TTL - - conversation_id = audio_session["conversation_id"] - version_id = str(uuid.uuid4()) # Generate new version ID for final transcription - audio_logger.info(f"πŸ“€ Enqueuing final transcription job for conversation {conversation_id}") - - job = transcription_queue.enqueue( - transcribe_full_audio_job, - conversation_id, - audio_uuid, - audio_session["audio_file_path"], - version_id, - user_id, - job_timeout=300, - result_ttl=JOB_RESULT_TTL, - job_id=f"transcript-reprocess_{conversation_id[:12]}", - description=f"Final transcription for conversation {conversation_id[:12]} (conversation close)" - ) - audio_logger.info(f"βœ… Enqueued final transcription job {job.id} for conversation {conversation_id}") - else: - audio_logger.info(f"⏭️ No conversation created for {audio_uuid} (no speech detected), skipping final transcription") - - 
audio_logger.info(f"βœ… Successfully closed conversation {audio_uuid}") - return True - - except Exception as e: - audio_logger.error(f"❌ Error closing conversation {audio_uuid}: {e}", exc_info=True) - return False - - - -# Global singleton instance -_conversation_manager: Optional[ConversationManager] = None - - -def get_conversation_manager() -> ConversationManager: - """Get the global ConversationManager instance.""" - global _conversation_manager - if _conversation_manager is None: - _conversation_manager = ConversationManager() - return _conversation_manager diff --git a/backends/advanced/src/advanced_omi_backend/database.py b/backends/advanced/src/advanced_omi_backend/database.py index 1b85bf21..36d17ebb 100644 --- a/backends/advanced/src/advanced_omi_backend/database.py +++ b/backends/advanced/src/advanced_omi_backend/database.py @@ -66,7 +66,6 @@ async def create_chunk( user_id=None, user_email=None, transcript=None, - speakers_identified=None, memories=None, transcription_status="PENDING", memory_processing_status="PENDING", @@ -84,8 +83,7 @@ async def create_chunk( "provider": None, "created_at": datetime.now(UTC).isoformat(), "processing_run_id": None, - "raw_data": {}, - "speakers_identified": speakers_identified or [] + "raw_data": {} }) active_transcript_version = version_id @@ -123,7 +121,6 @@ async def create_chunk( # Compatibility fields (computed from active versions) "transcript": transcript or [], - "speakers_identified": speakers_identified or [], "memories": memories or [], "transcription_status": transcription_status, "memory_processing_status": memory_processing_status, @@ -152,8 +149,7 @@ async def add_transcript_segment(self, audio_uuid, transcript_segment): "provider": None, "created_at": datetime.now(UTC).isoformat(), "processing_run_id": None, - "raw_data": {}, - "speakers_identified": [] + "raw_data": {} } result = await self.col.update_one( @@ -183,12 +179,6 @@ async def add_transcript_segment(self, audio_uuid, transcript_segment): 
return result.modified_count > 0 - async def add_speaker(self, audio_uuid, speaker_id): - """Add a speaker to the speakers_identified list if not already present.""" - await self.col.update_one( - {"audio_uuid": audio_uuid}, - {"$addToSet": {"speakers_identified": speaker_id}}, - ) async def store_raw_transcript_data(self, audio_uuid, raw_data, provider): """Store raw transcript data from transcription provider.""" @@ -414,8 +404,7 @@ async def update_transcription_status( "provider": provider, "created_at": datetime.now(UTC).isoformat(), "processing_run_id": None, - "raw_data": {}, - "speakers_identified": [] + "raw_data": {} } if error_message: version_data["error_message"] = error_message diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index f3b0bb18..21ee3331 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -45,13 +45,11 @@ class OpenAILLMClient(LLMClient): def __init__( self, - provider: str, api_key: str | None = None, base_url: str | None = None, model: str | None = None, temperature: float = 0.1, ): - self.provider = provider super().__init__(model, temperature) self.api_key = api_key or os.getenv("OPENAI_API_KEY") self.base_url = base_url or os.getenv("OPENAI_BASE_URL") @@ -59,12 +57,25 @@ def __init__( if not self.api_key or not self.base_url or not self.model: raise ValueError("OPENAI_API_KEY, OPENAI_BASE_URL, and OPENAI_MODEL must be set") - # Initialize OpenAI client + # Initialize OpenAI client with optional Langfuse tracing try: - import langfuse.openai as openai + # Check if Langfuse is configured + langfuse_enabled = ( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) - self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url) - self.logger.info(f"OpenAI client initialized with base_url: {self.base_url}") + 
if langfuse_enabled: + # Use Langfuse-wrapped OpenAI for tracing + import langfuse.openai as openai + self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url) + self.logger.info(f"OpenAI client initialized with Langfuse tracing, base_url: {self.base_url}") + else: + # Use regular OpenAI client without tracing + from openai import OpenAI + self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) + self.logger.info(f"OpenAI client initialized (no tracing), base_url: {self.base_url}") except ImportError: self.logger.error("OpenAI library not installed. Install with: pip install openai") raise @@ -96,79 +107,25 @@ def generate( self.logger.error(f"Error generating completion: {e}") raise - async def health_check(self) -> Dict: + def health_check(self) -> Dict: """Check OpenAI-compatible service health.""" try: - if not (self.model and self.base_url): + # For OpenAI API, check if we have valid configuration + # Avoid calling /models endpoint as it can be unreliable + if self.api_key and self.api_key != "dummy" and self.model: return { - "status": "⚠️ Configuration incomplete (missing model or base_url)", + "status": "βœ… Connected", "base_url": self.base_url, "default_model": self.model, "api_key_configured": bool(self.api_key and self.api_key != "dummy"), } - - if self.provider == "ollama": - import aiohttp - ollama_health_url = self.base_url.replace("/v1", "") if self.base_url.endswith("/v1") else self.base_url - - # Initialize response with main LLM status - response_data = { - "status": "❌ Unknown", + else: + return { + "status": "⚠️ Configuration incomplete", "base_url": self.base_url, "default_model": self.model, - "api_key_configured": False, - "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), - "embedder_status": "❌ Not Checked" + "api_key_configured": bool(self.api_key and self.api_key != "dummy"), } - - try: - async with aiohttp.ClientSession() as session: - # Check main Ollama server health - async with 
session.get(f"{ollama_health_url}/api/version", timeout=aiohttp.ClientTimeout(total=5)) as response: - if response.status == 200: - response_data["status"] = "βœ… Connected" - else: - response_data["status"] = f"⚠️ Ollama Unhealthy: HTTP {response.status}" - - # Check embedder model availability - embedder_model_name = os.getenv("OLLAMA_EMBEDDER_MODEL") - if embedder_model_name: - try: - # Use /api/show to check if model exists - async with session.post(f"{ollama_health_url}/api/show", json={"name": embedder_model_name}, timeout=aiohttp.ClientTimeout(total=5)) as embedder_response: - if embedder_response.status == 200: - response_data["embedder_status"] = "βœ… Available" - else: - response_data["embedder_status"] = "⚠️ Embedder Model Unhealthy" - except aiohttp.ClientError: - response_data["embedder_status"] = "❌ Embedder Model Connection Failed" - except asyncio.TimeoutError: - response_data["embedder_status"] = "❌ Embedder Model Timeout" - else: - response_data["embedder_status"] = "⚠️ Embedder Model Not Configured" - - except aiohttp.ClientError: - response_data["status"] = "❌ Ollama Connection Failed" - except asyncio.TimeoutError: - response_data["status"] = "❌ Ollama Connection Timeout (5s)" - - return response_data - else: - # For other OpenAI-compatible APIs, check configuration - if self.api_key and self.api_key != "dummy": - return { - "status": "βœ… Connected", - "base_url": self.base_url, - "default_model": self.model, - "api_key_configured": bool(self.api_key and self.api_key != "dummy"), - } - else: - return { - "status": "⚠️ Configuration incomplete (missing API key)", - "base_url": self.base_url, - "default_model": self.model, - "api_key_configured": bool(self.api_key and self.api_key != "dummy"), - } except Exception as e: self.logger.error(f"Health check failed: {e}") return { @@ -191,20 +148,12 @@ def create_client() -> LLMClient: """Create an LLM client based on LLM_PROVIDER environment variable.""" provider = os.getenv("LLM_PROVIDER", 
"openai").lower() - if provider == "openai": + if provider in ["openai", "ollama"]: return OpenAILLMClient( - provider="openai", api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL"), model=os.getenv("OPENAI_MODEL"), ) - elif provider == "ollama": - return OpenAILLMClient( - provider="ollama", - api_key="dummy", # Ollama doesn't require an API key - base_url=os.getenv("OLLAMA_BASE_URL"), - model=os.getenv("OLLAMA_MODEL"), - ) else: raise ValueError(f"Unsupported LLM provider: {provider}") @@ -245,4 +194,5 @@ async def async_generate( async def async_health_check() -> Dict: """Async wrapper for LLM health check.""" client = get_llm_client() - return await client.health_check() + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, client.health_check) diff --git a/backends/advanced/src/advanced_omi_backend/memory/config.py b/backends/advanced/src/advanced_omi_backend/memory/config.py index 35d478b5..99e79d38 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/config.py +++ b/backends/advanced/src/advanced_omi_backend/memory/config.py @@ -1,14 +1,23 @@ """Memory service configuration utilities.""" -import os import logging -from typing import Dict, Any +import os from dataclasses import dataclass from enum import Enum +from typing import Any, Dict memory_logger = logging.getLogger("memory_service") +def _is_langfuse_enabled() -> bool: + """Check if Langfuse is properly configured.""" + return bool( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) + + class LLMProvider(Enum): """Supported LLM providers.""" OPENAI = "openai" @@ -249,11 +258,19 @@ def get_embedding_dims(llm_config: Dict[str, Any]) -> int: """ embedding_model = llm_config.get('embedding_model') try: - import langfuse.openai as openai - client = openai.OpenAI( - api_key=llm_config.get('api_key'), - base_url=llm_config.get('base_url') - ) + # Conditionally use Langfuse if configured + if 
_is_langfuse_enabled(): + from langfuse.openai import OpenAI + client = OpenAI( + api_key=llm_config.get('api_key'), + base_url=llm_config.get('base_url') + ) + else: + from openai import OpenAI + client = OpenAI( + api_key=llm_config.get('api_key'), + base_url=llm_config.get('base_url') + ) response = client.embeddings.create( model=embedding_model, input="hello world" diff --git a/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py index b1a5bb0c..a876e643 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/memory/providers/llm_providers.py @@ -32,6 +32,42 @@ memory_logger = logging.getLogger("memory_service") + +def _is_langfuse_enabled() -> bool: + """Check if Langfuse is properly configured.""" + return bool( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) + + +def _get_openai_client(api_key: str, base_url: str, is_async: bool = False): + """Get OpenAI client with optional Langfuse tracing. 
+ + Args: + api_key: OpenAI API key + base_url: OpenAI API base URL + is_async: Whether to return async or sync client + + Returns: + OpenAI client instance (with or without Langfuse tracing) + """ + if _is_langfuse_enabled(): + # Use Langfuse-wrapped OpenAI for tracing + import langfuse.openai as openai + memory_logger.debug("Using OpenAI client with Langfuse tracing") + else: + # Use regular OpenAI client without tracing + from openai import OpenAI, AsyncOpenAI + openai = type('OpenAI', (), {'OpenAI': OpenAI, 'AsyncOpenAI': AsyncOpenAI})() + memory_logger.debug("Using OpenAI client without tracing") + + if is_async: + return openai.AsyncOpenAI(api_key=api_key, base_url=base_url) + else: + return openai.OpenAI(api_key=api_key, base_url=base_url) + # TODO: Re-enable spacy when Docker build is fixed # try: # nlp = spacy.load("en_core_web_sm") @@ -121,20 +157,19 @@ def __init__(self, config: Dict[str, Any]): async def extract_memories(self, text: str, prompt: str) -> List[str]: """Extract memories using OpenAI API with the enhanced fact retrieval prompt. - + Args: text: Input text to extract memories from prompt: System prompt to guide extraction (uses default if empty) - + Returns: List of extracted memory strings """ try: - import langfuse.openai as openai - - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, - base_url=self.base_url + base_url=self.base_url, + is_async=True ) # Use the provided prompt or fall back to default @@ -206,19 +241,18 @@ async def _process_chunk(self, client, system_prompt: str, chunk: str, index: in async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: """Generate embeddings using OpenAI API. 
- + Args: texts: List of texts to generate embeddings for - + Returns: List of embedding vectors, one per input text """ try: - import langfuse.openai as openai - - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, - base_url=self.base_url + base_url=self.base_url, + is_async=True ) response = await client.embeddings.create( @@ -234,7 +268,7 @@ async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: async def test_connection(self) -> bool: """Test OpenAI connection. - + Returns: True if connection successful, False otherwise """ @@ -248,11 +282,10 @@ async def test_connection(self) -> bool: response.raise_for_status() return True - import langfuse.openai as openai - - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, - base_url=self.base_url + base_url=self.base_url, + is_async=True ) await client.models.list() @@ -269,30 +302,29 @@ async def propose_memory_actions( custom_prompt: Optional[str] = None, ) -> Dict[str, Any]: """Use OpenAI chat completion with enhanced prompt to propose memory actions. 
- + Args: retrieved_old_memory: List of existing memories for context new_facts: List of new facts to process custom_prompt: Optional custom prompt to override default - + Returns: Dictionary containing proposed memory actions """ try: - import langfuse.openai as openai - # Generate the complete prompt using the helper function memory_logger.debug(f"🧠 Facts passed to prompt builder: {new_facts}") update_memory_messages = build_update_memory_messages( - retrieved_old_memory, - new_facts, + retrieved_old_memory, + new_facts, custom_prompt ) memory_logger.debug(f"🧠 Generated prompt user content: {update_memory_messages[1]['content'][:200]}...") - client = openai.AsyncOpenAI( + client = _get_openai_client( api_key=self.api_key, base_url=self.base_url, + is_async=True ) response = await client.chat.completions.create( diff --git a/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py b/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py index fe29266f..7942a17a 100644 --- a/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py +++ b/backends/advanced/src/advanced_omi_backend/memory/providers/mcp_client.py @@ -77,7 +77,37 @@ async def add_memories(self, text: str) -> List[str]: MCPError: If the server request fails """ try: - # Use REST API endpoint for creating memories + # Get app_id first to handle duplicate app names + apps_response = await self.client.get(f"{self.server_url}/api/v1/apps/") + apps_response.raise_for_status() + apps_data = apps_response.json() + + memory_logger.debug(f"Apps API response: {apps_data}") + memory_logger.debug(f"Apps data type: {type(apps_data)}") + if isinstance(apps_data, dict): + memory_logger.debug(f"Apps dict keys: {apps_data.keys()}") + if "apps" in apps_data: + memory_logger.debug(f"Number of apps: {len(apps_data['apps'])}") + memory_logger.debug(f"Apps list: {apps_data['apps']}") + + app_id = None + if apps_data.get("apps"): + # Find matching app by name, prefer one 
class ConversationStatus(str, Enum):
    """Processing state of a conversation.

    Members are plain strings (the class mixes in ``str``), so they compare
    equal to their raw values.
    """

    # Has running jobs or open websocket
    ACTIVE = "active"
    # All jobs succeeded
    COMPLETED = "completed"
    # One or more jobs failed
    FAILED = "failed"
Models class SpeakerSegment(BaseModel): """Individual speaker segment in a transcript.""" @@ -62,11 +69,15 @@ class MemoryVersion(BaseModel): metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional provider-specific metadata") # Core identifiers - conversation_id: Indexed(str, unique=True) = Field(description="Unique conversation identifier") + conversation_id: Indexed(str, unique=True) = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique conversation identifier") audio_uuid: Indexed(str) = Field(description="Link to audio_chunks collection") user_id: Indexed(str) = Field(description="User who owns this conversation") client_id: Indexed(str) = Field(description="Client device identifier") + # Audio file reference + audio_path: Optional[str] = Field(None, description="Path to audio file (relative to CHUNK_DIR)") + cropped_audio_path: Optional[str] = Field(None, description="Path to cropped audio file (relative to CHUNK_DIR)") + # Creation metadata created_at: Indexed(datetime) = Field(default_factory=datetime.utcnow, description="When the conversation was created") @@ -280,10 +291,10 @@ class Settings: # Factory function for creating conversations def create_conversation( - conversation_id: str, audio_uuid: str, user_id: str, client_id: str, + conversation_id: Optional[str] = None, title: Optional[str] = None, summary: Optional[str] = None, transcript: Optional[str] = None, @@ -293,10 +304,10 @@ def create_conversation( Factory function to create a new conversation. 
Args: - conversation_id: Unique conversation identifier audio_uuid: Link to audio_chunks collection user_id: User who owns this conversation client_id: Client device identifier + conversation_id: Optional unique conversation identifier (auto-generated if not provided) title: Optional conversation title summary: Optional conversation summary transcript: Optional transcript text @@ -305,20 +316,26 @@ def create_conversation( Returns: Conversation instance """ - return Conversation( - conversation_id=conversation_id, - audio_uuid=audio_uuid, - user_id=user_id, - client_id=client_id, - created_at=datetime.now(), - title=title, - summary=summary, - transcript=transcript or "", - segments=segments or [], - transcript_versions=[], - active_transcript_version=None, - memory_versions=[], - active_memory_version=None, - memories=[], - memory_count=0 - ) \ No newline at end of file + # Build the conversation data + conv_data = { + "audio_uuid": audio_uuid, + "user_id": user_id, + "client_id": client_id, + "created_at": datetime.now(), + "title": title, + "summary": summary, + "transcript": transcript or "", + "segments": segments or [], + "transcript_versions": [], + "active_transcript_version": None, + "memory_versions": [], + "active_memory_version": None, + "memories": [], + "memory_count": 0 + } + + # Only set conversation_id if provided, otherwise let the model auto-generate it + if conversation_id is not None: + conv_data["conversation_id"] = conversation_id + + return Conversation(**conv_data) \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py index 545b8a12..9d355ce5 100644 --- a/backends/advanced/src/advanced_omi_backend/models/job.py +++ b/backends/advanced/src/advanced_omi_backend/models/job.py @@ -20,6 +20,49 @@ logger = logging.getLogger(__name__) +# Global flag to track if Beanie is initialized in this process +_beanie_initialized = False +_beanie_init_lock 
async def _ensure_beanie_initialized():
    """Ensure Beanie is initialized in the current process (for RQ workers).

    The module-level ``_beanie_init_lock`` serializes callers, and the
    ``_beanie_initialized`` flag makes every call after the first successful
    one return immediately.

    Raises:
        Exception: Any failure while importing, connecting or running
            ``init_beanie`` is logged and re-raised. The flag stays False in
            that case, so a later call retries instead of silently skipping
            initialization.
    """
    global _beanie_initialized
    async with _beanie_init_lock:
        if _beanie_initialized:
            return
        try:
            import os
            from motor.motor_asyncio import AsyncIOMotorClient
            from beanie import init_beanie
            from advanced_omi_backend.models.conversation import Conversation
            from advanced_omi_backend.models.audio_file import AudioFile
            from advanced_omi_backend.models.user import User
            from pymongo.errors import ConfigurationError

            # Get MongoDB URI from environment
            mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017")

            # Create MongoDB client
            client = AsyncIOMotorClient(mongodb_uri)
            try:
                database = client.get_default_database("friend-lite")
            except ConfigurationError:
                # Fall back to the named database. Re-raising here would make
                # this fallback assignment pointless.
                database = client["friend-lite"]

            # Initialize Beanie
            await init_beanie(
                database=database,
                document_models=[User, Conversation, AudioFile],
            )

            # Mark as initialized only after init_beanie succeeded; setting
            # the flag earlier would hide a failed init from later callers.
            _beanie_initialized = True
            logger.info("βœ… Beanie initialized in RQ worker process")

        except Exception as e:
            logger.error(f"❌ Failed to initialize Beanie in RQ worker: {e}")
            raise
@@ -78,7 +121,6 @@ async def _setup(self): """Setup common dependencies before job execution.""" # Initialize Beanie for MongoDB access if self.initialize_beanie: - from advanced_omi_backend.controllers.queue_controller import _ensure_beanie_initialized await _ensure_beanie_initialized() logger.debug("Beanie initialized") @@ -205,7 +247,6 @@ async def process(): # Initialize Beanie for MongoDB access if beanie: - from advanced_omi_backend.controllers.queue_controller import _ensure_beanie_initialized await _ensure_beanie_initialized() logger.debug("Beanie initialized") diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py index be387ff8..ac426ee8 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py @@ -14,7 +14,7 @@ ClientManager, get_client_manager_dependency, ) -from advanced_omi_backend.controllers import conversation_controller +from advanced_omi_backend.controllers import conversation_controller, audio_controller from advanced_omi_backend.users import User logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ async def get_cropped_audio_info( audio_uuid: str, current_user: User = Depends(current_active_user) ): """Get cropped audio information for a conversation. Users can only access their own conversations.""" - return await conversation_controller.get_cropped_audio_info(audio_uuid, current_user) + return await audio_controller.get_cropped_audio_info(audio_uuid, current_user) # Deprecated @@ -63,32 +63,7 @@ async def reprocess_audio_cropping( audio_uuid: str, current_user: User = Depends(current_active_user) ): """Reprocess audio cropping for a conversation. 
Users can only reprocess their own conversations.""" - return await conversation_controller.reprocess_audio_cropping(audio_uuid, current_user) - - -@router.post("/{audio_uuid}/speakers") -async def add_speaker_to_conversation( - audio_uuid: str, speaker_id: str, current_user: User = Depends(current_active_user) -): - """Add a speaker to the speakers_identified list for a conversation. Users can only modify their own conversations.""" - return await conversation_controller.add_speaker_to_conversation( - audio_uuid, speaker_id, current_user - ) - - -@router.put("/{audio_uuid}/transcript/{segment_index}") -async def update_transcript_segment( - audio_uuid: str, - segment_index: int, - current_user: User = Depends(current_active_user), - speaker_id: Optional[str] = None, - start_time: Optional[float] = None, - end_time: Optional[float] = None, -): - """Update a specific transcript segment with speaker or timing information. Users can only modify their own conversations.""" - return await conversation_controller.update_transcript_segment( - audio_uuid, segment_index, current_user, speaker_id, start_time, end_time - ) + return await audio_controller.reprocess_audio_cropping(audio_uuid, current_user) # New reprocessing endpoints diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py index 49160c13..4981ca39 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py @@ -190,28 +190,12 @@ async def health_check(): # Check LLM service (non-critical service - may not be running) try: llm_health = await asyncio.wait_for(async_health_check(), timeout=8.0) - - # Determine overall health for audioai service based on LLM and embedder status - is_llm_healthy = "βœ…" in llm_health.get("status", "") - - # Determine embedder health based on provider - llm_provider 
= os.getenv("LLM_PROVIDER", "openai").lower() - if llm_provider == "ollama": - is_embedder_healthy = "βœ…" in llm_health.get("embedder_status", "") or llm_health.get("embedder_status") == "⚠️ Embedder Model Not Configured" - else: - # For OpenAI and other providers, embedder status is not applicable, so consider it healthy - is_embedder_healthy = True - - audioai_overall_healthy = is_llm_healthy and is_embedder_healthy - health_status["services"]["audioai"] = { "status": llm_health.get("status", "❌ Unknown"), - "healthy": audioai_overall_healthy, + "healthy": "βœ…" in llm_health.get("status", ""), "base_url": llm_health.get("base_url", ""), "model": llm_health.get("default_model", ""), "provider": os.getenv("LLM_PROVIDER", "openai"), - "embedder_model": llm_health.get("embedder_model", ""), - "embedder_status": llm_health.get("embedder_status", ""), "critical": False, } except asyncio.TimeoutError: @@ -220,8 +204,6 @@ async def health_check(): "healthy": False, "provider": os.getenv("LLM_PROVIDER", "openai"), "critical": False, - "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), - "embedder_status": "❌ Not Checked (Timeout)" } overall_healthy = False except Exception as e: @@ -230,8 +212,6 @@ async def health_check(): "healthy": False, "provider": os.getenv("LLM_PROVIDER", "openai"), "critical": False, - "embedder_model": os.getenv("OLLAMA_EMBEDDER_MODEL"), - "embedder_status": "❌ Not Checked (Connection Failed)" } overall_healthy = False diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 89679dba..3e540b19 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -4,12 +4,12 @@ """ import logging -from fastapi import APIRouter, Depends, Query, HTTPException +from fastapi import APIRouter, Depends, Query, HTTPException, Request 
from pydantic import BaseModel from typing import List, Optional from advanced_omi_backend.auth import current_active_user -from advanced_omi_backend.controllers.queue_controller import get_jobs, get_job_stats, get_queue_health, redis_conn +from advanced_omi_backend.controllers.queue_controller import get_jobs, get_job_stats, get_queue_health, redis_conn, QUEUE_NAMES from advanced_omi_backend.users import User from rq.job import Job import redis.asyncio as aioredis @@ -86,6 +86,7 @@ async def get_job( "func_name": job.func_name if hasattr(job, 'func_name') else "", "args": job.args, "kwargs": job.kwargs, + "meta": job.meta if job.meta else {}, "result": job.result, "error_message": str(job.exc_info) if job.exc_info else None, } @@ -95,6 +96,46 @@ async def get_job( raise HTTPException(status_code=404, detail="Job not found") +@router.delete("/jobs/{job_id}") +async def cancel_job( + job_id: str, + current_user: User = Depends(current_active_user) +): + """Cancel or delete a job.""" + try: + job = Job.fetch(job_id, connection=redis_conn) + + # Check user permission (non-admins can only cancel their own jobs) + if not current_user.is_superuser: + job_user_id = job.kwargs.get("user_id") if job.kwargs else None + if job_user_id != str(current_user.user_id): + raise HTTPException(status_code=403, detail="Access forbidden") + + # Cancel if queued or processing, delete if completed/failed + if job.is_queued or job.is_started or job.is_deferred or job.is_scheduled: + # Cancel the job + job.cancel() + logger.info(f"Cancelled job {job_id}") + return { + "job_id": job_id, + "action": "cancelled", + "message": f"Job {job_id} has been cancelled" + } + else: + # Delete completed/failed jobs + job.delete() + logger.info(f"Deleted job {job_id}") + return { + "job_id": job_id, + "action": "deleted", + "message": f"Job {job_id} has been deleted" + } + + except HTTPException as e: + logger.error(f"Failed to cancel/delete job {job_id}: {e}") + raise HTTPException(status_code=404, 
detail=f"Job not found or could not be cancelled: {str(e)}") + + @router.get("/jobs/by-session/{session_id}") async def get_jobs_by_session( session_id: str, @@ -113,7 +154,7 @@ async def get_jobs_by_session( all_jobs = [] processed_job_ids = set() # Track which jobs we've already processed - queues = ["default", "transcription", "memory"] + queues = QUEUE_NAMES def get_job_status(job, registries_map): """Determine job status from registries.""" @@ -159,6 +200,9 @@ def process_job_and_dependents(job, queue_name, base_status): "ended_at": job.ended_at.isoformat() if job.ended_at else None, "description": job.description or "", "result": job.result, + "meta": job.meta if job.meta else {}, + "args": job.args, + "kwargs": job.kwargs if job.kwargs else {}, "error_message": str(job.exc_info) if job.exc_info else None, }) @@ -402,15 +446,15 @@ async def flush_jobs( raise HTTPException(status_code=403, detail="Admin access required") try: - from datetime import datetime, timedelta + from datetime import datetime, timedelta, timezone from rq.registry import FinishedJobRegistry, FailedJobRegistry, CanceledJobRegistry from advanced_omi_backend.controllers.queue_controller import get_queue - cutoff_time = datetime.utcnow() - timedelta(hours=request.older_than_hours) + cutoff_time = datetime.now(timezone.utc) - timedelta(hours=request.older_than_hours) total_removed = 0 # Get all queues - queues = ["default", "transcription", "memory"] + queues = QUEUE_NAMES for queue_name in queues: queue = get_queue(queue_name) @@ -484,32 +528,57 @@ async def flush_all_jobs( from advanced_omi_backend.controllers.queue_controller import get_queue total_removed = 0 - queues = ["default", "transcription", "memory"] + queues = QUEUE_NAMES for queue_name in queues: queue = get_queue(queue_name) + # First, empty the queue itself (removes queued jobs) + queued_count = len(queue) + queue.empty() + total_removed += queued_count + logger.info(f"Emptied {queued_count} queued jobs from {queue_name}") + # 
Remove from all registries registries = [ - FinishedJobRegistry(queue=queue), - FailedJobRegistry(queue=queue), - CanceledJobRegistry(queue=queue), - StartedJobRegistry(queue=queue), - DeferredJobRegistry(queue=queue), - ScheduledJobRegistry(queue=queue) + ("finished", FinishedJobRegistry(queue=queue)), + ("failed", FailedJobRegistry(queue=queue)), + ("canceled", CanceledJobRegistry(queue=queue)), + ("started", StartedJobRegistry(queue=queue)), + ("deferred", DeferredJobRegistry(queue=queue)), + ("scheduled", ScheduledJobRegistry(queue=queue)) ] - for registry in registries: - for job_id in registry.get_job_ids(): + for registry_name, registry in registries: + job_ids = list(registry.get_job_ids()) # Convert to list to avoid iterator issues + logger.info(f"Flushing {len(job_ids)} jobs from {queue_name}/{registry_name}") + + for job_id in job_ids: try: + # Try to fetch and delete the job job = Job.fetch(job_id, connection=redis_conn) + + # Cancel if running, then delete + if job.is_started: + try: + job.cancel() + logger.info(f"Cancelled running job {job_id}") + except Exception as cancel_error: + logger.warning(f"Could not cancel job {job_id}: {cancel_error}") + job.delete() total_removed += 1 + except Exception as e: - logger.error(f"Error deleting job {job_id}: {e}") + # Job might already be deleted or not exist - try to remove from registry anyway + logger.warning(f"Error deleting job {job_id}: {e}") + try: + registry.remove(job_id) + logger.info(f"Removed stale job reference {job_id} from {registry_name} registry") + except Exception as reg_error: + logger.error(f"Could not remove {job_id} from registry: {reg_error}") - # Also empty the queue itself - queue.empty() + logger.info(f"Flushed {total_removed} jobs from all queues") return { "total_removed": total_removed, @@ -549,6 +618,12 @@ async def get_redis_sessions( session_data = await redis_client.hgetall(key) if session_data: session_id = key.decode().replace("audio:session:", "") + + # Get conversation 
count for this session + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + conversation_count = int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + sessions.append({ "session_id": session_id, "user_id": session_data.get(b"user_id", b"").decode(), @@ -559,7 +634,8 @@ async def get_redis_sessions( "status": session_data.get(b"status", b"").decode(), "started_at": session_data.get(b"started_at", b"").decode(), "chunks_published": int(session_data.get(b"chunks_published", b"0").decode() or 0), - "last_chunk_at": session_data.get(b"last_chunk_at", b"").decode() + "last_chunk_at": session_data.get(b"last_chunk_at", b"").decode(), + "conversation_count": conversation_count }) except Exception as e: logger.error(f"Error getting session info for {key}: {e}") @@ -626,4 +702,269 @@ async def clear_old_sessions( except Exception as e: logger.error(f"Failed to clear sessions: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"Failed to clear sessions: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Failed to clear sessions: {str(e)}") + + +@router.get("/dashboard") +async def get_dashboard_data( + request: Request, + expanded_sessions: str = Query(default="", description="Comma-separated list of session IDs to fetch jobs for"), + current_user: User = Depends(current_active_user) +): + """Get all data needed for the Queue dashboard in a single API call. 
+ + Returns: + - Jobs grouped by status (queued, processing, completed, failed) + - Queue statistics + - Streaming status + - Session jobs for expanded sessions + """ + try: + from advanced_omi_backend.controllers import system_controller + from rq.registry import FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry + from advanced_omi_backend.controllers.queue_controller import get_queue + + # Parse expanded sessions list + expanded_session_ids = [s.strip() for s in expanded_sessions.split(",") if s.strip()] if expanded_sessions else [] + + # Fetch all data in parallel + import asyncio + + async def fetch_jobs_by_status(status_name: str, limit: int = 100): + """Fetch jobs by status using existing registry logic.""" + try: + queues = QUEUE_NAMES + all_jobs = [] + + for queue_name in queues: + queue = get_queue(queue_name) + + # Get job IDs based on status + if status_name == "queued": + job_ids = queue.job_ids[:limit] + elif status_name == "processing": + job_ids = list(StartedJobRegistry(queue=queue).get_job_ids())[:limit] + elif status_name == "completed": + job_ids = list(FinishedJobRegistry(queue=queue).get_job_ids())[:limit] + elif status_name == "failed": + job_ids = list(FailedJobRegistry(queue=queue).get_job_ids())[:limit] + else: + continue + + # Fetch job details + for job_id in job_ids: + try: + job = Job.fetch(job_id, connection=redis_conn) + + # Check user permission + if not current_user.is_superuser: + job_user_id = job.kwargs.get("user_id") if job.kwargs else None + if job_user_id != str(current_user.user_id): + continue + + # Add job with metadata + all_jobs.append({ + "job_id": job.id, + "job_type": job.func_name.split('.')[-1] if job.func_name else "unknown", + "user_id": job.kwargs.get("user_id") if job.kwargs else None, + "status": status_name, + "priority": "normal", # RQ doesn't have priority concept + "data": {"description": job.description or ""}, + "result": job.result, + "meta": job.meta if job.meta else {}, + "kwargs": job.kwargs 
if job.kwargs else {}, + "error_message": str(job.exc_info) if job.exc_info else None, + "created_at": job.created_at.isoformat() if job.created_at else None, + "started_at": job.started_at.isoformat() if job.started_at else None, + "ended_at": job.ended_at.isoformat() if job.ended_at else None, + "retry_count": 0, # RQ doesn't track this by default + "max_retries": 0, + "progress_percent": 0, + "progress_message": "", + "queue": queue_name + }) + except Exception as e: + logger.debug(f"Error fetching job {job_id}: {e}") + continue + + return all_jobs + except Exception as e: + logger.error(f"Error fetching {status_name} jobs: {e}") + return [] + + async def fetch_stats(): + """Fetch queue stats.""" + try: + return get_job_stats() + except Exception as e: + logger.error(f"Error fetching stats: {e}") + return {"total_jobs": 0, "queued_jobs": 0, "processing_jobs": 0, "completed_jobs": 0, "failed_jobs": 0} + + async def fetch_streaming_status(): + """Fetch streaming status.""" + try: + # Import session_controller for streaming status + from advanced_omi_backend.controllers import session_controller + # Use the actual request object from the parent function + return await session_controller.get_streaming_status(request) + except Exception as e: + logger.error(f"Error fetching streaming status: {e}") + return {"active_sessions": [], "stream_health": {}, "rq_queues": {}} + + async def fetch_session_jobs(session_id: str): + """Fetch jobs for a specific session.""" + try: + # Reuse the existing logic from get_jobs_by_session endpoint + from advanced_omi_backend.models.conversation import Conversation + + # Get conversation IDs for this session + conversations = await Conversation.find(Conversation.audio_uuid == session_id).to_list() + conversation_ids = {conv.conversation_id for conv in conversations} + + all_jobs = [] + processed_job_ids = set() + queues = QUEUE_NAMES + + def get_job_status(job): + if job.is_queued: + return "queued" + elif job.is_started: + return 
"processing" + elif job.is_finished: + return "completed" + elif job.is_failed: + return "failed" + elif job.is_deferred: + return "deferred" + else: + return "unknown" + + # Find all jobs for this session + for queue_name in queues: + queue = get_queue(queue_name) + + # Check all registries + from rq.registry import ( + FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry, + CanceledJobRegistry, DeferredJobRegistry, ScheduledJobRegistry + ) + + registries = [ + ("queued", queue.job_ids), + ("processing", StartedJobRegistry(queue=queue).get_job_ids()), + ("completed", FinishedJobRegistry(queue=queue).get_job_ids()), + ("failed", FailedJobRegistry(queue=queue).get_job_ids()) + ] + + for status_name, job_ids in registries: + for job_id in job_ids: + if job_id in processed_job_ids: + continue + + try: + job = Job.fetch(job_id, connection=redis_conn) + + # Check if job belongs to this session + matches_session = False + if job.meta and 'audio_uuid' in job.meta and job.meta['audio_uuid'] == session_id: + matches_session = True + elif job.args and len(job.args) > 0 and job.args[0] == session_id: + matches_session = True + + if not matches_session: + continue + + # Check user permission + if not current_user.is_superuser: + job_user_id = job.kwargs.get("user_id") if job.kwargs else None + if job_user_id != str(current_user.user_id): + continue + + processed_job_ids.add(job_id) + all_jobs.append({ + "job_id": job.id, + "job_type": job.func_name.split('.')[-1] if job.func_name else "unknown", + "queue": queue_name, + "status": get_job_status(job), + "created_at": job.created_at.isoformat() if job.created_at else None, + "started_at": job.started_at.isoformat() if job.started_at else None, + "ended_at": job.ended_at.isoformat() if job.ended_at else None, + "description": job.description or "", + "result": job.result, + "meta": job.meta if job.meta else {}, + "error_message": str(job.exc_info) if job.exc_info else None + }) + except Exception as e: + 
logger.debug(f"Error fetching job {job_id}: {e}") + continue + + return {"session_id": session_id, "jobs": all_jobs} + except Exception as e: + logger.error(f"Error fetching jobs for session {session_id}: {e}") + return {"session_id": session_id, "jobs": []} + + # Execute all fetches in parallel + queued_jobs_task = fetch_jobs_by_status("queued", limit=100) + processing_jobs_task = fetch_jobs_by_status("processing", limit=100) + completed_jobs_task = fetch_jobs_by_status("completed", limit=50) + failed_jobs_task = fetch_jobs_by_status("failed", limit=50) + stats_task = fetch_stats() + streaming_status_task = fetch_streaming_status() + session_jobs_tasks = [fetch_session_jobs(sid) for sid in expanded_session_ids] + + results = await asyncio.gather( + queued_jobs_task, + processing_jobs_task, + completed_jobs_task, + failed_jobs_task, + stats_task, + streaming_status_task, + *session_jobs_tasks, + return_exceptions=True + ) + + queued_jobs = results[0] if not isinstance(results[0], Exception) else [] + processing_jobs = results[1] if not isinstance(results[1], Exception) else [] + completed_jobs = results[2] if not isinstance(results[2], Exception) else [] + failed_jobs = results[3] if not isinstance(results[3], Exception) else [] + stats = results[4] if not isinstance(results[4], Exception) else {"total_jobs": 0} + streaming_status = results[5] if not isinstance(results[5], Exception) else {"active_sessions": []} + recent_conversations = [] + session_jobs_results = results[6:] if len(results) > 6 else [] + + # Convert session jobs list to dict + session_jobs = {} + for result in session_jobs_results: + if not isinstance(result, Exception) and result: + session_jobs[result["session_id"]] = result["jobs"] + + # Convert conversations to dict format for frontend + conversations_list = [] + for conv in recent_conversations: + conversations_list.append({ + "conversation_id": conv.conversation_id, + "audio_uuid": conv.audio_uuid, + "user_id": str(conv.user_id) if 
conv.user_id else None, + "created_at": conv.created_at.isoformat() if conv.created_at else None, + "title": conv.title, + "summary": conv.summary, + "transcript_text": conv.get_active_transcript_text() if hasattr(conv, 'get_active_transcript_text') else None, + }) + + return { + "jobs": { + "queued": queued_jobs, + "processing": processing_jobs, + "completed": completed_jobs, + "failed": failed_jobs + }, + "stats": stats, + "streaming_status": streaming_status, + "recent_conversations": conversations_list, + "session_jobs": session_jobs, + "timestamp": asyncio.get_event_loop().time() + } + + except Exception as e: + logger.error(f"Failed to get dashboard data: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to get dashboard data: {str(e)}") \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index b3d886e5..c03a7802 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -10,7 +10,7 @@ from fastapi import APIRouter, Depends, Request from advanced_omi_backend.auth import current_active_user, current_superuser -from advanced_omi_backend.controllers import system_controller +from advanced_omi_backend.controllers import system_controller, session_controller, queue_controller from advanced_omi_backend.models.user import User logger = logging.getLogger(__name__) @@ -133,16 +133,16 @@ async def delete_all_user_memories(current_user: User = Depends(current_active_u @router.get("/streaming/status") async def get_streaming_status(request: Request, current_user: User = Depends(current_superuser)): """Get status of active streaming sessions and Redis Streams health. 
Admin only.""" - return await system_controller.get_streaming_status(request) + return await session_controller.get_streaming_status(request) @router.post("/streaming/cleanup") async def cleanup_stuck_stream_workers(request: Request, current_user: User = Depends(current_superuser)): """Clean up stuck Redis Stream workers and pending messages. Admin only.""" - return await system_controller.cleanup_stuck_stream_workers(request) + return await queue_controller.cleanup_stuck_stream_workers(request) @router.post("/streaming/cleanup-sessions") async def cleanup_old_sessions(request: Request, max_age_seconds: int = 3600, current_user: User = Depends(current_superuser)): """Clean up old session tracking metadata. Admin only.""" - return await system_controller.cleanup_old_sessions(request, max_age_seconds) + return await session_controller.cleanup_old_sessions(request, max_age_seconds) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_service.py b/backends/advanced/src/advanced_omi_backend/services/audio_service.py index 094f5526..992ede75 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_service.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_service.py @@ -5,15 +5,10 @@ using Redis Streams for event-driven architecture. 
""" -import asyncio -import json import logging import os import time -import uuid -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import redis.asyncio as aioredis from wyoming.audio import AudioChunk @@ -22,20 +17,6 @@ audio_logger = logging.getLogger("audio_processing") -@dataclass -class AudioStreamMessage: - """Message format for audio stream.""" - client_id: str - user_id: str - user_email: str - audio_data: bytes - audio_rate: int - audio_width: int - audio_channels: int - audio_uuid: Optional[str] = None - timestamp: Optional[int] = None - - class AudioStreamService: """ Audio service using Redis Streams for event-driven processing. diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py index 9b82aabf..26b985ab 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py @@ -68,7 +68,7 @@ async def get_session_results(self, session_id: str) -> list[dict]: # Log detailed result info chunk_ids = [r["chunk_id"] for r in results] total_text_length = sum(len(r["text"]) for r in results) - logger.info( + logger.debug( f"πŸ”„ Retrieved {len(results)} results for session {session_id}: " f"chunks={chunk_ids}, total_text={total_text_length} chars" ) @@ -143,7 +143,7 @@ async def get_combined_results(self, session_id: str) -> dict: "provider": provider } - logger.info( + logger.debug( f"πŸ“¦ Combined {len(results)} chunks for session {session_id}: " f"{len(full_text)} chars, {len(all_words)} words, {len(all_segments)} segments" ) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py index ea770253..c36ee188 100644 --- 
a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py @@ -577,7 +577,7 @@ async def store_result( approximate=True ) - logger.info( + logger.debug( f"➑️ Stored result {chunk_id} in {session_results_stream}: " f"text_len={len(text)}, msg_id={message_id.decode()}" ) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py index 98e93cfc..95bf25e1 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py @@ -248,7 +248,7 @@ async def add_audio_chunk( # Log every 10th chunk to avoid spam if session_buffer["chunk_count"] % 10 == 0 or session_buffer["chunk_count"] <= 5: - logger.info( + logger.debug( f"πŸ“€ Added fixed-size chunk {chunk_id_formatted} to {stream_name} " f"({len(chunk_audio)} bytes = {len(chunk_audio)/bytes_per_second:.3f}s, " f"buffer remaining: {len(session_buffer['buffer'])} bytes)" diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py index 89b80de1..e9261955 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py @@ -53,7 +53,7 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "audio/raw"} - logger.info(f"Sending {len(audio_data)} bytes to Deepgram API") + logger.debug(f"Sending {len(audio_data)} bytes to Deepgram API") # Calculate dynamic timeout based on audio file size estimated_duration = len(audio_data) / (sample_rate * 2 * 1) # 16-bit mono @@ -128,7 +128,7 @@ async def transcribe(self, 
audio_data: bytes, sample_rate: int, diarize: bool = segments.append(current_segment) else: transcript = alternative.get("transcript", "").strip() - logger.info( + logger.debug( f"Deepgram basic transcription successful: {len(transcript)} characters" ) @@ -184,11 +184,13 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = logger.warning("Deepgram returned empty transcript") return {"text": "", "words": [], "segments": []} else: - logger.warning("Deepgram response missing expected transcript structure") - return {"text": "", "words": [], "segments": []} + error_msg = "Deepgram response missing expected transcript structure" + logger.error(error_msg) + raise RuntimeError(error_msg) else: - logger.error(f"Deepgram API error: {response.status_code} - {response.text}") - return {"text": "", "words": [], "segments": []} + error_msg = f"Deepgram API error: {response.status_code} - {response.text}" + logger.error(error_msg) + raise RuntimeError(error_msg) except httpx.TimeoutException as e: timeout_type = "unknown" @@ -200,13 +202,16 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = timeout_type = "write (upload)" elif "pool" in str(e).lower(): timeout_type = "connection pool" - logger.error( - f"HTTP {timeout_type} timeout during Deepgram API call for {len(audio_data)} bytes: {e}" - ) - return {"text": "", "words": [], "segments": []} + error_msg = f"HTTP {timeout_type} timeout during Deepgram API call for {len(audio_data)} bytes: {e}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e + except RuntimeError: + # Re-raise RuntimeError from above (API errors, timeouts) + raise except Exception as e: - logger.error(f"Error calling Deepgram API: {e}") - return {"text": "", "words": [], "segments": []} + error_msg = f"Unexpected error calling Deepgram API: {e}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e class DeepgramStreamingProvider(StreamingTranscriptionProvider): @@ -264,7 
+269,7 @@ async def start_stream(self, client_id: str, sample_rate: int = 16000, diarize: "stream_id": str(uuid.uuid4()) } - logger.info(f"Deepgram WebSocket connected for client {client_id}") + logger.debug(f"Deepgram WebSocket connected for client {client_id}") except Exception as e: logger.error(f"Failed to start Deepgram streaming for {client_id}: {e}") diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index b66b6f08..50b12645 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -5,6 +5,7 @@ to enhance transcripts with actual speaker names instead of generic labels. """ +import asyncio import json import logging import os @@ -12,7 +13,7 @@ from typing import Dict, List, Optional import aiohttp -from aiohttp import ClientConnectorError, ClientTimeout +from aiohttp import ClientConnectorError logger = logging.getLogger(__name__) @@ -144,7 +145,7 @@ async def diarize_identify_match( except ClientConnectorError as e: logger.error(f"🎀 Failed to connect to speaker recognition service: {e}") return {} - except ClientTimeout as e: + except asyncio.TimeoutError as e: logger.error(f"🎀 Timeout connecting to speaker recognition service: {e}") return {} except aiohttp.ClientError as e: @@ -475,7 +476,7 @@ async def check_if_enrolled_speaker_present( import uuid from pathlib import Path from advanced_omi_backend.utils.audio_extraction import extract_audio_for_results - from advanced_omi_backend.audio_utils import write_pcm_to_wav + from advanced_omi_backend.utils.audio_utils import write_pcm_to_wav logger.info(f"🎀 [SPEAKER CHECK] Starting speaker check for session {session_id}") logger.info(f"🎀 [SPEAKER CHECK] Client: {client_id}, User: {user_id}") diff --git a/backends/advanced/src/advanced_omi_backend/audio_utils.py 
b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py similarity index 87% rename from backends/advanced/src/advanced_omi_backend/audio_utils.py rename to backends/advanced/src/advanced_omi_backend/utils/audio_utils.py index 302d068e..2a4aeaf9 100644 --- a/backends/advanced/src/advanced_omi_backend/audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py @@ -267,68 +267,50 @@ async def _process_audio_cropping_with_relative_timestamps( chunk_repo: Optional['AudioChunksRepository'] = None, ) -> bool: """ - Process audio cropping with automatic relative timestamp conversion. - This function handles both live processing and reprocessing scenarios. + Process audio cropping with speech segments already in relative format. + + The segments are expected to be in relative format (seconds from audio start), + as provided by Deepgram transcription. No timestamp conversion is needed. """ try: - # Convert absolute timestamps to relative timestamps - # Extract file start time from filename: timestamp_client_uuid.wav - filename = original_path.split("/")[-1] - logger.info(f"πŸ• Parsing filename: {filename}") - filename_parts = filename.split("_") - if len(filename_parts) < 3: - logger.error( - f"Invalid filename format: {filename}. 
Expected format: timestamp_client_id_audio_uuid.wav" - ) - return False - - try: - file_start_timestamp = float(filename_parts[0]) - except ValueError as e: - logger.error(f"Cannot parse timestamp from filename {filename}: {e}") - return False - - # Convert speech segments to relative timestamps - relative_segments = [] - for start_abs, end_abs in speech_segments: + # Validate input segments + validated_segments = [] + for start_rel, end_rel in speech_segments: # Validate input timestamps - if start_abs >= end_abs: + if start_rel >= end_rel: logger.warning( - f"⚠️ Invalid speech segment: start={start_abs} >= end={end_abs}, skipping" + f"⚠️ Invalid speech segment: start={start_rel} >= end={end_rel}, skipping" ) continue - start_rel = start_abs - file_start_timestamp - end_rel = end_abs - file_start_timestamp - - # Ensure relative timestamps are positive (sanity check) + # Ensure timestamps are positive (sanity check) if start_rel < 0: logger.warning( - f"⚠️ Negative start timestamp: {start_rel} (absolute: {start_abs}, file_start: {file_start_timestamp}), clamping to 0.0" + f"⚠️ Negative start timestamp: {start_rel}, clamping to 0.0" ) start_rel = 0.0 if end_rel < 0: logger.warning( - f"⚠️ Negative end timestamp: {end_rel} (absolute: {end_abs}, file_start: {file_start_timestamp}), skipping segment" + f"⚠️ Negative end timestamp: {end_rel}, skipping segment" ) continue - relative_segments.append((start_rel, end_rel)) + validated_segments.append((start_rel, end_rel)) - logger.info(f"πŸ• Converting timestamps for {audio_uuid}: file_start={file_start_timestamp}") - logger.info(f"πŸ• Absolute segments: {speech_segments}") - logger.info(f"πŸ• Relative segments: {relative_segments}") + logger.info(f"πŸ• Processing cropping for {audio_uuid}") + logger.info(f"πŸ• Input segments (relative timestamps): {speech_segments}") + logger.info(f"πŸ• Validated segments: {validated_segments}") - # Validate that we have valid relative segments after conversion - if not 
relative_segments: + # Validate that we have valid segments + if not validated_segments: logger.warning( - f"No valid relative segments after timestamp conversion for {audio_uuid}" + f"No valid segments for cropping {audio_uuid}" ) return False - success = await _crop_audio_with_ffmpeg(original_path, relative_segments, output_path) + success = await _crop_audio_with_ffmpeg(original_path, validated_segments, output_path) if success: - # Update database with cropped file info (keep original absolute timestamps for reference) + # Update database with cropped file info cropped_filename = output_path.split("/")[-1] if chunk_repo is not None: await chunk_repo.update_cropped_audio(audio_uuid, cropped_filename, speech_segments) diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py index c6cfa06e..ef83f3ba 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py @@ -13,6 +13,39 @@ logger = logging.getLogger(__name__) +def is_meaningful_speech(combined_results: dict) -> bool: + """ + Convenience wrapper to check if combined transcription results contain meaningful speech. + + This is a shared helper used by both speech detection and conversation timeout logic. 
+ + Args: + combined_results: Combined results from TranscriptionResultsAggregator with: + - "text": str - Full transcript text + - "words": list - Word-level data with confidence and timing + - "segments": list - Speaker segments + - "chunk_count": int - Number of chunks processed + + Returns: + bool: True if meaningful speech detected, False otherwise + + Example: + >>> combined = await aggregator.get_combined_results(session_id) + >>> if is_meaningful_speech(combined): + >>> print("Meaningful speech detected!") + """ + if not combined_results.get("text"): + return False + + transcript_data = { + "text": combined_results["text"], + "words": combined_results.get("words", []) + } + + speech_analysis = analyze_speech(transcript_data) + return speech_analysis["has_speech"] + + def analyze_speech(transcript_data: dict) -> dict: """ Analyze transcript for meaningful speech to determine if conversation should be created. diff --git a/backends/advanced/src/advanced_omi_backend/workers/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/__init__.py index 5b9b1044..fb32797d 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/workers/__init__.py @@ -3,6 +3,7 @@ This package provides modular RQ job functions organized by domain: - transcription_jobs: Speech-to-text processing +- speaker_jobs: Speaker recognition and identification - conversation_jobs: Conversation management and updates - memory_jobs: Memory extraction and processing - audio_jobs: Audio file processing and cropping @@ -13,10 +14,15 @@ # Import from transcription_jobs from .transcription_jobs import ( transcribe_full_audio_job, - recognise_speakers_job, stream_speech_detection_job, ) +# Import from speaker_jobs +from .speaker_jobs import ( + check_enrolled_speakers_job, + recognise_speakers_job, +) + # Import from conversation_jobs from .conversation_jobs import ( open_conversation_job, @@ -30,10 +36,8 @@ # Import from 
audio_jobs from .audio_jobs import ( - process_audio_job, process_cropping_job, audio_streaming_persistence_job, - enqueue_audio_processing, enqueue_cropping, ) @@ -49,18 +53,23 @@ redis_conn, REDIS_URL, JOB_RESULT_TTL, - _ensure_beanie_initialized, TRANSCRIPTION_QUEUE, MEMORY_QUEUE, DEFAULT_QUEUE, ) +# Import from job models +from advanced_omi_backend.models.job import _ensure_beanie_initialized + __all__ = [ # Transcription jobs "transcribe_full_audio_job", - "recognise_speakers_job", "stream_speech_detection_job", + # Speaker jobs + "check_enrolled_speakers_job", + "recognise_speakers_job", + # Conversation jobs "open_conversation_job", "audio_streaming_persistence_job", @@ -70,9 +79,7 @@ "enqueue_memory_processing", # Audio jobs - "process_audio_job", "process_cropping_job", - "enqueue_audio_processing", "enqueue_cropping", # Queue utils diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py index 0cd84a63..1c7b227a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py @@ -14,264 +14,124 @@ from advanced_omi_backend.controllers.queue_controller import ( default_queue, - _ensure_beanie_initialized, JOB_RESULT_TTL, ) +from advanced_omi_backend.models.job import _ensure_beanie_initialized logger = logging.getLogger(__name__) -def process_audio_job( - client_id: str, - user_id: str, - user_email: str, - audio_data: bytes, - audio_rate: int, - audio_width: int, - audio_channels: int, - audio_uuid: Optional[str] = None, - timestamp: Optional[int] = None +@async_job(redis=True, beanie=True) +async def process_cropping_job( + conversation_id: str, + audio_path: str, + redis_client=None ) -> Dict[str, Any]: """ - RQ job function for audio file writing and database entry creation. - - This function is executed by RQ workers and can survive server restarts. 
- """ - import asyncio - import time - import uuid - from pathlib import Path - from wyoming.audio import AudioChunk - from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink - from advanced_omi_backend.database import get_collections - - try: - logger.info(f"πŸ”„ RQ: Starting audio processing for client {client_id}") - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - async def process(): - # Get repository - collections = get_collections() - from advanced_omi_backend.database import AudioChunksRepository - from advanced_omi_backend.config import CHUNK_DIR - repository = AudioChunksRepository(collections["chunks_col"]) - - # Use CHUNK_DIR from config - chunk_dir = CHUNK_DIR - - # Ensure directory exists - chunk_dir.mkdir(parents=True, exist_ok=True) - - # Create audio UUID if not provided - final_audio_uuid = audio_uuid or uuid.uuid4().hex - final_timestamp = timestamp or int(time.time()) - - # Create filename and file sink - wav_filename = f"{final_timestamp}_{client_id}_{final_audio_uuid}.wav" - file_path = chunk_dir / wav_filename - - # Create file sink - sink = LocalFileSink( - file_path=str(file_path), - sample_rate=int(audio_rate), - channels=int(audio_channels), - sample_width=int(audio_width) - ) - - # Open sink and write audio - await sink.open() - audio_chunk = AudioChunk( - rate=audio_rate, - width=audio_width, - channels=audio_channels, - audio=audio_data - ) - await sink.write(audio_chunk) - await sink.close() - - # Create database entry - await repository.create_chunk( - audio_uuid=final_audio_uuid, - audio_path=wav_filename, - client_id=client_id, - timestamp=final_timestamp, - user_id=user_id, - user_email=user_email, - ) - - logger.info(f"βœ… RQ: Completed audio processing for client {client_id}, file: {wav_filename}") + RQ job function for audio cropping - removes silent segments from audio. 
- # Enqueue transcript processing for this audio file - # First ensure Beanie is initialized for this worker process - await _ensure_beanie_initialized() + This job: + 1. Reads transcript segments from conversation + 2. Extracts speech timestamps + 3. Creates cropped audio file with only speech segments + 4. Updates audio_chunks collection with cropped file path - # Create a conversation entry - from advanced_omi_backend.models.conversation import create_conversation - import uuid as uuid_lib - - conversation_id = str(uuid_lib.uuid4()) - conversation = create_conversation( - conversation_id=conversation_id, - audio_uuid=final_audio_uuid, - user_id=user_id, - client_id=client_id - ) - # Set placeholder title/summary - conversation.title = "Processing..." - conversation.summary = "Transcript processing in progress" - await conversation.insert() - - logger.info(f"πŸ“ RQ: Created conversation {conversation_id} for audio {final_audio_uuid}") - - # Now enqueue transcript processing (runs outside async context) - version_id = str(uuid_lib.uuid4()) - - return { - "success": True, - "audio_uuid": final_audio_uuid, - "conversation_id": conversation_id, - "wav_filename": wav_filename, - "client_id": client_id, - "version_id": version_id, - "file_path": str(file_path) - } - - result = loop.run_until_complete(process()) - - # Enqueue transcript processing job chain (outside async context) - if result.get("success") and result.get("conversation_id"): - from .transcription_jobs import transcribe_full_audio_job, recognise_speakers_job - from .memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, JOB_RESULT_TTL - - conversation_id = result["conversation_id"] - - # Job 1: Transcribe audio to text - transcript_job = transcription_queue.enqueue( - transcribe_full_audio_job, - conversation_id, - result["audio_uuid"], - result["file_path"], - result["version_id"], - user_id, - "upload", - 
job_timeout=600, - result_ttl=JOB_RESULT_TTL, - job_id=f"upload_{conversation_id[:8]}", - description=f"Transcribe audio for {conversation_id[:8]}", - meta={'audio_uuid': result["audio_uuid"]} - ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcript_job.id}") - - # Job 2: Recognize speakers (depends on transcription) - speaker_job = transcription_queue.enqueue( - recognise_speakers_job, - conversation_id, - result["version_id"], - result["file_path"], - user_id, - "", # transcript_text - will be read from DB - [], # words - will be read from DB - depends_on=transcript_job, - job_timeout=600, - result_ttl=JOB_RESULT_TTL, - job_id=f"speaker_{conversation_id[:8]}", - description=f"Recognize speakers for {conversation_id[:8]}", - meta={'audio_uuid': result["audio_uuid"]} - ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") - - # Job 3: Extract memories (depends on speaker recognition) - memory_job = memory_queue.enqueue( - process_memory_job, - None, # client_id - will be read from conversation in DB - user_id, - "", # user_email - will be read from user in DB - conversation_id, - depends_on=speaker_job, - job_timeout=1800, - result_ttl=JOB_RESULT_TTL, - job_id=f"memory_{conversation_id[:8]}", - description=f"Extract memories for {conversation_id[:8]}", - meta={'audio_uuid': result["audio_uuid"]} - ) - logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {speaker_job.id})") - - result["transcript_job_id"] = transcript_job.id - result["speaker_job_id"] = speaker_job.id - result["memory_job_id"] = memory_job.id - - return result - - finally: - loop.close() - - except Exception as e: - logger.error(f"❌ RQ: Audio processing failed for client {client_id}: {e}") - raise - - -def process_cropping_job( - client_id: str, - user_id: str, - audio_uuid: str, - original_path: str, - speech_segments: list, - output_path: str -) -> Dict[str, Any]: - """ - RQ job function for audio cropping. 
+ Args: + conversation_id: Conversation ID + audio_path: Path to original audio file + redis_client: Redis client (injected by decorator) - This function is executed by RQ workers and can survive server restarts. + Returns: + Dict with processing results """ - import asyncio - from advanced_omi_backend.audio_utils import _process_audio_cropping_with_relative_timestamps + from pathlib import Path + from advanced_omi_backend.utils.audio_utils import _process_audio_cropping_with_relative_timestamps from advanced_omi_backend.database import get_collections, AudioChunksRepository + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.config import CHUNK_DIR try: - logger.info(f"πŸ”„ RQ: Starting audio cropping for audio {audio_uuid}") - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - async def process(): - # Get repository - collections = get_collections() - repository = AudioChunksRepository(collections["chunks_col"]) - - # Convert list of lists to list of tuples - segments_tuples = [tuple(seg) for seg in speech_segments] - - # Process cropping - await _process_audio_cropping_with_relative_timestamps( - original_path, - segments_tuples, - output_path, - audio_uuid, - repository - ) - - logger.info(f"βœ… RQ: Completed audio cropping for audio {audio_uuid}") - - return { - "success": True, - "audio_uuid": audio_uuid, - "output_path": output_path, - "segments": len(speech_segments) - } - - result = loop.run_until_complete(process()) - return result + logger.info(f"πŸ”„ RQ: Starting audio cropping for conversation {conversation_id}") + + # Get conversation to access segments + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + raise ValueError(f"Conversation {conversation_id} not found") + + # Extract speech segments from transcript + segments = conversation.segments + if not segments or len(segments) == 0: + logger.warning(f"⚠️ No segments 
found for conversation {conversation_id}, skipping cropping") + return { + "success": False, + "conversation_id": conversation_id, + "reason": "no_segments" + } + + # Convert segments to (start, end) tuples + speech_segments = [(seg.start, seg.end) for seg in segments] + logger.info(f"Found {len(speech_segments)} speech segments for cropping") + + # Generate output path for cropped audio + audio_uuid = conversation.audio_uuid + original_path = Path(audio_path) + cropped_filename = f"cropped_{original_path.name}" + output_path = CHUNK_DIR / cropped_filename + + # Get repository for database updates + collections = get_collections() + repository = AudioChunksRepository(collections["chunks_col"]) + + # Process cropping + success = await _process_audio_cropping_with_relative_timestamps( + str(original_path), + speech_segments, + str(output_path), + audio_uuid, + repository + ) - finally: - loop.close() + if not success: + logger.error(f"❌ RQ: Audio cropping failed for conversation {conversation_id}") + return { + "success": False, + "conversation_id": conversation_id, + "reason": "cropping_failed" + } + + # Calculate cropped duration + cropped_duration_seconds = sum(end - start for start, end in speech_segments) + + # Update conversation with cropped audio path + conversation.cropped_audio_path = cropped_filename + await conversation.save() + logger.info(f"πŸ’Ύ Updated conversation {conversation_id[:12]} with cropped_audio_path: {cropped_filename}") + + logger.info(f"βœ… RQ: Completed audio cropping for conversation {conversation_id} ({cropped_duration_seconds:.1f}s)") + + # Update job metadata with cropped duration + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta['cropped_duration_seconds'] = round(cropped_duration_seconds, 1) + current_job.meta['segments_cropped'] = len(speech_segments) + current_job.save_meta() + + return { + "success": True, + 
"conversation_id": conversation_id, + "audio_uuid": audio_uuid, + "original_path": str(original_path), + "cropped_path": str(output_path), + "cropped_filename": cropped_filename, + "segments_count": len(speech_segments), + "cropped_duration_seconds": cropped_duration_seconds + } except Exception as e: - logger.error(f"❌ RQ: Audio cropping failed for audio {audio_uuid}: {e}") + logger.error(f"❌ RQ: Audio cropping failed for conversation {conversation_id}: {e}") raise @@ -279,24 +139,27 @@ async def process(): async def audio_streaming_persistence_job( session_id: str, user_id: str, - user_email: str, client_id: str, redis_client=None ) -> Dict[str, Any]: """ - Long-running RQ job that collects audio chunks from Redis stream and writes to disk progressively. + Long-running RQ job that progressively writes audio chunks to disk as they arrive. + + Opens a WAV file immediately and appends chunks in real-time, making the file + available for playback in the UI before the session completes. - Runs in parallel with transcription processing to reduce memory pressure on WebSocket. + Runs in parallel with transcription processing to reduce memory pressure. Args: session_id: Stream session ID user_id: User ID - user_email: User email client_id: Client ID redis_client: Redis client (injected by decorator) Returns: Dict with audio_file_path, chunk_count, total_bytes, duration_seconds + + Note: user_email is fetched from the database when needed. 
""" logger.info(f"🎡 Starting audio persistence for session {session_id}") @@ -323,20 +186,141 @@ async def audio_streaming_persistence_job( max_runtime = 3540 # 59 minutes start_time = time.time() - # Audio collection - audio_chunks = [] - chunk_count = 0 + from advanced_omi_backend.config import CHUNK_DIR + from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink + from wyoming.audio import AudioChunk + + # Ensure directory exists + CHUNK_DIR.mkdir(parents=True, exist_ok=True) + + # File rotation state + current_conversation_id = None + file_sink = None + file_path = None + wav_filename = None + conversation_chunk_count = 0 + conversation_start_time = None + + # Audio collection stats (across all conversations in this session) + total_chunk_count = 0 total_bytes = 0 end_signal_received = False consecutive_empty_reads = 0 max_empty_reads = 3 # Exit after 3 consecutive empty reads (deterministic check) + conversation_count = 0 while True: # Check timeout if time.time() - start_time > max_runtime: logger.warning(f"⏱️ Timeout reached for audio persistence {session_id}") + # Close current file if open + if file_sink: + await file_sink.close() + logger.info(f"βœ… Closed file on timeout: {wav_filename}") break + # Check if session is finalizing (user stopped recording or WebSocket disconnected) + session_status = await redis_client.hget(session_key, "status") + if session_status and session_status.decode() in ["finalizing", "complete"]: + logger.info(f"πŸ›‘ Session finalizing detected, writing final chunks...") + # Give a brief moment for any in-flight chunks to arrive + await asyncio.sleep(0.5) + # Do one final read to write remaining chunks to current file + if file_sink: + try: + final_messages = await redis_client.xreadgroup( + audio_group_name, + audio_consumer_name, + {audio_stream_name: ">"}, + count=50, + block=500 + ) + if final_messages: + for stream_name, msgs in final_messages: + for message_id, fields in msgs: + audio_data = 
fields.get(b"audio_data", b"") + chunk_id = fields.get(b"chunk_id", b"").decode() + if chunk_id != "END" and len(audio_data) > 0: + chunk = AudioChunk( + rate=16000, + width=2, + channels=1, + audio=audio_data + ) + await file_sink.write(chunk) + conversation_chunk_count += 1 + total_chunk_count += 1 + total_bytes += len(audio_data) + await redis_client.xack(audio_stream_name, audio_group_name, message_id) + logger.info(f"πŸ“¦ Final read wrote {len(final_messages[0][1]) if final_messages else 0} more chunks") + except Exception as e: + logger.debug(f"Final audio read error (non-fatal): {e}") + + # Close final file + await file_sink.close() + logger.info(f"βœ… Closed final file: {wav_filename} ({conversation_chunk_count} chunks)") + break + + # Check for conversation change (file rotation signal) + conversation_key = f"conversation:current:{session_id}" + new_conversation_id = await redis_client.get(conversation_key) + + if new_conversation_id: + new_conversation_id = new_conversation_id.decode() + + # Conversation changed - rotate to new file + if new_conversation_id != current_conversation_id: + # Close previous file if exists + if file_sink: + await file_sink.close() + duration = (time.time() - conversation_start_time) if conversation_start_time else 0 + logger.info( + f"βœ… Closed conversation {current_conversation_id[:12]} file: {wav_filename} " + f"({conversation_chunk_count} chunks, {duration:.1f}s)" + ) + + # Open new file for new conversation + current_conversation_id = new_conversation_id + conversation_count += 1 + conversation_chunk_count = 0 + conversation_start_time = time.time() + + timestamp = int(time.time() * 1000) + wav_filename = f"{timestamp}_{client_id}_{current_conversation_id}.wav" + file_path = CHUNK_DIR / wav_filename + + file_sink = LocalFileSink( + file_path=str(file_path), + sample_rate=16000, + channels=1, + sample_width=2 + ) + await file_sink.open() + logger.info( + f"πŸ“ Opened new file for conversation #{conversation_count} 
({current_conversation_id[:12]}): {file_path}" + ) + + # Store file path in Redis (keyed by conversation_id, not session_id) + audio_file_key = f"audio:file:{current_conversation_id}" + await redis_client.set(audio_file_key, str(file_path), ex=3600) + logger.info(f"πŸ’Ύ Stored audio file path in Redis: {audio_file_key}") + else: + # Key deleted - conversation ended, close current file + if file_sink and current_conversation_id: + await file_sink.close() + duration = (time.time() - conversation_start_time) if conversation_start_time else 0 + logger.info( + f"βœ… Closed conversation {current_conversation_id[:12]} file after conversation ended: {wav_filename} " + f"({conversation_chunk_count} chunks, {duration:.1f}s)" + ) + file_sink = None # Clear sink to prevent writing to closed file + current_conversation_id = None + + # If no file open yet, wait for conversation to be created + if not file_sink: + await asyncio.sleep(0.5) + continue + # Read audio chunks from stream (non-blocking) try: audio_messages = await redis_client.xreadgroup( @@ -362,13 +346,24 @@ async def audio_streaming_persistence_job( logger.info(f"πŸ“‘ Received END signal in audio persistence") end_signal_received = True elif len(audio_data) > 0: - audio_chunks.append(audio_data) - chunk_count += 1 + # Write chunk immediately to file + chunk = AudioChunk( + rate=16000, + width=2, + channels=1, + audio=audio_data + ) + await file_sink.write(chunk) + conversation_chunk_count += 1 + total_chunk_count += 1 total_bytes += len(audio_data) # Log every 40 chunks to avoid spam - if chunk_count % 40 == 0: - logger.info(f"πŸ“¦ Collected {chunk_count} audio chunks ({total_bytes / 1024 / 1024:.2f} MB)") + if total_chunk_count % 40 == 0: + logger.info( + f"πŸ“¦ Session {session_id[:12]}: {total_chunk_count} total chunks " + f"(conversation {current_conversation_id[:12]}: {conversation_chunk_count} chunks)" + ) # ACK the message await redis_client.xack(audio_stream_name, audio_group_name, message_id) @@ -388,111 
+383,57 @@ async def audio_streaming_persistence_job( await asyncio.sleep(0.1) # Check every 100ms for responsiveness - # Write complete audio file - if audio_chunks: - from advanced_omi_backend.audio_utils import write_audio_file - - complete_audio = b''.join(audio_chunks) - timestamp = int(time.time() * 1000) - - logger.info(f"πŸ’Ύ Writing {len(audio_chunks)} chunks ({total_bytes / 1024 / 1024:.2f} MB) to disk") - - wav_filename, file_path, duration = await write_audio_file( - raw_audio_data=complete_audio, - audio_uuid=session_id, - client_id=client_id, - user_id=user_id, - user_email=user_email, - timestamp=timestamp, - validate=False - ) - logger.info(f"βœ… Wrote audio file: {wav_filename} ({duration:.1f}s, {chunk_count} chunks)") + # Job complete - calculate final stats + runtime_seconds = time.time() - start_time - # Store file path in Redis for finalize job to find - audio_file_key = f"audio:file:{session_id}" - await redis_client.set(audio_file_key, file_path, ex=3600) - logger.info(f"πŸ’Ύ Stored audio file path in Redis: {audio_file_key}") + # Calculate duration (16kHz, 16-bit mono = 32000 bytes/second) + if total_bytes > 0: + duration = total_bytes / (16000 * 2 * 1) # sample_rate * sample_width * channels else: - logger.warning(f"⚠️ No audio chunks collected for session {session_id}") - file_path = None + logger.warning(f"⚠️ No audio chunks written for session {session_id}") duration = 0.0 - # Clean up Redis tracking key + logger.info( + f"🎡 Audio persistence job complete for session {session_id}: " + f"{conversation_count} conversations, {total_chunk_count} total chunks, " + f"{total_bytes / 1024 / 1024:.2f} MB, {runtime_seconds:.1f}s runtime" + ) + + # Clean up Redis tracking keys audio_job_key = f"audio_persistence:session:{session_id}" await redis_client.delete(audio_job_key) - logger.info(f"🧹 Cleaned up tracking key {audio_job_key}") + conversation_key = f"conversation:current:{session_id}" + await redis_client.delete(conversation_key) + 
logger.info(f"🧹 Cleaned up tracking keys for session {session_id}") return { "session_id": session_id, - "audio_file_path": file_path, - "chunk_count": chunk_count, + "conversation_count": conversation_count, + "last_audio_file_path": str(file_path) if file_path else None, + "total_chunk_count": total_chunk_count, "total_bytes": total_bytes, "duration_seconds": duration, - "runtime_seconds": time.time() - start_time + "runtime_seconds": runtime_seconds } # Enqueue wrapper functions -def enqueue_audio_processing( - client_id: str, - user_id: str, - user_email: str, - audio_data: bytes, - audio_rate: int, - audio_width: int, - audio_channels: int, - audio_uuid: Optional[str] = None, - timestamp: Optional[int] = None, - priority: JobPriority = JobPriority.NORMAL -): - """ - Enqueue an audio processing job (file writing + DB entry). - - Returns RQ Job object for tracking. - """ - timeout_mapping = { - JobPriority.URGENT: 120, # 2 minutes - JobPriority.HIGH: 90, # 1.5 minutes - JobPriority.NORMAL: 60, # 1 minute - JobPriority.LOW: 30 # 30 seconds - } - - job = default_queue.enqueue( - process_audio_job, - client_id, - user_id, - user_email, - audio_data, - audio_rate, - audio_width, - audio_channels, - audio_uuid, - timestamp, - job_timeout=timeout_mapping.get(priority, 60), - result_ttl=JOB_RESULT_TTL, - job_id=f"audio_{client_id}_{audio_uuid or 'new'}", - description=f"Process audio for client {client_id}", - meta={'audio_uuid': audio_uuid} if audio_uuid else {} - ) - - logger.info(f"πŸ“₯ RQ: Enqueued audio job {job.id} for client {client_id}") - return job - - def enqueue_cropping( - client_id: str, - user_id: str, - audio_uuid: str, - original_path: str, - speech_segments: list, - output_path: str, + conversation_id: str, + audio_path: str, priority: JobPriority = JobPriority.NORMAL ): """ Enqueue an audio cropping job. - Returns RQ Job object for tracking. 
+ Args: + conversation_id: Conversation ID + audio_path: Path to audio file + priority: Job priority level + + Returns: + RQ Job object for tracking. """ timeout_mapping = { JobPriority.URGENT: 300, # 5 minutes @@ -503,18 +444,14 @@ def enqueue_cropping( job = default_queue.enqueue( process_cropping_job, - client_id, - user_id, - audio_uuid, - original_path, - speech_segments, - output_path, + conversation_id, + audio_path, job_timeout=timeout_mapping.get(priority, 180), result_ttl=JOB_RESULT_TTL, - job_id=f"cropping_{audio_uuid[:8]}", - description=f"Crop audio for {audio_uuid[:8]}", - meta={'audio_uuid': audio_uuid} + job_id=f"crop_{conversation_id[:12]}", + description=f"Crop audio for conversation {conversation_id[:12]}", + meta={'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued cropping job {job.id} for audio {audio_uuid}") + logger.info(f"πŸ“₯ RQ: Enqueued cropping job {job.id} for conversation {conversation_id}") return job diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 2e44e034..1d3400c3 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -10,10 +10,6 @@ from typing import Dict, Any from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.controllers.queue_controller import ( - transcription_queue, - REDIS_URL, -) logger = logging.getLogger(__name__) @@ -22,9 +18,9 @@ async def open_conversation_job( session_id: str, user_id: str, - user_email: str, client_id: str, speech_detected_at: float, + speech_job_id: str = None, redis_client=None ) -> Dict[str, Any]: """ @@ -35,26 +31,27 @@ async def open_conversation_job( Args: session_id: Stream session ID user_id: User ID - user_email: User email client_id: Client ID speech_detected_at: Timestamp when speech was first detected + 
speech_job_id: Optional speech detection job ID to update with conversation_id redis_client: Redis client (injected by decorator) Returns: Dict with conversation_id, final_result_count, runtime_seconds + + Note: user_email is fetched from the database when needed. """ from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator - from advanced_omi_backend.models.conversation import Conversation - - import uuid - from advanced_omi_backend.models.conversation import create_conversation + from advanced_omi_backend.models.conversation import Conversation, create_conversation + from rq import get_current_job logger.info(f"πŸ“ Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})") - # Create minimal streaming conversation - conversation_id = str(uuid.uuid4()) + # Get current job for meta storage + current_job = get_current_job() + + # Create minimal streaming conversation (conversation_id auto-generated) conversation = create_conversation( - conversation_id=conversation_id, audio_uuid=session_id, user_id=user_id, client_id=client_id, @@ -64,13 +61,56 @@ async def open_conversation_job( # Save to database await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID logger.info(f"βœ… Created streaming conversation {conversation_id} for session {session_id}") + # Update speech detection job metadata with conversation_id + if speech_job_id: + try: + from rq.job import Job + from advanced_omi_backend.controllers.queue_controller import redis_conn + + speech_job = Job.fetch(speech_job_id, connection=redis_conn) + if speech_job and speech_job.meta: + # Only update if conversation_id not already set (first conversation wins) + if not speech_job.meta.get('conversation_id'): + speech_job.meta['conversation_id'] = conversation_id + # Remove session_level flag - now linked to conversation + speech_job.meta.pop('session_level', None) + speech_job.save_meta() + 
logger.info(f"πŸ”— Updated speech job {speech_job_id[:12]} with conversation_id") + else: + logger.info(f"⏭️ Speech job {speech_job_id[:12]} already linked to conversation {speech_job.meta.get('conversation_id')[:12]}") + + # Also update the speaker check job if referenced in speech job metadata + # Only update if it doesn't already have a conversation_id (first conversation wins) + speaker_check_job_id = speech_job.meta.get('speaker_check_job_id') + if speaker_check_job_id: + try: + speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn) + if speaker_check_job and speaker_check_job.meta: + # Only update if conversation_id not already set + if not speaker_check_job.meta.get('conversation_id'): + speaker_check_job.meta['conversation_id'] = conversation_id + speaker_check_job.save_meta() + logger.info(f"πŸ”— Updated speaker check job {speaker_check_job_id} with conversation_id") + else: + logger.info(f"⏭️ Speaker check job {speaker_check_job_id} already linked to conversation {speaker_check_job.meta.get('conversation_id')[:12]}") + except Exception as speaker_err: + logger.warning(f"⚠️ Failed to update speaker check job metadata: {speaker_err}") + except Exception as e: + logger.warning(f"⚠️ Failed to update speech job metadata: {e}") + # Store conversation_id in Redis for finalize job to find conversation_key = f"conversation:session:{session_id}" await redis_client.set(conversation_key, conversation_id, ex=3600) logger.info(f"πŸ’Ύ Stored conversation ID in Redis: {conversation_key}") + # Signal audio persistence job to rotate to this conversation's file + current_conversation_key = f"conversation:current:{session_id}" + await redis_client.set(current_conversation_key, conversation_id, ex=3600) + logger.info(f"πŸ”„ Signaled audio persistence to rotate file for conversation {conversation_id[:12]}") + # Use redis_client parameter aggregator = TranscriptionResultsAggregator(redis_client) @@ -82,6 +122,17 @@ async def open_conversation_job( 
last_result_count = 0 finalize_received = False + # Inactivity timeout configuration + import os + inactivity_timeout_seconds = float(os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60")) + inactivity_timeout_minutes = inactivity_timeout_seconds / 60 + last_meaningful_speech_time = time.time() # Initialize with conversation start + timeout_triggered = False # Track if closure was due to timeout + last_inactivity_log_time = time.time() # Track when we last logged inactivity + last_word_count = 0 # Track word count to detect actual new speech + + logger.info(f"πŸ“Š Conversation timeout configured: {inactivity_timeout_minutes} minutes ({inactivity_timeout_seconds}s)") + while True: # Check if session is finalizing (set by producer when recording stops) if not finalize_received: @@ -91,15 +142,96 @@ async def open_conversation_job( logger.info(f"πŸ›‘ Session finalizing, waiting for audio persistence job to complete...") break # Exit immediately when finalize signal received - # Check timeout + # Check max runtime timeout if time.time() - start_time > max_runtime: - logger.warning(f"⏱️ Timeout reached for {conversation_id}") + logger.warning(f"⏱️ Max runtime reached for {conversation_id}") break # Get combined results from aggregator combined = await aggregator.get_combined_results(session_id) current_count = combined["chunk_count"] + # Analyze speech content using detailed analysis + from advanced_omi_backend.utils.conversation_utils import analyze_speech + + transcript_data = { + "text": combined["text"], + "words": combined.get("words", []) + } + speech_analysis = analyze_speech(transcript_data) + + # Extract speaker information from segments + speakers = [] + segments = combined.get("segments", []) + if segments: + for seg in segments: + speaker = seg.get("speaker", "Unknown") + if speaker and speaker != "Unknown" and speaker not in speakers: + speakers.append(speaker) + + # Check if NEW speech arrived (word count increased) + # Track word count instead of chunk 
count to avoid resetting on noise/silence chunks + current_word_count = speech_analysis.get("word_count", 0) + if current_word_count > last_word_count: + last_meaningful_speech_time = time.time() + last_word_count = current_word_count + # Store timestamp in Redis for visibility/debugging + await redis_client.set( + f"conversation:last_speech:{conversation_id}", + last_meaningful_speech_time, + ex=3600 # 1 hour TTL + ) + logger.debug(f"πŸ—£οΈ New speech detected (word count: {current_word_count}), updated last_speech timestamp") + + # Update job meta with current state + if current_job: + if not current_job.meta: + current_job.meta = {} + + from datetime import datetime + + # Set created_at only once (first time we update metadata) + if 'created_at' not in current_job.meta: + current_job.meta['created_at'] = datetime.now().isoformat() + + current_job.meta.update({ + "conversation_id": conversation_id, + "audio_uuid": session_id, # Link to session for job grouping + "client_id": client_id, # Ensure client_id is always present + "transcript": combined["text"][:500] + "..." 
if len(combined["text"]) > 500 else combined["text"], # First 500 chars + "transcript_length": len(combined["text"]), + "speakers": speakers, + "word_count": speech_analysis.get("word_count", 0), + "duration_seconds": speech_analysis.get("duration", 0), + "has_speech": speech_analysis.get("has_speech", False), + "last_update": datetime.now().isoformat(), + "inactivity_seconds": time.time() - last_meaningful_speech_time, + "chunks_processed": current_count + }) + current_job.save_meta() + + # Check inactivity timeout and log every 10 seconds + inactivity_duration = time.time() - last_meaningful_speech_time + current_time = time.time() + + # Log inactivity every 10 seconds + if current_time - last_inactivity_log_time >= 10: + logger.info(f"⏱️ Time since last speech: {inactivity_duration:.1f}s (timeout: {inactivity_timeout_seconds:.0f}s)") + last_inactivity_log_time = current_time + + if inactivity_duration > inactivity_timeout_seconds: + logger.info( + f"πŸ• Conversation {conversation_id} inactive for " + f"{inactivity_duration/60:.1f} minutes (threshold: {inactivity_timeout_minutes} min), " + f"auto-closing conversation (session remains active for next conversation)..." 
+ ) + # DON'T set session to finalizing - just close this conversation + # Session remains "active" so new conversations can be created + # Only user manual stop or WebSocket disconnect should finalize the session + timeout_triggered = True + finalize_received = True + break + # Update conversation if new results arrived if current_count > last_result_count: # Update conversation in MongoDB @@ -126,8 +258,8 @@ async def open_conversation_job( logger.info(f"βœ… Conversation {conversation_id} updates complete, waiting for audio file to be ready...") # Wait for audio_streaming_persistence_job to complete and write the file path - # Poll for the audio file key - this is deterministic, not a timeout-based grace period - audio_file_key = f"audio:file:{session_id}" + # Audio persistence now writes files per-conversation, so key uses conversation_id + audio_file_key = f"audio:file:{conversation_id}" file_path_bytes = None max_wait_audio = 30 # Maximum 30 seconds to wait for audio file wait_start = time.time() @@ -142,48 +274,59 @@ async def open_conversation_job( # Check if still within reasonable time elapsed = time.time() - wait_start if elapsed % 5 == 0: # Log every 5 seconds - logger.info(f"⏳ Waiting for audio file... ({elapsed:.0f}s elapsed)") + logger.info(f"⏳ Waiting for audio file (conversation {conversation_id[:12]})... 
({elapsed:.0f}s elapsed)") await asyncio.sleep(0.5) # Check every 500ms if not file_path_bytes: - logger.error(f"❌ Audio file path not found in Redis after {max_wait_audio}s") - logger.warning(f"⚠️ Audio persistence job may have failed or is still running - cannot enqueue batch transcription") + logger.error(f"❌ Audio file path not found in Redis after {max_wait_audio}s (key: {audio_file_key})") + logger.warning(f"⚠️ Audio persistence job may not have rotated file yet - cannot enqueue batch transcription") else: file_path = file_path_bytes.decode() logger.info(f"πŸ“ Retrieved audio file path: {file_path}") - # Enqueue complete batch processing job chain - from advanced_omi_backend.controllers.queue_controller import start_batch_processing_jobs - - job_ids = start_batch_processing_jobs( + # Update conversation with audio file path + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + if conversation: + # Store just the filename (relative to CHUNK_DIR) + from pathlib import Path + audio_filename = Path(file_path).name + conversation.audio_path = audio_filename + await conversation.save() + logger.info(f"πŸ’Ύ Updated conversation {conversation_id[:12]} with audio_path: {audio_filename}") + else: + logger.warning(f"⚠️ Conversation {conversation_id} not found for audio_path update") + + # Enqueue post-conversation processing pipeline + from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs + + job_ids = start_post_conversation_jobs( conversation_id=conversation_id, audio_uuid=session_id, + audio_file_path=file_path, user_id=user_id, - user_email=user_email, - audio_file_path=file_path + post_transcription=True # Run batch transcription for streaming audio ) logger.info( - f"πŸ“₯ RQ: Enqueued batch processing chain: " - f"{job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" + f"πŸ“₯ Pipeline: transcribe({job_ids['transcription']}) β†’ " + 
f"speaker({job_ids['speaker_recognition']}) β†’ " + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})]" ) # Wait a moment to ensure jobs are registered in RQ await asyncio.sleep(0.5) - # DON'T mark session as complete yet - dependent jobs are still processing - # Session remains in "finalizing" status until process_memory_job completes - logger.info(f"⏳ Session {session_id} remains in 'finalizing' status while batch jobs process") - # Clean up Redis streams to prevent memory leaks try: - # Delete the audio input stream - audio_stream_key = f"audio:stream:{client_id}" - await redis_client.delete(audio_stream_key) - logger.info(f"🧹 Deleted audio stream: {audio_stream_key}") + # NOTE: Do NOT delete audio:stream:{client_id} here! + # The audio stream is per-client (WebSocket connection), not per-conversation. + # It's still actively receiving audio and will be reused by the next conversation. + # Only delete it on WebSocket disconnect (handled in websocket_controller.py) - # Delete the transcription results stream + # Delete the transcription results stream (per-session/conversation) results_stream_key = f"transcription:results:{session_id}" await redis_client.delete(results_stream_key) logger.info(f"🧹 Deleted results stream: {results_stream_key}") @@ -194,13 +337,176 @@ async def open_conversation_job( except Exception as cleanup_error: logger.warning(f"⚠️ Error during stream cleanup: {cleanup_error}") - # Clean up Redis tracking key so new speech detection jobs can start + # Clean up Redis tracking keys so speech detection job knows conversation is complete open_job_key = f"open_conversation:session:{session_id}" await redis_client.delete(open_job_key) logger.info(f"🧹 Cleaned up tracking key {open_job_key}") + # Delete the conversation:current signal so audio persistence knows conversation ended + current_conversation_key = f"conversation:current:{session_id}" + await redis_client.delete(current_conversation_key) + logger.info(f"🧹 Deleted 
conversation:current signal for session {session_id[:12]}") + + # Increment conversation count for this session + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count = await redis_client.incr(conversation_count_key) + await redis_client.expire(conversation_count_key, 3600) # 1 hour TTL + logger.info(f"πŸ“Š Conversation count for session {session_id}: {conversation_count}") + + # Check if session is still active (user still recording) and restart listening jobs + session_status = await redis_client.hget(session_key, "status") + if session_status: + status_str = session_status.decode() if isinstance(session_status, bytes) else session_status + + if status_str == "active": + # Session still active - enqueue new speech detection for next conversation + logger.info(f"πŸ”„ Enqueueing new speech detection (conversation #{conversation_count + 1})") + + from advanced_omi_backend.controllers.queue_controller import transcription_queue, redis_conn, JOB_RESULT_TTL + from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job + + # Enqueue speech detection job for next conversation (audio persistence keeps running) + speech_job = transcription_queue.enqueue( + stream_speech_detection_job, + session_id, + user_id, + client_id, + job_timeout=3600, + result_ttl=JOB_RESULT_TTL, + job_id=f"speech-detect_{session_id[:12]}_{conversation_count}", + description=f"Listening for speech (conversation #{conversation_count + 1})", + meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} + ) + + # Store job ID for cleanup (keyed by client_id for WebSocket cleanup) + try: + redis_conn.set(f"speech_detection_job:{client_id}", speech_job.id, ex=3600) + logger.info(f"πŸ“Œ Stored speech detection job ID for client {client_id}") + except Exception as e: + logger.warning(f"⚠️ Failed to store job ID for {client_id}: {e}") + + logger.info(f"βœ… Enqueued speech detection job {speech_job.id}") + else: + 
logger.info(f"Session {session_id} status={status_str}, not restarting (user stopped recording)") + else: + logger.info(f"Session {session_id} not found, not restarting (session ended)") + return { "conversation_id": conversation_id, + "conversation_count": conversation_count, "final_result_count": last_result_count, - "runtime_seconds": time.time() - start_time + "runtime_seconds": time.time() - start_time, + "timeout_triggered": timeout_triggered + } + + +@async_job(redis=True, beanie=True) +async def generate_title_summary_job( + conversation_id: str, + redis_client=None +) -> Dict[str, Any]: + """ + Generate title and summary for a conversation using LLM. + + This job runs independently of transcription and memory jobs to ensure + conversations always get meaningful titles and summaries, even if other + processing steps fail. + + Uses the utility functions from conversation_utils for consistent title/summary generation. + + Args: + conversation_id: Conversation ID + redis_client: Redis client (injected by decorator) + + Returns: + Dict with generated title and summary + """ + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.utils.conversation_utils import ( + generate_title_with_speakers, + generate_summary_with_speakers + ) + + logger.info(f"πŸ“ Starting title/summary generation for conversation {conversation_id}") + + start_time = time.time() + + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + logger.error(f"Conversation {conversation_id} not found") + return {"success": False, "error": "Conversation not found"} + + # Get segments from active transcript version + segments = conversation.segments or [] + + if not segments or len(segments) == 0: + logger.warning(f"⚠️ No segments available for conversation {conversation_id}") + return { + "success": False, + "error": "No segments available", + "conversation_id": 
conversation_id + } + + # Generate title and summary using speaker-aware utilities + try: + logger.info(f"πŸ€– Generating title/summary using LLM for conversation {conversation_id}") + + # Convert segments to dict format expected by utils + segment_dicts = [ + { + "speaker": seg.speaker, + "text": seg.text, + "start": seg.start, + "end": seg.end + } + for seg in segments + ] + + # Generate title and summary with speaker awareness + title = await generate_title_with_speakers(segment_dicts) + summary = await generate_summary_with_speakers(segment_dicts) + + conversation.title = title + conversation.summary = summary + + logger.info(f"βœ… Generated title: '{conversation.title}', summary: '{conversation.summary}'") + + except Exception as gen_error: + logger.error(f"❌ Title/summary generation failed: {gen_error}") + return { + "success": False, + "error": str(gen_error), + "conversation_id": conversation_id, + "processing_time_seconds": time.time() - start_time + } + + # Save the updated conversation + await conversation.save() + + processing_time = time.time() - start_time + + # Update job metadata + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "segment_count": len(segments), + "processing_time": processing_time + }) + current_job.save_meta() + + logger.info(f"βœ… Title/summary generation completed for {conversation_id} in {processing_time:.2f}s") + + return { + "success": True, + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "processing_time_seconds": processing_time } diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py index a838ee67..c1a6dfc0 100644 --- 
a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py @@ -20,19 +20,23 @@ @async_job(redis=True, beanie=True) async def process_memory_job( - client_id: str, - user_id: str, - user_email: str, conversation_id: str, redis_client=None ) -> Dict[str, Any]: """ RQ job function for memory extraction and processing from conversations. + V2 Architecture: + 1. Extracts memories from conversation transcript + 2. Checks primary speakers filter if configured + 3. Uses configured memory provider (friend_lite or openmemory_mcp) + 4. Stores memory references in conversation document + + Note: Listening jobs are restarted by open_conversation_job (not here). + This allows users to resume talking immediately after conversation closes, + without waiting for memory processing to complete. + Args: - client_id: Client identifier - user_id: User ID - user_email: User email conversation_id: Conversation ID to process redis_client: Redis client (injected by decorator) @@ -52,20 +56,18 @@ async def process_memory_job( logger.warning(f"No conversation found for {conversation_id}") return {"success": False, "error": "Conversation not found"} - # Read client_id and user_email from conversation/user if not provided - # (Parameters may be empty if called via job dependency) - actual_client_id = client_id or conversation_model.client_id - actual_user_email = user_email + # Get client_id, user_id, and user_email from conversation/user + client_id = conversation_model.client_id + user_id = conversation_model.user_id - if not actual_user_email: - user = await get_user_by_id(user_id) - if user: - actual_user_email = user.email - else: - logger.warning(f"Could not find user {user_id}") - actual_user_email = "" + user = await get_user_by_id(user_id) + if user: + user_email = user.email + else: + logger.warning(f"Could not find user {user_id}") + user_email = "" - logger.info(f"πŸ”„ Processing memory for conversation 
{conversation_id}, client={actual_client_id}, user={user_id}") + logger.info(f"πŸ”„ Processing memory for conversation {conversation_id}, client={client_id}, user={user_id}") # Extract conversation text from transcript segments full_conversation = "" @@ -116,10 +118,10 @@ async def process_memory_job( memory_service = get_memory_service() memory_result = await memory_service.add_memory( full_conversation, - actual_client_id, + client_id, conversation_id, user_id, - actual_user_email, + user_email, allow_update=True, ) @@ -140,17 +142,38 @@ async def process_memory_job( processing_time = time.time() - start_time logger.info(f"βœ… Completed memory processing for conversation {conversation_id} - created {len(created_memory_ids)} memories in {processing_time:.2f}s") - # Mark session as complete in Redis (this is the last job in the chain) - if conversation_model and conversation_model.audio_uuid: - session_key = f"audio:session:{conversation_model.audio_uuid}" + # Update job metadata with memory information + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + + # Fetch memory details to display in UI + memory_details = [] try: - await redis_client.hset(session_key, mapping={ - "status": "complete", - "completed_at": str(time.time()) - }) - logger.info(f"βœ… Marked session {conversation_model.audio_uuid} as complete (all jobs finished)") + for memory_id in created_memory_ids[:5]: # Limit to first 5 for display + memory_entry = await memory_service.get_memory(memory_id, user_id) + if memory_entry: + memory_details.append({ + "memory_id": memory_id, + "text": memory_entry.get("text", "")[:200] # First 200 chars + }) except Exception as e: - logger.warning(f"⚠️ Could not mark session as complete: {e}") + logger.warning(f"Failed to fetch memory details for UI: {e}") + + current_job.meta.update({ + "conversation_id": conversation_id, + "memories_created": len(created_memory_ids), + 
"memory_ids": created_memory_ids[:5], # Store first 5 IDs + "memory_details": memory_details, + "processing_time": processing_time + }) + current_job.save_meta() + + # NOTE: Listening jobs are restarted by open_conversation_job (not here) + # This allows users to resume talking immediately after conversation closes, + # without waiting for memory processing to complete. return { "success": True, @@ -158,18 +181,7 @@ async def process_memory_job( "processing_time": processing_time } else: - # Mark session as complete even if no memories created - if conversation_model and conversation_model.audio_uuid: - session_key = f"audio:session:{conversation_model.audio_uuid}" - try: - await redis_client.hset(session_key, mapping={ - "status": "complete", - "completed_at": str(time.time()) - }) - logger.info(f"βœ… Marked session {conversation_model.audio_uuid} as complete (no memories)") - except Exception as e: - logger.warning(f"⚠️ Could not mark session as complete: {e}") - + # No memories created - still successful return {"success": True, "memories_created": 0, "skipped": True} else: return {"success": False, "error": "Memory service returned False"} diff --git a/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py new file mode 100755 index 00000000..c402005f --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +RQ Worker Entry Point with Logging Configuration. + +This script configures Python logging before starting RQ workers, +ensuring that application-level logs from job functions are visible. 
+""" + +import logging +import os +import sys + +# Configure logging BEFORE importing any application modules +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + stream=sys.stdout +) + +logger = logging.getLogger(__name__) + + +def main(): + """Start RQ worker with proper logging configuration.""" + from rq import Worker + from redis import Redis + + # Get Redis URL from environment + redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') + + # Get queue names from command line arguments + queue_names = sys.argv[1:] if len(sys.argv) > 1 else ['transcription', 'memory', 'default'] + + logger.info(f"πŸš€ Starting RQ worker for queues: {', '.join(queue_names)}") + logger.info(f"πŸ“‘ Redis URL: {redis_url}") + + # Create Redis connection + redis_conn = Redis.from_url(redis_url) + + # Create and start worker + worker = Worker( + queue_names, + connection=redis_conn, + log_job_description=True + ) + + logger.info("βœ… RQ worker ready") + + # This blocks until worker is stopped + worker.work(logging_level='INFO') + + +if __name__ == "__main__": + main() diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py new file mode 100644 index 00000000..80434232 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py @@ -0,0 +1,287 @@ +""" +Speaker recognition related RQ job functions. + +This module contains all jobs related to speaker identification and recognition. 
+""" + +import asyncio +import logging +import time +from typing import Dict, Any + +from advanced_omi_backend.models.job import async_job +from advanced_omi_backend.controllers.queue_controller import transcription_queue + +logger = logging.getLogger(__name__) + + +@async_job(redis=True, beanie=True) +async def check_enrolled_speakers_job( + session_id: str, + user_id: str, + client_id: str, + redis_client=None +) -> Dict[str, Any]: + """ + Check if any enrolled speakers are present in the current audio stream. + + This job is used during speech detection to filter conversations by enrolled speakers. + + Args: + session_id: Stream session ID + user_id: User ID + client_id: Client ID + redis_client: Redis client (injected by decorator) + + Returns: + Dict with enrolled_present, identified_speakers, and speaker_result + """ + from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator + from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient + + logger.info(f"🎀 Starting enrolled speaker check for session {session_id[:12]}") + + start_time = time.time() + + # Get aggregated transcription results + aggregator = TranscriptionResultsAggregator(redis_client) + raw_results = await aggregator.get_session_results(session_id) + + # Check for enrolled speakers + speaker_client = SpeakerRecognitionClient() + enrolled_present, speaker_result = await speaker_client.check_if_enrolled_speaker_present( + redis_client=redis_client, + client_id=client_id, + session_id=session_id, + user_id=user_id, + transcription_results=raw_results + ) + + # Extract identified speakers + identified_speakers = [] + if speaker_result and "segments" in speaker_result: + for seg in speaker_result["segments"]: + identified_as = seg.get("identified_as") + if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: + identified_speakers.append(identified_as) + + processing_time = time.time() - start_time + + if 
enrolled_present: + logger.info(f"βœ… Enrolled speaker(s) found: {', '.join(identified_speakers)} ({processing_time:.2f}s)") + else: + logger.info(f"⏭️ No enrolled speakers found ({processing_time:.2f}s)") + + # Update job metadata for timeline tracking + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "session_id": session_id, + "audio_uuid": session_id, + "client_id": client_id, + "enrolled_present": enrolled_present, + "identified_speakers": identified_speakers, + "speaker_count": len(identified_speakers), + "processing_time": processing_time + }) + current_job.save_meta() + + return { + "success": True, + "session_id": session_id, + "enrolled_present": enrolled_present, + "identified_speakers": identified_speakers, + "speaker_result": speaker_result, + "processing_time_seconds": processing_time + } + + +@async_job(redis=True, beanie=True) +async def recognise_speakers_job( + conversation_id: str, + version_id: str, + audio_path: str, + transcript_text: str, + words: list, + redis_client=None +) -> Dict[str, Any]: + """ + RQ job function for identifying speakers in a transcribed conversation. + + This job runs after transcription and: + 1. Calls speaker recognition service to identify speakers + 2. Updates the transcript version with identified speaker labels + 3. 
Returns results for downstream jobs (memory) + + Args: + conversation_id: Conversation ID + version_id: Transcript version ID to update + audio_path: Path to audio file + transcript_text: Transcript text from transcription job + words: Word-level timing data from transcription job + redis_client: Redis client (injected by decorator) + + Returns: + Dict with processing results + """ + from advanced_omi_backend.models.conversation import Conversation + from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient + + logger.info(f"🎀 RQ: Starting speaker recognition for conversation {conversation_id}") + + start_time = time.time() + + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + logger.error(f"Conversation {conversation_id} not found") + return {"success": False, "error": "Conversation not found"} + + # Get user_id from conversation + user_id = conversation.user_id + + # Use the provided audio path + actual_audio_path = audio_path + logger.info(f"πŸ“ Using audio for speaker recognition: {audio_path}") + + # Find the transcript version to update + transcript_version = None + for version in conversation.transcript_versions: + if version.version_id == version_id: + transcript_version = version + break + + if not transcript_version: + logger.error(f"Transcript version {version_id} not found") + return {"success": False, "error": "Transcript version not found"} + + # Check if speaker recognition is enabled + speaker_client = SpeakerRecognitionClient() + if not speaker_client.enabled: + logger.info(f"🎀 Speaker recognition disabled, skipping") + return { + "success": True, + "conversation_id": conversation_id, + "version_id": version_id, + "speaker_recognition_enabled": False, + "processing_time_seconds": 0 + } + + # Call speaker recognition service + try: + logger.info(f"🎀 Calling speaker recognition service...") + + # Read transcript text and words from 
the transcript version + # (Parameters may be empty if called via job dependency) + actual_transcript_text = transcript_text or transcript_version.transcript or "" + actual_words = words if words else [] + + # If words not provided, we need to get them from metadata + if not actual_words and transcript_version.metadata: + actual_words = transcript_version.metadata.get("words", []) + + if not actual_transcript_text: + logger.warning(f"🎀 No transcript text found in version {version_id}") + return { + "success": False, + "conversation_id": conversation_id, + "version_id": version_id, + "error": "No transcript text available", + "processing_time_seconds": 0 + } + + transcript_data = { + "text": actual_transcript_text, + "words": actual_words + } + + speaker_result = await speaker_client.diarize_identify_match( + audio_path=actual_audio_path, + transcript_data=transcript_data, + user_id=user_id + ) + + if not speaker_result or "segments" not in speaker_result: + logger.warning(f"🎀 Speaker recognition returned no segments") + return { + "success": True, + "conversation_id": conversation_id, + "version_id": version_id, + "speaker_recognition_enabled": True, + "identified_speakers": [], + "processing_time_seconds": time.time() - start_time + } + + speaker_segments = speaker_result["segments"] + logger.info(f"🎀 Speaker recognition returned {len(speaker_segments)} segments") + + # Update the transcript version segments with identified speakers + updated_segments = [] + for seg in speaker_segments: + speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") + updated_segments.append( + Conversation.SpeakerSegment( + start=seg.get("start", 0), + end=seg.get("end", 0), + text=seg.get("text", ""), + speaker=speaker_name, + confidence=seg.get("confidence") + ) + ) + + # Update the transcript version + transcript_version.segments = updated_segments + + # Extract unique identified speakers for metadata + identified_speakers = set() + for seg in speaker_segments: + 
identified_as = seg.get("identified_as", "Unknown") + if identified_as != "Unknown": + identified_speakers.add(identified_as) + + # Update metadata + if not transcript_version.metadata: + transcript_version.metadata = {} + + transcript_version.metadata["speaker_recognition"] = { + "enabled": True, + "identified_speakers": list(identified_speakers), + "speaker_count": len(identified_speakers), + "total_segments": len(speaker_segments), + "processing_time_seconds": time.time() - start_time + } + + # Update legacy fields if this is the active version + if conversation.active_transcript_version == version_id: + conversation.segments = updated_segments + + await conversation.save() + + processing_time = time.time() - start_time + logger.info(f"βœ… Speaker recognition completed for {conversation_id} in {processing_time:.2f}s") + + return { + "success": True, + "conversation_id": conversation_id, + "version_id": version_id, + "speaker_recognition_enabled": True, + "identified_speakers": list(identified_speakers), + "segment_count": len(updated_segments), + "processing_time_seconds": processing_time + } + + except Exception as speaker_error: + logger.error(f"❌ Speaker recognition failed: {speaker_error}") + import traceback + logger.debug(traceback.format_exc()) + + return { + "success": False, + "conversation_id": conversation_id, + "version_id": version_id, + "error": str(speaker_error), + "processing_time_seconds": time.time() - start_time + } diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index c69bc3fa..e081786a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -15,7 +15,6 @@ from advanced_omi_backend.controllers.queue_controller import ( transcription_queue, redis_conn, - _ensure_beanie_initialized, JOB_RESULT_TTL, REDIS_URL, ) @@ -120,7 
+119,6 @@ async def transcribe_full_audio_job( audio_uuid: str, audio_path: str, version_id: str, - user_id: str, trigger: str = "reprocess", redis_client=None ) -> Dict[str, Any]: @@ -140,7 +138,6 @@ async def transcribe_full_audio_job( audio_uuid: Audio UUID (unused but kept for compatibility) audio_path: Path to audio file version_id: Version ID for new transcript - user_id: User ID trigger: Trigger source redis_client: Redis client (injected by decorator) @@ -155,6 +152,15 @@ async def transcribe_full_audio_job( start_time = time.time() + # Get the conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + if not conversation: + raise ValueError(f"Conversation {conversation_id} not found") + + # Use the provided audio path + actual_audio_path = audio_path + logger.info(f"πŸ“ Using audio for transcription: {audio_path}") + # Get the transcription provider provider = get_transcription_provider(mode="batch") if not provider: @@ -164,9 +170,9 @@ async def transcribe_full_audio_job( logger.info(f"Using transcription provider: {provider_name}") # Read the audio file - audio_file_path = Path(audio_path) + audio_file_path = Path(actual_audio_path) if not audio_file_path.exists(): - raise FileNotFoundError(f"Audio file not found: {audio_path}") + raise FileNotFoundError(f"Audio file not found: {actual_audio_path}") # Load audio data with open(audio_file_path, 'rb') as f: @@ -189,12 +195,6 @@ async def transcribe_full_audio_job( # Calculate processing time (transcription only) processing_time = time.time() - start_time - # Get the conversation using Beanie - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation: - logger.error(f"Conversation {conversation_id} not found") - return {"success": False, "error": "Conversation not found"} - # Convert segments to SpeakerSegment objects speaker_segments = [] for seg in segments: @@ -300,12 +300,27 @@ async def 
transcribe_full_audio_job( logger.info(f"βœ… Transcript processing completed for {conversation_id} in {processing_time:.2f}s") + # Update job metadata with title and summary for UI display + from rq import get_current_job + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "conversation_id": conversation_id, + "title": conversation.title, + "summary": conversation.summary, + "transcript_length": len(transcript_text), + "word_count": len(words), + "processing_time": processing_time + }) + current_job.save_meta() + return { "success": True, "conversation_id": conversation_id, "version_id": version_id, "audio_path": str(audio_file_path), - "user_id": user_id, "transcript": transcript_text, "segments": [seg.model_dump() for seg in speaker_segments], "words": words, # Needed by speaker recognition @@ -316,385 +331,291 @@ async def transcribe_full_audio_job( @async_job(redis=True, beanie=True) -async def recognise_speakers_job( - conversation_id: str, - version_id: str, - audio_path: str, +async def stream_speech_detection_job( + session_id: str, user_id: str, - transcript_text: str, - words: list, + client_id: str, redis_client=None ) -> Dict[str, Any]: """ - RQ job function for identifying speakers in a transcribed conversation. + Listen for meaningful speech, optionally check for enrolled speakers, then start conversation. - This job runs after transcription and: - 1. Calls speaker recognition service to identify speakers - 2. Updates the transcript version with identified speaker labels - 3. Returns results for downstream jobs (memory) + Simple flow: + 1. Listen for meaningful speech + 2. If speaker filter enabled β†’ check for enrolled speakers + 3. If criteria met β†’ start open_conversation_job and EXIT + 4. 
Conversation will restart new speech detection when complete Args: - conversation_id: Conversation ID - version_id: Transcript version ID to update - audio_path: Path to audio file + session_id: Stream session ID user_id: User ID - transcript_text: Transcript text from transcription job - words: Word-level timing data from transcription job + client_id: Client ID redis_client: Redis client (injected by decorator) Returns: - Dict with processing results - """ - from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient - - logger.info(f"🎀 RQ: Starting speaker recognition for conversation {conversation_id}") - - start_time = time.time() - - # Get the conversation - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) - if not conversation: - logger.error(f"Conversation {conversation_id} not found") - return {"success": False, "error": "Conversation not found"} - - # Find the transcript version to update - transcript_version = None - for version in conversation.transcript_versions: - if version.version_id == version_id: - transcript_version = version - break - - if not transcript_version: - logger.error(f"Transcript version {version_id} not found") - return {"success": False, "error": "Transcript version not found"} - - # Check if speaker recognition is enabled - speaker_client = SpeakerRecognitionClient() - if not speaker_client.enabled: - logger.info(f"🎀 Speaker recognition disabled, skipping") - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": False, - "processing_time_seconds": 0 - } - - # Call speaker recognition service - try: - logger.info(f"🎀 Calling speaker recognition service...") - - # Read transcript text and words from the transcript version - # (Parameters may be empty if called via job dependency) - actual_transcript_text = transcript_text or 
transcript_version.transcript or "" - actual_words = words if words else [] - - # If words not provided, we need to get them from metadata - if not actual_words and transcript_version.metadata: - actual_words = transcript_version.metadata.get("words", []) - - if not actual_transcript_text: - logger.warning(f"🎀 No transcript text found in version {version_id}") - return { - "success": False, - "conversation_id": conversation_id, - "version_id": version_id, - "error": "No transcript text available", - "processing_time_seconds": 0 - } - - transcript_data = { - "text": actual_transcript_text, - "words": actual_words - } - - speaker_result = await speaker_client.diarize_identify_match( - audio_path=audio_path, - transcript_data=transcript_data, - user_id=user_id - ) - - if not speaker_result or "segments" not in speaker_result: - logger.warning(f"🎀 Speaker recognition returned no segments") - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "speaker_recognition_enabled": True, - "identified_speakers": [], - "processing_time_seconds": time.time() - start_time - } - - speaker_segments = speaker_result["segments"] - logger.info(f"🎀 Speaker recognition returned {len(speaker_segments)} segments") - - # Update the transcript version segments with identified speakers - updated_segments = [] - for seg in speaker_segments: - speaker_name = seg.get("identified_as") or seg.get("speaker", "Unknown") - updated_segments.append( - Conversation.SpeakerSegment( - start=seg.get("start", 0), - end=seg.get("end", 0), - text=seg.get("text", ""), - speaker=speaker_name, - confidence=seg.get("confidence") - ) - ) - - # Update the transcript version - transcript_version.segments = updated_segments - - # Extract unique identified speakers for metadata - identified_speakers = set() - for seg in speaker_segments: - identified_as = seg.get("identified_as", "Unknown") - if identified_as != "Unknown": - identified_speakers.add(identified_as) - - # 
Update metadata - if not transcript_version.metadata: - transcript_version.metadata = {} - - transcript_version.metadata["speaker_recognition"] = { - "enabled": True, - "identified_speakers": list(identified_speakers), - "speaker_count": len(identified_speakers), - "total_segments": len(speaker_segments), - "processing_time_seconds": time.time() - start_time - } - - # Update legacy fields if this is the active version - if conversation.active_transcript_version == version_id: - conversation.segments = updated_segments - - await conversation.save() - - processing_time = time.time() - start_time - logger.info(f"βœ… Speaker recognition completed for {conversation_id} in {processing_time:.2f}s") - - return { - "success": True, - "conversation_id": conversation_id, - "version_id": version_id, - "user_id": user_id, - "speaker_recognition_enabled": True, - "identified_speakers": list(identified_speakers), - "segment_count": len(updated_segments), - "processing_time_seconds": processing_time - } - - except Exception as speaker_error: - logger.error(f"❌ Speaker recognition failed: {speaker_error}") - import traceback - logger.debug(traceback.format_exc()) + Dict with session info and conversation_job_id or no_speech_detected - return { - "success": False, - "conversation_id": conversation_id, - "version_id": version_id, - "error": str(speaker_error), - "processing_time_seconds": time.time() - start_time - } - - -@async_job(redis=True, beanie=True) -async def stream_speech_detection_job( - session_id: str, - user_id: str, - user_email: str, - client_id: str, - redis_client=None -) -> Dict[str, Any]: + Note: user_email is fetched from the database when needed. """ - Job that monitors transcription stream for speech (STREAMING MODE ONLY). - - Decorated with @async_job to handle setup/teardown automatically. - - Job lifecycle: - 1. Monitors transcription stream for speech - 2. 
When speech detected: - - Checks if conversation already open (prevents duplicates) - - If no open conversation: creates conversation + starts open_conversation_job - - Exits (job completes) - 3. New stream_speech_detection_job can be started when conversation closes - - This architecture alternates between "listening for speech" and "actively recording conversation". - - This is part of the V2 architecture using RQ jobs as orchestrators. - - For batch/upload mode, conversations are created upfront and transcribe_full_audio_job is used. - - Args: - session_id: Stream session ID - user_id: User ID - user_email: User email - client_id: Client ID - - Returns: - Dict with session_id, conversation_id, open_conversation_job_id, detected_speakers, runtime_seconds - """ from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator + from advanced_omi_backend.utils.conversation_utils import analyze_speech from .conversation_jobs import open_conversation_job + from rq import get_current_job - logger.info(f"πŸ” RQ: Starting stream speech detection for session {session_id}") + logger.info(f"πŸ” Starting speech detection for session {session_id[:12]}") - # Use redis_client from decorator + # Setup aggregator = TranscriptionResultsAggregator(redis_client) - - # Job control + current_job = get_current_job() session_key = f"audio:session:{session_id}" - max_runtime = 3540 # 59 minutes (graceful exit before RQ timeout at 60 min) start_time = time.time() + max_runtime = 3540 # 59 minutes + + # Get conversation count + conversation_count_key = f"session:conversation_count:{session_id}" + conversation_count_bytes = await redis_client.get(conversation_count_key) + conversation_count = int(conversation_count_bytes) if conversation_count_bytes else 0 + + # Check if speaker filtering is enabled + speaker_filter_enabled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true" + logger.info(f"πŸ“Š Conversation #{conversation_count + 1}, Speaker 
filter: {'enabled' if speaker_filter_enabled else 'disabled'}") + + # Update job metadata to show status + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update({ + "status": "listening_for_speech", + "session_id": session_id, + "audio_uuid": session_id, + "client_id": client_id, + "session_level": True # Mark as session-level job + }) + current_job.save_meta() + + # Main loop: Listen for speech + while True: + # Exit conditions + session_status = await redis_client.hget(session_key, "status") + if session_status and session_status.decode() in ["complete", "closed"]: + logger.info(f"πŸ›‘ Session ended, exiting") + break - conversation_id = None - open_conversation_job_id = None - detected_speakers = [] # Track enrolled speakers detected during speech detection + if time.time() - start_time > max_runtime: + logger.warning(f"⏱️ Max runtime reached, exiting") + break - while True: - # Check if session has ended (status = "finalizing" or "complete") - # session_status = await redis_client.hget(session_key, "status") - # if session_status: - # status_str = session_status.decode() if isinstance(session_status, bytes) else session_status - # if status_str in ["finalizing", "complete"]: - # logger.info(f"πŸ›‘ Session {status_str}, stopping speech detection") - # break - - # # Check timeout - # if time.time() - start_time > max_runtime: - # logger.warning(f"⏱️ Timeout reached for {session_id}") - # break - - # Get combined transcription results (aggregator does the combining) + # Get transcription results combined = await aggregator.get_combined_results(session_id) - if not combined["text"]: - await asyncio.sleep(2) # Check every 2 seconds + await asyncio.sleep(2) continue - # Analyze for speech using centralized detection from utils - from advanced_omi_backend.utils.conversation_utils import analyze_speech - transcript_data = { - "text": combined["text"], - "words": combined["words"] - } + # Step 1: Check for meaningful speech + 
transcript_data = {"text": combined["text"], "words": combined.get("words", [])} speech_analysis = analyze_speech(transcript_data) - has_speech = speech_analysis["has_speech"] - print(f"πŸ” SPEECH ANALYSIS: session={session_id}, has_speech={has_speech}, conv_id={conversation_id}, words={speech_analysis.get('word_count', 0)}") logger.info( - f"πŸ” Speech analysis for {session_id}: has_speech={has_speech}, " - f"conversation_id={conversation_id}, word_count={speech_analysis.get('word_count', 0)}" + f"πŸ” {speech_analysis.get('word_count', 0)} words, " + f"{speech_analysis.get('duration', 0):.1f}s, " + f"has_speech: {speech_analysis.get('has_speech', False)}" ) - if has_speech and not conversation_id: - print(f"πŸ’¬ SPEECH DETECTED! Checking if enrolled speakers present...") - logger.info(f"πŸ’¬ Speech detected in {session_id}!") - - # Check if we should filter by enrolled speakers (two-stage filter: text first, then speaker) - record_only_enrolled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true" - - if record_only_enrolled: - logger.info(f"🎀 Checking if enrolled speakers are present...") - - from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient - - # Get raw transcription results (with chunk IDs) - raw_results = await aggregator.get_session_results(session_id) - - # Check if enrolled speaker is speaking (also returns speaker recognition results) - speaker_client = SpeakerRecognitionClient() - enrolled_speaker_present, speaker_recognition_result = await speaker_client.check_if_enrolled_speaker_present( - redis_client=redis_client, - client_id=client_id, - session_id=session_id, - user_id=user_id, - transcription_results=raw_results - ) + if not speech_analysis.get("has_speech", False): + await asyncio.sleep(2) + continue - if not enrolled_speaker_present: - logger.info(f"⏭️ Meaningful speech detected but not from enrolled speakers, continuing to listen...") - await asyncio.sleep(2) - continue + 
logger.info(f"πŸ’¬ Meaningful speech detected!") - # Extract identified speakers from the result - identified_speakers = [] - if speaker_recognition_result and "segments" in speaker_recognition_result: - for seg in speaker_recognition_result["segments"]: - identified_as = seg.get("identified_as") - # Filter out None and "Unknown" values - if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: - identified_speakers.append(identified_as) + # Add session event for speech detected + from datetime import datetime + await redis_client.hset( + session_key, + "last_event", + f"speech_detected:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speech_detected_at", + datetime.utcnow().isoformat() + ) - num_segments = len(speaker_recognition_result["segments"]) + # Step 2: If speaker filter enabled, check for enrolled speakers + identified_speakers = [] + speaker_check_job = None # Initialize for later reference + if speaker_filter_enabled: + logger.info(f"🎀 Enqueuing speaker check job...") + + # Add session event for speaker check starting + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_starting:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "checking" + ) + from .speaker_jobs import check_enrolled_speakers_job + + # Enqueue speaker check as a separate trackable job + speaker_check_job = transcription_queue.enqueue( + check_enrolled_speakers_job, + session_id, + user_id, + client_id, + job_timeout=300, # 5 minutes for speaker recognition + result_ttl=600, + job_id=f"speaker-check_{session_id[:12]}_{conversation_count}", + description=f"Speaker check for conversation #{conversation_count+1}", + meta={'audio_uuid': session_id, 'client_id': client_id} + ) - if identified_speakers: - speakers_str = ", ".join(identified_speakers) - logger.info(f"βœ… Enrolled speaker(s) detected: {speakers_str}") - logger.info(f"🎀 Speaker 
recognition returned {num_segments} segments with {len(identified_speakers)} enrolled speaker(s)") - print(f"βœ… ENROLLED SPEAKERS DETECTED: {speakers_str} ({num_segments} segments)") - detected_speakers = identified_speakers # Store for return value + # Poll for result (with timeout) + max_wait = 30 # 30 seconds max + poll_interval = 0.5 + waited = 0 + enrolled_present = False + + while waited < max_wait: + try: + speaker_check_job.refresh() + except Exception as e: + from rq.exceptions import NoSuchJobError + if isinstance(e, NoSuchJobError): + logger.warning(f"⚠️ Speaker check job disappeared from Redis (likely completed quickly), assuming not enrolled") + break else: - logger.info(f"βœ… Enrolled speaker detected! (no identified_as field in segments)") - logger.info(f"🎀 Speaker recognition returned {num_segments} segments during enrollment check") - else: - logger.info(f"βœ… Enrolled speaker detected! Proceeding to create conversation...") - - # Check if conversation job already running for this session - open_job_key = f"open_conversation:session:{session_id}" - existing_job = await redis_client.get(open_job_key) - - if existing_job: - # Already have an open conversation job running - open_conversation_job_id = existing_job.decode() - logger.info(f"βœ… Conversation job already running: {open_conversation_job_id}") + raise + + if speaker_check_job.is_finished: + result = speaker_check_job.result + enrolled_present = result.get("enrolled_present", False) + identified_speakers = result.get("identified_speakers", []) + logger.info(f"βœ… Speaker check completed: enrolled={enrolled_present}") + + # Update session event for speaker check complete + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_complete:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "enrolled" if enrolled_present else "not_enrolled" + ) + if identified_speakers: + await redis_client.hset( + session_key, + 
"identified_speakers", + ",".join(identified_speakers) + ) + break + elif speaker_check_job.is_failed: + logger.warning(f"⚠️ Speaker check job failed, assuming not enrolled") + + # Update session event for speaker check failed + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_failed:{datetime.utcnow().isoformat()}" + ) + await redis_client.hset( + session_key, + "speaker_check_status", + "failed" + ) + break + await asyncio.sleep(poll_interval) + waited += poll_interval else: - # No conversation job running - enqueue one - speech_detected_at = time.time() - logger.info(f"πŸ“ Enqueueing open_conversation_job (speech detected at {speech_detected_at})") - - # Start open_conversation_job to create and monitor conversation - open_job = transcription_queue.enqueue( - open_conversation_job, - session_id, - user_id, - user_email, - client_id, - speech_detected_at, - job_timeout=3600, - result_ttl=600, - job_id=f"open-conv_{session_id[:12]}", - description=f"Open conversation for session {session_id[:12]}" + # Timeout - assume not enrolled + logger.warning(f"⏱️ Speaker check timed out after {max_wait}s, assuming not enrolled") + enrolled_present = False + + # Update session event for speaker check timeout + await redis_client.hset( + session_key, + "last_event", + f"speaker_check_timeout:{datetime.utcnow().isoformat()}" ) - open_conversation_job_id = open_job.id - - # Store job tracking (TTL handles cleanup automatically) - await redis_client.set( - open_job_key, - open_job.id, - ex=3600 # Expire after 1 hour + await redis_client.hset( + session_key, + "speaker_check_status", + "timeout" ) - logger.info(f"βœ… Enqueued conversation job {open_job.id}") - - # Exit this job now that conversation job is running - logger.info(f"🏁 Exiting speech detection job - conversation job is now managing session") - break - else: - if not has_speech: - logger.debug(f"⏭️ No speech detected yet (words: {speech_analysis.get('word_count', 0)})") + # Log speaker check 
result but proceed with conversation regardless + if enrolled_present: + logger.info(f"βœ… Enrolled speaker(s) found: {', '.join(identified_speakers) if identified_speakers else 'Unknown'}") else: - logger.debug(f"ℹ️ Speech detected but conversation already exists: {conversation_id}") + logger.info(f"ℹ️ No enrolled speakers found, but proceeding with conversation anyway") + + # Step 3: Start conversation and EXIT + speech_detected_at = time.time() + open_job_key = f"open_conversation:session:{session_id}" + + # Enqueue conversation job with speech detection job ID + from datetime import datetime + + speech_job_id = current_job.id if current_job else None + + open_job = transcription_queue.enqueue( + open_conversation_job, + session_id, + user_id, + client_id, + speech_detected_at, + speech_job_id, # Pass speech detection job ID + job_timeout=3600, + result_ttl=JOB_RESULT_TTL, # Use configured TTL (24 hours) instead of 10 minutes + job_id=f"open-conv_{session_id[:12]}_{conversation_count}", + description=f"Conversation #{conversation_count+1} for {session_id[:12]}", + meta={'audio_uuid': session_id, 'client_id': client_id} + ) + + # Track the job + await redis_client.set(open_job_key, open_job.id, ex=3600) - await asyncio.sleep(2) # Check every 2 seconds + # Store metadata in speech detection job + if current_job: + if not current_job.meta: + current_job.meta = {} - logger.info(f"βœ… Stream speech detection complete for {session_id}") + # Remove session_level flag now that conversation is starting + current_job.meta.pop('session_level', None) + current_job.meta.update({ + "conversation_job_id": open_job.id, + "speaker_check_job_id": speaker_check_job.id if speaker_check_job else None, + "detected_speakers": identified_speakers, + "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(), + "session_id": session_id, + "audio_uuid": session_id, # For job grouping + "client_id": client_id # For job grouping + }) + current_job.save_meta() + + 
logger.info(f"βœ… Started conversation job {open_job.id}, exiting speech detection") + + return { + "session_id": session_id, + "user_id": user_id, + "client_id": client_id, + "conversation_job_id": open_job.id, + "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(), + "runtime_seconds": time.time() - start_time + } + + # Session ended without speech + logger.info(f"βœ… Session ended without speech") return { "session_id": session_id, - "open_conversation_job_id": open_conversation_job_id, - "detected_speakers": detected_speakers, + "user_id": user_id, + "client_id": client_id, + "no_speech_detected": True, "runtime_seconds": time.time() - start_time } diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh index 963ff533..24a3e6e7 100755 --- a/backends/advanced/start-k8s.sh +++ b/backends/advanced/start-k8s.sh @@ -19,6 +19,7 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true + kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true kill $BACKEND_PID 2>/dev/null || true wait echo "βœ… All services stopped" @@ -53,7 +54,7 @@ fi # Clean up stale worker registrations from previous runs echo "🧹 Cleaning up stale worker registrations from Redis..." -uv run --no-sync python3 -c " +python3 -c " from rq import Worker from redis import Redis import os @@ -117,7 +118,7 @@ sleep 1 # NEW WORKERS - Redis Streams multi-provider architecture # Single worker ensures sequential processing of audio chunks (matching start-workers.sh) echo "🎡 Starting audio stream Deepgram worker (1 worker for sequential processing)..." -if uv run --no-sync python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker & +if python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker & then AUDIO_WORKER_1_PID=$! 
echo " βœ… Deepgram stream worker started with PID: $AUDIO_WORKER_1_PID" @@ -128,7 +129,7 @@ fi # Start 3 RQ workers listening to ALL queues (matching start-workers.sh) echo "πŸ”§ Starting RQ workers (3 workers, all queues: transcription, memory, default)..." -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_1_PID=$! echo " βœ… RQ worker 1 started with PID: $RQ_WORKER_1_PID" @@ -138,7 +139,7 @@ else exit 1 fi -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_2_PID=$! echo " βœ… RQ worker 2 started with PID: $RQ_WORKER_2_PID" @@ -148,7 +149,7 @@ else exit 1 fi -if uv run --no-sync rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & then RQ_WORKER_3_PID=$! echo " βœ… RQ worker 3 started with PID: $RQ_WORKER_3_PID" @@ -158,18 +159,30 @@ else exit 1 fi +# Start 1 dedicated audio persistence worker (matching start-workers.sh) +echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." +if python3 -m advanced_omi_backend.workers.rq_worker_entry audio & +then + AUDIO_PERSISTENCE_WORKER_PID=$! + echo " βœ… Audio persistence worker started with PID: $AUDIO_PERSISTENCE_WORKER_PID" +else + echo " ❌ Failed to start audio persistence worker" + kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID 2>/dev/null || true + exit 1 +fi + # Give workers a moment to start sleep 3 # Start the main FastAPI application echo "🌐 Starting FastAPI backend..." 
-if uv run --no-sync python3 src/advanced_omi_backend/main.py & +if python3 src/advanced_omi_backend/main.py & then BACKEND_PID=$! echo " βœ… FastAPI backend started with PID: $BACKEND_PID" else echo " ❌ Failed to start FastAPI backend" - kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID 2>/dev/null || true + kill $AUDIO_WORKER_1_PID $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true exit 1 fi @@ -178,6 +191,7 @@ echo " - Audio stream worker: $AUDIO_WORKER_1_PID (Redis Streams consumer - seq echo " - RQ worker 1: $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: $RQ_WORKER_3_PID (transcription, memory, default)" +echo " - Audio persistence worker: $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" echo " - FastAPI Backend: $BACKEND_PID" # Wait for any process to exit @@ -190,6 +204,7 @@ echo "⚠️ One service exited, stopping all services..." 
[ -n "$RQ_WORKER_1_PID" ] && kill $RQ_WORKER_1_PID 2>/dev/null || true [ -n "$RQ_WORKER_2_PID" ] && kill $RQ_WORKER_2_PID 2>/dev/null || true [ -n "$RQ_WORKER_3_PID" ] && kill $RQ_WORKER_3_PID 2>/dev/null || true +[ -n "$AUDIO_PERSISTENCE_WORKER_PID" ] && kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true [ -n "$BACKEND_PID" ] && kill $BACKEND_PID 2>/dev/null || true wait diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 173f986c..0f39cb09 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -13,16 +13,20 @@ uv run python -c " from rq import Worker from redis import Redis import os +import socket redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') redis_conn = Redis.from_url(redis_url) +hostname = socket.gethostname() -# Get all workers and clean up dead ones +# Only clean up workers from THIS hostname (pod) workers = Worker.all(connection=redis_conn) +cleaned = 0 for worker in workers: - # Force cleanup of all registered workers from previous runs - worker.register_death() -print(f'Cleaned up {len(workers)} stale workers') + if hostname in worker.name: + worker.register_death() + cleaned += 1 +print(f'Cleaned up {cleaned} stale workers from {hostname}') " 2>/dev/null || echo "No stale workers to clean" sleep 1 @@ -33,7 +37,11 @@ shutdown() { kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true - kill $AUDIO_WORKER_1_PID 2>/dev/null || true + kill $RQ_WORKER_4_PID 2>/dev/null || true + kill $RQ_WORKER_5_PID 2>/dev/null || true + kill $RQ_WORKER_6_PID 2>/dev/null || true + kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true + kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait echo "βœ… All workers stopped" exit 0 @@ -45,26 +53,42 @@ trap shutdown SIGTERM SIGINT # Configure Python logging for RQ workers export PYTHONUNBUFFERED=1 -# Start 3 RQ workers listening to ALL queues -echo "πŸ”§ Starting 
RQ workers (3 workers, all queues: transcription, memory, default)..." -uv run rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +# Start 6 RQ workers listening to ALL queues +echo "πŸ”§ Starting RQ workers (6 workers, all queues: transcription, memory, default)..." +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_1_PID=$! -uv run rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_2_PID=$! -uv run rq worker transcription memory default --url "${REDIS_URL:-redis://localhost:6379/0}" --verbose --logging_level INFO & +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & RQ_WORKER_3_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_4_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_5_PID=$! +uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default & +RQ_WORKER_6_PID=$! + +# Start 1 dedicated audio persistence worker +# Single worker for audio persistence jobs (file rotation) +echo "πŸ’Ύ Starting audio persistence worker (1 worker for audio queue)..." +uv run python -m advanced_omi_backend.workers.rq_worker_entry audio & +AUDIO_PERSISTENCE_WORKER_PID=$! # Start 1 audio stream worker for Deepgram # Single worker ensures sequential processing of audio chunks echo "🎡 Starting audio stream Deepgram worker (1 worker for sequential processing)..." uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker & -AUDIO_WORKER_1_PID=$! +AUDIO_STREAM_WORKER_PID=$! 
echo "βœ… All workers started:" echo " - RQ worker 1: PID $RQ_WORKER_1_PID (transcription, memory, default)" echo " - RQ worker 2: PID $RQ_WORKER_2_PID (transcription, memory, default)" echo " - RQ worker 3: PID $RQ_WORKER_3_PID (transcription, memory, default)" -echo " - Audio stream worker: PID $AUDIO_WORKER_1_PID (Redis Streams consumer - sequential processing)" +echo " - RQ worker 4: PID $RQ_WORKER_4_PID (transcription, memory, default)" +echo " - RQ worker 5: PID $RQ_WORKER_5_PID (transcription, memory, default)" +echo " - RQ worker 6: PID $RQ_WORKER_6_PID (transcription, memory, default)" +echo " - Audio persistence worker: PID $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)" +echo " - Audio stream worker: PID $AUDIO_STREAM_WORKER_PID (Redis Streams consumer - sequential processing)" # Wait for any process to exit wait -n @@ -74,7 +98,11 @@ echo "⚠️ One worker exited, stopping all workers..." kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true -kill $AUDIO_WORKER_1_PID 2>/dev/null || true +kill $RQ_WORKER_4_PID 2>/dev/null || true +kill $RQ_WORKER_5_PID 2>/dev/null || true +kill $RQ_WORKER_6_PID 2>/dev/null || true +kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true +kill $AUDIO_STREAM_WORKER_PID 2>/dev/null || true wait echo "πŸ”„ All workers stopped" diff --git a/backends/advanced/tests/test_integration.py b/backends/advanced/tests/test_integration.py index e0b55798..a4422d4c 100644 --- a/backends/advanced/tests/test_integration.py +++ b/backends/advanced/tests/test_integration.py @@ -735,7 +735,7 @@ def upload_test_audio(self): logger.info("πŸ“€ Sending upload request...") response = requests.post( - f"{BACKEND_URL}/api/process-audio-files", + f"{BACKEND_URL}/api/audio/upload", files=files, data=data, headers=headers, @@ -749,118 +749,124 @@ def upload_test_audio(self): result = response.json() logger.info(f"πŸ“€ Upload response: {json.dumps(result, indent=2)}") - + # 
Extract client_id from response - client_id = None - if result.get('conversations'): - client_id = result['conversations'][0].get('client_id') - elif result.get('processed_files'): - client_id = result['processed_files'][0].get('client_id') - elif result.get('files'): - client_id = result['files'][0].get('client_id') - + client_id = result.get('client_id') if not client_id: raise RuntimeError("No client_id in upload response") - + logger.info(f"πŸ“€ Generated client_id: {client_id}") - return client_id - - def verify_processing_results(self, client_id: str): - """Verify that audio was processed correctly.""" - logger.info(f"πŸ” Verifying processing results for client: {client_id}") + return result # Return full response with job IDs - # Use backend API instead of direct MongoDB connection - - # First, wait for processing to complete using processor status endpoint - logger.info("πŸ” Waiting for processing to complete...") + def verify_processing_results(self, upload_response: dict): + """Verify that audio was processed correctly using job tracking.""" + client_id = upload_response.get('client_id') + files = upload_response.get('files', []) + + if not files: + raise RuntimeError("No files in upload response") + + file_info = files[0] + transcript_job_id = file_info.get('transcript_job_id') + conversation_id = file_info.get('conversation_id') + + logger.info(f"πŸ” Verifying processing results:") + logger.info(f" - Client ID: {client_id}") + logger.info(f" - Conversation ID: {conversation_id}") + logger.info(f" - Transcript Job ID: {transcript_job_id}") + + # Wait for transcription job to complete + logger.info("πŸ” Waiting for transcription job to complete...") start_time = time.time() - processing_complete = False - - while time.time() - start_time < 60: # Wait up to 60 seconds for processing + job_complete = False + + while time.time() - start_time < 60: # Wait up to 60 seconds for transcription try: - # Check processor status for this client + # Check job 
status via queue API response = requests.get( - f"{BACKEND_URL}/api/processor/tasks/{client_id}", + f"{BACKEND_URL}/api/queue/jobs/{transcript_job_id}", headers={"Authorization": f"Bearer {self.token}"}, timeout=10 ) - + if response.status_code == 200: - data = response.json() - stages = data.get("stages", {}) - - # Check if transcription stage is complete - transcription_stage = stages.get("transcription", {}) - if transcription_stage.get("completed", False): - logger.info(f"βœ… Transcription processing completed for client_id: {client_id}") - processing_complete = True + job_data = response.json() + status = job_data.get("status") + + if status == "completed": + logger.info(f"βœ… Transcription job completed successfully") + job_complete = True break - - # Check for errors - if transcription_stage.get("error"): - logger.error(f"❌ Transcription error: {transcription_stage.get('error')}") + elif status == "failed": + error = job_data.get("exc_info", "Unknown error") + logger.error(f"❌ Transcription job failed: {error}") break - - # Show processing status - logger.info(f"πŸ“Š Processing status: {data.get('status', 'unknown')}") - for stage_name, stage_info in stages.items(): - completed = stage_info.get("completed", False) - error = stage_info.get("error") - status = "βœ…" if completed else "❌" if error else "⏳" - logger.info(f" {status} {stage_name}: {'completed' if completed else 'error' if error else 'processing'}") - + else: + logger.info(f"⏳ Job status: {status} ({time.time() - start_time:.1f}s)") + else: - logger.warning(f"❌ Processor status API call failed with status: {response.status_code}") - + logger.warning(f"⚠️ Job status check returned {response.status_code}") + except Exception as e: - logger.warning(f"❌ Error calling processor status API: {e}") - - logger.info(f"⏳ Still waiting for processing... 
({time.time() - start_time:.1f}s)") - time.sleep(3) - - if not processing_complete: - logger.error(f"❌ Processing did not complete within timeout for client_id: {client_id}") - # Don't fail immediately, try to get conversation anyway - - # Now get the conversation via API - logger.info("πŸ” Retrieving conversation...") + logger.warning(f"⚠️ Error checking job status: {e}") + + time.sleep(5) + + if not job_complete: + raise AssertionError(f"Transcription job did not complete within 60 seconds. Last status: {status if 'status' in locals() else 'unknown'}") + + # Get the conversation via API + logger.info(f"πŸ” Retrieving conversation...") conversation = None - + try: - # Get conversations via API + # Get conversations list response = requests.get( f"{BACKEND_URL}/api/conversations", headers={"Authorization": f"Bearer {self.token}"}, timeout=10 ) - + if response.status_code == 200: data = response.json() - conversations = data.get("conversations", {}) - - # Look for our client_id in the conversations - if client_id in conversations: - conversation_list = conversations[client_id] - if conversation_list: - conversation = conversation_list[0] # Get the first (most recent) conversation - logger.info(f"βœ… Found conversation for client_id: {client_id}") - else: - logger.warning(f"⚠️ Client ID found but no conversations in list") + conversations_list = data.get("conversations", []) + + # Find our conversation by conversation_id or client_id + for conv in conversations_list: + if conv.get('conversation_id') == conversation_id or conv.get('client_id') == client_id: + conversation = conv + logger.info(f"βœ… Found conversation in list: {conv.get('conversation_id')}") + break + + if not conversation: + logger.error(f"❌ Conversation not found in list of {len(conversations_list)} conversations") + if conversations_list: + logger.error(f"πŸ“Š Available conversations: {[c.get('conversation_id') for c in conversations_list[:5]]}") else: - # Debug: show available conversations - 
available_clients = list(conversations.keys()) - logger.error(f"❌ Client ID {client_id} not found in conversations") - logger.error(f"πŸ“Š Available client_ids: {available_clients}") - + # Fetch full conversation details (list endpoint excludes transcript for performance) + logger.info(f"πŸ” Fetching full conversation details...") + detail_response = requests.get( + f"{BACKEND_URL}/api/conversations/{conversation['conversation_id']}", + headers={"Authorization": f"Bearer {self.token}"}, + timeout=10 + ) + + if detail_response.status_code == 200: + conversation = detail_response.json()["conversation"] + logger.info(f"βœ… Retrieved full conversation details with transcript") + else: + logger.error(f"❌ Failed to fetch conversation details: {detail_response.status_code}") + logger.error(f"Response: {detail_response.text}") + else: - logger.error(f"❌ Conversations API call failed with status: {response.status_code}") - + logger.error(f"❌ Conversations API returned status: {response.status_code}") + logger.error(f"Response: {response.text}") + except Exception as e: - logger.error(f"❌ Error calling conversations API: {e}") - + logger.error(f"❌ Error retrieving conversations: {e}", exc_info=True) + if not conversation: - logger.error(f"❌ No conversation found for client_id: {client_id}") - raise AssertionError(f"No conversation found for client_id: {client_id}") + raise AssertionError(f"No conversation found for conversation_id: {conversation_id}") logger.info(f"βœ“ Conversation found: {conversation['audio_uuid']}") @@ -871,19 +877,14 @@ def verify_processing_results(self, client_id: str): logger.info(f" - Audio Path: {conversation.get('audio_path', 'N/A')}") logger.info(f" - Timestamp: {conversation.get('timestamp', 'N/A')}") - # Verify transcription (stored as array in conversation) - transcript_segments = conversation.get('transcript', []) + # Verify transcription (transcript is a string, segments is an array) + transcription = conversation.get('transcript', '') + 
segments = conversation.get('segments', []) + logger.info(f"πŸ“ Transcription details:") - logger.info(f" - Transcript segments: {len(transcript_segments)}") - - # Extract full transcription text from segments - transcription = "" - if transcript_segments: - # Combine all transcript segments - transcription = " ".join([segment.get('text', '') for segment in transcript_segments]) - - logger.info(f" - Length: {len(transcription)} characters") + logger.info(f" - Transcript length: {len(transcription)} characters") logger.info(f" - Word count: {len(transcription.split()) if transcription else 0}") + logger.info(f" - Speaker segments: {len(segments)}") if transcription: # Show first 200 characters of transcription @@ -916,7 +917,7 @@ def verify_processing_results(self, client_id: str): # Verify conversation has required fields assert conversation.get('transcript'), "Conversation missing transcript" - assert len(conversation['transcript']) > 0, "Transcript array is empty" + assert len(conversation['transcript']) > 0, "Transcript is empty" assert transcription.strip(), "Transcription text is empty" # Check for memory extraction (if LLM is configured) @@ -937,12 +938,20 @@ def verify_processing_results(self, client_id: str): return conversation, transcription - def validate_memory_extraction(self, client_id: str): + def validate_memory_extraction(self, upload_response: dict): """Validate that memory extraction worked correctly.""" + client_id = upload_response.get('client_id') + files = upload_response.get('files', []) + logger.info(f"🧠 Validating memory extraction for client: {client_id}") - + + # Get memory job ID from upload response + memory_job_id = files[0].get('memory_job_id') if files else None + if not memory_job_id: + raise RuntimeError("No memory_job_id in upload response") + # Wait for memory processing to complete - client_memories = self.wait_for_memory_processing(client_id) + client_memories = self.wait_for_memory_processing(memory_job_id, client_id) # 
Check if we're using OpenMemory MCP provider memory_provider = os.environ.get("MEMORY_PROVIDER", "friend_lite") @@ -1148,67 +1157,47 @@ def get_memories_from_api(self) -> list: logger.error(f"Error fetching memories: {e}") return [] - def wait_for_memory_processing(self, client_id: str, timeout: int = 120): - """Wait for memory processing to complete using processor status API.""" - logger.info(f"⏳ Waiting for memory processing to complete for client: {client_id}") - + def wait_for_memory_processing(self, memory_job_id: str, client_id: str, timeout: int = 120): + """Wait for memory processing to complete using queue API.""" + logger.info(f"⏳ Waiting for memory job {memory_job_id} to complete...") + start_time = time.time() - memory_processing_complete = False - - # First, wait for memory processing completion using processor status API + job_complete = False + while time.time() - start_time < timeout: try: - # Check processor status for this client (same pattern as transcription) + # Check job status via queue API response = requests.get( - f"{BACKEND_URL}/api/processor/tasks/{client_id}", + f"{BACKEND_URL}/api/queue/jobs/{memory_job_id}", headers={"Authorization": f"Bearer {self.token}"}, timeout=10 ) - + if response.status_code == 200: - data = response.json() - - # DEBUG: Log full API response to see exactly what we're getting - logger.info(f"πŸ” Full processor status API response: {data}") - - stages = data.get("stages", {}) - - # Check if memory stage is complete - memory_stage = stages.get("memory", {}) - logger.info(f"🧠 Memory stage data: {memory_stage}") - - if memory_stage.get("completed", False): - logger.info(f"βœ… Memory processing completed for client_id: {client_id}") - memory_processing_complete = True + job_data = response.json() + status = job_data.get("status") + + if status == "completed": + logger.info(f"βœ… Memory job completed successfully") + job_complete = True break - - # Check for errors - if memory_stage.get("error"): - logger.error(f"❌ 
Memory processing error: {memory_stage.get('error')}") + elif status == "failed": + error = job_data.get("exc_info", "Unknown error") + logger.error(f"❌ Memory job failed: {error}") break - - # Show processing status for memory stage - logger.info(f"πŸ“Š Memory processing status: {data.get('status', 'unknown')}") - for stage_name, stage_info in stages.items(): - if stage_name == "memory": # Focus on memory stage - completed = stage_info.get("completed", False) - error = stage_info.get("error") - status = "βœ…" if completed else "❌" if error else "⏳" - logger.info(f" {status} {stage_name}: {'completed' if completed else 'error' if error else 'processing'}") - # DEBUG: Show all fields in memory stage - logger.info(f" All memory stage fields: {stage_info}") - + else: + logger.info(f"⏳ Memory job status: {status} ({time.time() - start_time:.1f}s)") + else: - logger.warning(f"❌ Processor status API call failed with status: {response.status_code}") - + logger.warning(f"⚠️ Memory job status check returned {response.status_code}") + except Exception as e: - logger.warning(f"❌ Error calling processor status API: {e}") - - logger.info(f"⏳ Still waiting for memory processing... ({time.time() - start_time:.1f}s)") - time.sleep(3) - - if not memory_processing_complete: - logger.warning(f"⚠️ Memory processing did not complete within {timeout}s, trying to fetch existing memories anyway") + logger.warning(f"⚠️ Error checking memory job status: {e}") + + time.sleep(5) + + if not job_complete: + raise AssertionError(f"Memory job did not complete within {timeout} seconds. 
Last status: {status if 'status' in locals() else 'unknown'}") # Now fetch the memories from the API memories = self.get_memories_from_api() @@ -1467,21 +1456,22 @@ def test_full_pipeline_integration(test_runner): # Phase 5: Audio upload and processing phase_start = time.time() logger.info("πŸ“€ Phase 5: Audio upload...") - client_id = test_runner.upload_test_audio() + upload_response = test_runner.upload_test_audio() + client_id = upload_response.get('client_id') phase_times['audio_upload'] = time.time() - phase_start logger.info(f"βœ… Audio upload completed in {phase_times['audio_upload']:.2f}s") - + # Phase 6: Transcription processing phase_start = time.time() logger.info("🎀 Phase 6: Transcription processing...") - conversation, transcription = test_runner.verify_processing_results(client_id) + conversation, transcription = test_runner.verify_processing_results(upload_response) phase_times['transcription_processing'] = time.time() - phase_start logger.info(f"βœ… Transcription processing completed in {phase_times['transcription_processing']:.2f}s") - + # Phase 7: Memory extraction phase_start = time.time() logger.info("🧠 Phase 7: Memory extraction...") - memories = test_runner.validate_memory_extraction(client_id) + memories = test_runner.validate_memory_extraction(upload_response) phase_times['memory_extraction'] = time.time() - phase_start logger.info(f"βœ… Memory extraction completed in {phase_times['memory_extraction']:.2f}s") diff --git a/backends/advanced/upload_files.py b/backends/advanced/upload_files.py index 3724286b..44ca0e26 100755 --- a/backends/advanced/upload_files.py +++ b/backends/advanced/upload_files.py @@ -219,8 +219,8 @@ def upload_files_async(files: list[str], token: str, base_url: str = "http://loc logger.error("No files to upload.") return False - logger.info(f"πŸš€ Starting async upload to {base_url}/api/process-audio-files-async ...") - + logger.info(f"πŸš€ Starting async upload to {base_url}/api/audio/upload ...") + # Prepare files for 
upload files_data = [] for file_path in files: @@ -229,15 +229,15 @@ def upload_files_async(files: list[str], token: str, base_url: str = "http://loc except IOError as e: logger.error(f"Error opening file {file_path}: {e}") continue - + if not files_data: logger.error("No files could be opened for upload.") return False - + try: # Submit files for async processing response = requests.post( - f"{base_url}/api/process-audio-files-async", + f"{base_url}/api/audio/upload", files=files_data, data={'device_name': 'file_upload_batch'}, headers={ @@ -288,7 +288,7 @@ def upload_files_async(files: list[str], token: str, base_url: str = "http://loc def poll_job_status(job_id: str, token: str, base_url: str, total_files: int) -> bool: """Poll job status until completion with progress updates.""" - status_url = f"{base_url}/api/process-audio-files/jobs/{job_id}" + status_url = f"{base_url}/api/queue/jobs/{job_id}" headers = {'Authorization': f'Bearer {token}'} start_time = time.time() diff --git a/backends/advanced/webui/README.md b/backends/advanced/webui/README.md index 78d39dab..f093f66b 100644 --- a/backends/advanced/webui/README.md +++ b/backends/advanced/webui/README.md @@ -191,7 +191,7 @@ The frontend integrates with these backend endpoints: - `GET /api/clients/active` - Active WebSocket clients ### Upload (Admin) -- `POST /api/process-audio-files` - Upload and process audio files +- `POST /api/audio/upload` - Upload and process audio files ## Deployment Notes diff --git a/backends/advanced/webui/src/pages/Conversations.tsx b/backends/advanced/webui/src/pages/Conversations.tsx index 85df4008..370117f1 100644 --- a/backends/advanced/webui/src/pages/Conversations.tsx +++ b/backends/advanced/webui/src/pages/Conversations.tsx @@ -101,7 +101,11 @@ export default function Conversations() { const formatDate = (timestamp: number | string) => { // Handle both Unix timestamp (number) and ISO string if (typeof timestamp === 'string') { - return new Date(timestamp).toLocaleString() 
+ // If the string doesn't include timezone info, append 'Z' to treat as UTC + const isoString = timestamp.endsWith('Z') || timestamp.includes('+') || timestamp.includes('T') && timestamp.split('T')[1].includes('-') + ? timestamp + : timestamp + 'Z' + return new Date(isoString).toLocaleString() } // If timestamp is 0, return placeholder if (timestamp === 0) { @@ -207,19 +211,19 @@ export default function Conversations() { } } - const toggleTranscriptExpansion = async (audioUuid: string) => { + const toggleTranscriptExpansion = async (conversationId: string) => { // If already expanded, just collapse - if (expandedTranscripts.has(audioUuid)) { + if (expandedTranscripts.has(conversationId)) { setExpandedTranscripts(prev => { const newSet = new Set(prev) - newSet.delete(audioUuid) + newSet.delete(conversationId) return newSet }) return } - // Find the conversation by audio_uuid - const conversation = conversations.find(c => c.audio_uuid === audioUuid) + // Find the conversation by conversation_id + const conversation = conversations.find(c => c.conversation_id === conversationId) if (!conversation || !conversation.conversation_id) { console.error('Cannot expand transcript: conversation_id missing') return @@ -227,7 +231,7 @@ export default function Conversations() { // If segments are already loaded, just expand if (conversation.segments && conversation.segments.length > 0) { - setExpandedTranscripts(prev => new Set(prev).add(audioUuid)) + setExpandedTranscripts(prev => new Set(prev).add(conversationId)) return } @@ -237,12 +241,12 @@ export default function Conversations() { if (response.status === 200 && response.data.conversation) { // Update the conversation in state with full segments and transcript setConversations(prev => prev.map(c => - c.audio_uuid === audioUuid + c.conversation_id === conversationId ? 
{ ...c, segments: response.data.conversation.segments, transcript: response.data.conversation.transcript } : c )) // Expand the transcript - setExpandedTranscripts(prev => new Set(prev).add(audioUuid)) + setExpandedTranscripts(prev => new Set(prev).add(conversationId)) } } catch (err: any) { console.error('Failed to fetch conversation details:', err) @@ -557,7 +561,7 @@ export default function Conversations() { {/* Transcript Header with Expand/Collapse */}
toggleTranscriptExpansion(conversation.audio_uuid)} + onClick={() => conversation.conversation_id && toggleTranscriptExpansion(conversation.conversation_id)} >

Transcript {((conversation.segments && conversation.segments.length > 0) || conversation.segment_count) && ( @@ -567,7 +571,7 @@ export default function Conversations() { )}

- {expandedTranscripts.has(conversation.audio_uuid) ? ( + {conversation.conversation_id && expandedTranscripts.has(conversation.conversation_id) ? ( ) : ( @@ -576,7 +580,7 @@ export default function Conversations() {
{/* Transcript Content - Conditionally Rendered */} - {expandedTranscripts.has(conversation.audio_uuid) && ( + {conversation.conversation_id && expandedTranscripts.has(conversation.conversation_id) && (
{conversation.segments && conversation.segments.length > 0 ? (
diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx index 4b7f4ed0..e5b27dca 100644 --- a/backends/advanced/webui/src/pages/Queue.tsx +++ b/backends/advanced/webui/src/pages/Queue.tsx @@ -24,28 +24,6 @@ import { } from 'lucide-react'; import { queueApi } from '../services/api'; -interface QueueJob { - job_id: string; - job_type: string; - user_id: string; - status: 'queued' | 'processing' | 'completed' | 'failed' | 'cancelled' | 'deferred' | 'waiting'; - priority: 'low' | 'normal' | 'high'; - data: { - description?: string; - [key: string]: any; - }; - result?: any; - error_message?: string; - created_at: string; - started_at?: string; - completed_at?: string; - ended_at?: string; // API returns this field instead of completed_at - retry_count: number; - max_retries: number; - progress_percent: number; - progress_message: string; -} - interface QueueStats { total_jobs: number; queued_jobs: number; @@ -75,6 +53,12 @@ interface StreamingSession { last_chunk_at: number; age_seconds: number; idle_seconds: number; + conversation_count?: number; + // Speech detection events + last_event?: string; + speech_detected_at?: string; + speaker_check_status?: string; + identified_speakers?: string; } interface StreamConsumer { @@ -109,10 +93,12 @@ interface CompletedSession { } interface StreamingStatus { - active_sessions: StreamingSession[]; + active_sessions: StreamingSession[]; // Kept for backward compatibility completed_sessions: CompletedSession[]; stream_health: { - [provider: string]: StreamHealth; + [streamKey: string]: StreamHealth & { + stream_age_seconds?: number; + }; }; rq_queues: { [queue: string]: { @@ -124,7 +110,7 @@ interface StreamingStatus { } const Queue: React.FC = () => { - const [jobs, setJobs] = useState([]); + const [jobs, setJobs] = useState([]); const [stats, setStats] = useState(null); const [streamingStatus, setStreamingStatus] = useState(null); const [loading, setLoading] = useState(true); @@ 
-150,6 +136,7 @@ const Queue: React.FC = () => { }); const [flushing, setFlushing] = useState(false); const [expandedSessions, setExpandedSessions] = useState>(new Set()); + const [expandedJobs, setExpandedJobs] = useState>(new Set()); const [sessionJobs, setSessionJobs] = useState<{[sessionId: string]: any[]}>({}); const [lastUpdate, setLastUpdate] = useState(Date.now()); const [autoRefreshEnabled, setAutoRefreshEnabled] = useState(() => { @@ -158,6 +145,11 @@ const Queue: React.FC = () => { return saved !== null ? saved === 'true' : true; }); + // Completed conversations pagination + const [completedConvPage, setCompletedConvPage] = useState(1); + const [completedConvItemsPerPage] = useState(10); + const [completedConvTimeRange, setCompletedConvTimeRange] = useState(24); // hours + // Use refs to track current state in interval const expandedSessionsRef = useRef>(new Set()); const streamingStatusRef = useRef(null); @@ -176,49 +168,7 @@ const Queue: React.FC = () => { refreshingRef.current = refreshing; }, [refreshing]); - // Refresh jobs for all expanded, active, and completed sessions - const refreshSessionJobs = useCallback(async () => { - const currentExpanded = expandedSessionsRef.current; - const currentStreamingStatus = streamingStatusRef.current; - - // Get all active session IDs - const activeSessionIds = currentStreamingStatus?.active_sessions - ?.filter(s => s.status !== 'complete') - .map(s => s.session_id) || []; - - // Get all completed session IDs - const completedSessionIds = currentStreamingStatus?.completed_sessions - ?.map(s => s.session_id) || []; - - // Get all session IDs that should have jobs loaded (expanded, active, or completed) - const sessionIdsToRefresh = new Set([...currentExpanded, ...activeSessionIds, ...completedSessionIds]); - - if (sessionIdsToRefresh.size === 0) return; - - // Fetch jobs for all sessions in parallel - const fetchPromises = Array.from(sessionIdsToRefresh).map(async (sessionId) => { - try { - const response = 
await queueApi.getJobsBySession(sessionId); - return { sessionId, jobs: response.data.jobs }; - } catch (error) { - console.error(`❌ Failed to refresh jobs for session ${sessionId}:`, error); - return { sessionId, jobs: [] }; - } - }); - - const results = await Promise.all(fetchPromises); - - // Update session jobs state with all results - setSessionJobs(prev => { - const updated = { ...prev }; - results.forEach(({ sessionId, jobs }) => { - updated[sessionId] = jobs; - }); - return updated; - }); - }, []); - - // Main data fetch function + // Main data fetch function - uses consolidated dashboard endpoint const fetchData = useCallback(async () => { if (refreshingRef.current) { return; @@ -227,20 +177,125 @@ const Queue: React.FC = () => { setRefreshing(true); try { - // Fetch all main data in parallel - await Promise.all([fetchJobs(), fetchStats(), fetchStreamingStatus()]); + const currentExpanded = expandedSessionsRef.current; + const expandedSessionIds = Array.from(currentExpanded); + + // Single API call to get all dashboard data + const response = await queueApi.getDashboard(expandedSessionIds); + const dashboardData = response.data; + + // Extract jobs from response + const queuedJobs = dashboardData.jobs.queued || []; + const processingJobs = dashboardData.jobs.processing || []; + const completedJobs = dashboardData.jobs.completed || []; + const failedJobs = dashboardData.jobs.failed || []; + + // Combine all jobs + const allFetchedJobs = [...queuedJobs, ...processingJobs, ...completedJobs, ...failedJobs]; + + console.log(`πŸ“Š Fetched ${allFetchedJobs.length} total jobs via consolidated endpoint`); + console.log(` - Queued: ${queuedJobs.length}`); + console.log(` - Processing: ${processingJobs.length}`); + console.log(` - Completed: ${completedJobs.length}`); + console.log(` - Failed: ${failedJobs.length}`); + + // Debug: Log open_conversation_job details + const openConvJobs = allFetchedJobs.filter(j => j?.job_type === 'open_conversation_job'); + 
console.log(`πŸ” Found ${openConvJobs.length} open_conversation_job(s):`); + openConvJobs.forEach(job => { + console.log(` Job ID: ${job.job_id}`); + console.log(` Status: ${job.status}`); + console.log(` meta.audio_uuid: ${job.meta?.audio_uuid}`); + console.log(` meta.conversation_id: ${job.meta?.conversation_id}`); + }); - // Then refresh session jobs - await refreshSessionJobs(); + // Group jobs by session_id (use audio_uuid from metadata) + const jobsBySession: {[sessionId: string]: any[]} = {}; + + allFetchedJobs.forEach(job => { + if (!job || !job.job_id) return; // Skip invalid jobs + + // Extract session_id from meta.audio_uuid + const sessionId = job.meta?.audio_uuid; + if (sessionId) { + if (!jobsBySession[sessionId]) { + jobsBySession[sessionId] = []; + } + jobsBySession[sessionId].push(job); + + // Debug logging for grouping + if (job.job_type === 'open_conversation_job') { + console.log(`βœ… Grouped open_conversation_job ${job.job_id} under session ${sessionId}`); + } + } else { + // Log jobs that couldn't be grouped + console.log(`⚠️ Job ${job.job_id} (${job.job_type}) has no session_id - cannot group`); + } + }); + // Merge session jobs from dashboard response + if (dashboardData.session_jobs) { + Object.entries(dashboardData.session_jobs).forEach(([sessionId, jobs]: [string, any]) => { + // Merge with existing jobs and deduplicate by job_id + const existingJobs = jobsBySession[sessionId] || []; + const existingJobIds = new Set(existingJobs.map((j: any) => j.job_id)); + const newJobs = jobs.filter((j: any) => !existingJobIds.has(j.job_id)); + jobsBySession[sessionId] = [...existingJobs, ...newJobs]; + }); + } + + // Update state + setJobs(allFetchedJobs); + setSessionJobs(jobsBySession); + setStats(dashboardData.stats); + setStreamingStatus(dashboardData.streaming_status); setLastUpdate(Date.now()); + + // Auto-expand active conversations (those with open_conversation_job in progress) + const newExpanded = new Set(expandedSessions); + const 
newExpandedJobs = new Set(expandedJobs); + let expandedCount = 0; + let expandedJobsCount = 0; + + // Find all conversations with active open_conversation_job + Object.entries(jobsBySession).forEach(([_sessionId, jobs]) => { + const openConvJob = jobs.find((j: any) => j.job_type === 'open_conversation_job'); + if (openConvJob && openConvJob.status === 'started') { + const conversationId = openConvJob.meta?.conversation_id; + if (conversationId && !expandedSessions.has(conversationId)) { + newExpanded.add(conversationId); + expandedCount++; + console.log(`πŸ”“ Auto-expanding active conversation: ${conversationId}`); + } + + // Also expand all job cards in active conversations + jobs.forEach((job: any) => { + if (!expandedJobs.has(job.job_id)) { + newExpandedJobs.add(job.job_id); + expandedJobsCount++; + } + }); + } + }); + + // Update expanded sessions if any new active conversations found + if (expandedCount > 0) { + console.log(`πŸ“‚ Auto-expanded ${expandedCount} active conversation(s)`); + setExpandedSessions(newExpanded); + } + + // Update expanded jobs if any new jobs found + if (expandedJobsCount > 0) { + console.log(`πŸ“‚ Auto-expanded ${expandedJobsCount} job card(s) in active conversations`); + setExpandedJobs(newExpandedJobs); + } } catch (error) { - console.error('❌ Error fetching queue data:', error); + console.error('❌ Error fetching dashboard data:', error); } finally { setLoading(false); setRefreshing(false); } - }, [refreshSessionJobs]); + }, []); // Save auto-refresh preference to localStorage useEffect(() => { @@ -267,70 +322,6 @@ const Queue: React.FC = () => { fetchData(); }, [filters, pagination.offset, fetchData]); - const fetchJobs = async () => { - try { - const params = new URLSearchParams({ - limit: pagination.limit.toString(), - offset: pagination.offset.toString(), - sort: 'created_at', - order: 'desc' - }); - - if (filters.status) params.append('status', filters.status); - if (filters.job_type) params.append('job_type', 
filters.job_type); - if (filters.priority) params.append('priority', filters.priority); - - const response = await queueApi.getJobs(params); - const data = response.data; - setJobs(data.jobs); - setPagination(prev => ({ - ...prev, - total: data.pagination.total, - has_more: data.pagination.has_more - })); - } catch (error) { - console.error('❌ Error fetching jobs:', error); - } - }; - - const fetchStats = async () => { - try { - const response = await queueApi.getStats(); - const data = response.data; - setStats(data); - } catch (error) { - console.error('❌ Error fetching stats:', error); - } - }; - - const fetchStreamingStatus = async () => { - try { - const response = await queueApi.getStreamingStatus(); - const data = response.data; - setStreamingStatus(data); - - // Auto-expand active sessions - if (data.active_sessions && data.active_sessions.length > 0) { - setExpandedSessions(prev => { - const newExpanded = new Set(prev); - let hasChanges = false; - - data.active_sessions.filter((s: StreamingSession) => s.status !== 'complete').forEach((session: StreamingSession) => { - if (!newExpanded.has(session.session_id)) { - newExpanded.add(session.session_id); - hasChanges = true; - } - }); - - return hasChanges ? 
newExpanded : prev; - }); - } - } catch (error) { - console.error('❌ Error fetching streaming status:', error); - // Don't fail the whole page if streaming status fails - setStreamingStatus(null); - } - }; const viewJobDetails = async (jobId: string) => { setLoadingJobDetails(true); @@ -345,25 +336,42 @@ const Queue: React.FC = () => { } }; - const retryJob = async (jobId: string) => { - try { - await queueApi.retryJob(jobId, false); - fetchJobs(); - } catch (error) { - console.error('Error retrying job:', error); - } - }; - - const cancelJob = async (jobId: string) => { - if (!confirm('Are you sure you want to cancel this job?')) return; + // ESC key handler for modals + useEffect(() => { + const handleEscape = (e: KeyboardEvent) => { + if (e.key === 'Escape') { + if (selectedJob) { + setSelectedJob(null); + } else if (showFlushModal) { + setShowFlushModal(false); + } + } + }; - try { - await queueApi.cancelJob(jobId); - fetchJobs(); - } catch (error) { - console.error('Error cancelling job:', error); - } - }; + document.addEventListener('keydown', handleEscape); + return () => document.removeEventListener('keydown', handleEscape); + }, [selectedJob, showFlushModal]); + + // Commented out - keeping for future use + // const retryJob = async (jobId: string) => { + // try { + // await queueApi.retryJob(jobId, false); + // fetchData(); + // } catch (error) { + // console.error('Error retrying job:', error); + // } + // }; + + // const cancelJob = async (jobId: string) => { + // if (!confirm('Are you sure you want to cancel this job?')) return; + + // try { + // await queueApi.cancelJob(jobId); + // fetchData(); + // } catch (error) { + // console.error('Error cancelling job:', error); + // } + // }; const cleanupStuckWorkers = async () => { if (!confirm('This will clean up all stuck workers and pending messages. 
Continue?')) return; @@ -380,8 +388,8 @@ const Queue: React.FC = () => { ).join('\n') }`); - // Refresh streaming status to show updated counts - fetchStreamingStatus(); + // Refresh data to show updated counts + fetchData(); } catch (error: any) { console.error('❌ Error during cleanup:', error); alert(`Failed to cleanup workers: ${error.response?.data?.error || error.message}`); @@ -399,8 +407,8 @@ const Queue: React.FC = () => { alert(`βœ… Cleanup complete!\n\nRemoved ${data.cleaned_count} old session(s)`); - // Refresh streaming status to show updated counts - fetchStreamingStatus(); + // Refresh data to show updated counts + fetchData(); } catch (error: any) { console.error('❌ Error during cleanup:', error); alert(`Failed to cleanup sessions: ${error.response?.data?.error || error.message}`); @@ -409,7 +417,7 @@ const Queue: React.FC = () => { const applyFilters = () => { setPagination(prev => ({ ...prev, offset: 0 })); - fetchJobs(); + fetchData(); }; const clearFilters = () => { @@ -417,20 +425,6 @@ const Queue: React.FC = () => { setPagination(prev => ({ ...prev, offset: 0 })); }; - const nextPage = () => { - if (pagination.has_more) { - setPagination(prev => ({ ...prev, offset: prev.offset + prev.limit })); - } - }; - - const prevPage = () => { - if (pagination.offset > 0) { - setPagination(prev => ({ - ...prev, - offset: Math.max(0, prev.offset - prev.limit) - })); - } - }; const getStatusIcon = (status: string) => { switch (status) { @@ -458,16 +452,6 @@ const Queue: React.FC = () => { } }; - const getJobTypeShort = (type: string) => { - const typeMap: { [key: string]: string } = { - 'process_audio_files': 'Process', - 'process_single_audio_file': 'Process', - 'reprocess_transcript': 'Reprocess', - 'reprocess_memory': 'Memory' - }; - return typeMap[type] || type; - }; - const getJobTypeIcon = (type: string) => { const iconClass = "w-3.5 h-3.5"; switch (type) { @@ -489,6 +473,11 @@ const Queue: React.FC = () => { }; const getJobTypeColor = (type: string, 
status: string) => { + // Safety check for undefined/null values + if (!type || !status) { + return { bgColor: 'bg-gray-400', borderColor: 'border-gray-500' }; + } + // Base colors by job type let bgColor = 'bg-gray-400'; let borderColor = 'border-gray-500'; @@ -542,213 +531,6 @@ const Queue: React.FC = () => { return { bgColor, borderColor }; }; - const renderJobTimeline = (jobs: any[], session: StreamingSession | CompletedSession) => { - if (!jobs || jobs.length === 0) return null; - - // Sort jobs by created_at first - const sortedJobs = [...jobs].sort((a, b) => - new Date(a.created_at).getTime() - new Date(b.created_at).getTime() - ); - - // Calculate timeline boundaries - // For active sessions, use session timestamps - // For completed sessions without started_at, use earliest job timestamp - let sessionStart: number; - let sessionEnd: number; - - if ('started_at' in session) { - // Active session - use session.started_at - sessionStart = session.started_at * 1000; - } else { - // Completed session - calculate from jobs - // Use the earliest job timestamp (created_at or started_at) - const earliestTime = Math.min(...sortedJobs.map(j => { - const created = new Date(j.created_at).getTime(); - const started = j.started_at ? new Date(j.started_at).getTime() : created; - return Math.min(created, started); - })); - sessionStart = earliestTime; - } - - if ('completed_at' in session) { - // Completed session - use the latest job end time (not session.completed_at) - // This handles batch jobs that run after the session is marked complete - const latestJobEnd = Math.max(...sortedJobs.map(j => { - const completed = j.completed_at ? new Date(j.completed_at).getTime() : 0; - const ended = j.ended_at ? new Date(j.ended_at).getTime() : 0; - const started = j.started_at ? 
new Date(j.started_at).getTime() : 0; - return Math.max(completed, ended, started); - })); - // Use the later of: session completion or latest job end - sessionEnd = Math.max(session.completed_at * 1000, latestJobEnd); - } else { - // Active session - use current time - sessionEnd = Date.now(); - } - - const totalDuration = sessionEnd - sessionStart; - - if (totalDuration <= 0) return null; - - // Smart row assignment - place jobs in rows to avoid overlaps - const rows: any[][] = []; - sortedJobs.forEach(job => { - const jobStart = job.started_at ? new Date(job.started_at).getTime() : new Date(job.created_at).getTime(); - - // Find first row where this job doesn't overlap - let assignedRow = -1; - for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) { - const row = rows[rowIndex]; - const lastJobInRow = row[row.length - 1]; - // Calculate when the last job in this row ends (use Date.now() for active jobs) - const lastJobEnd = lastJobInRow.completed_at || lastJobInRow.ended_at - ? new Date((lastJobInRow.completed_at || lastJobInRow.ended_at)!).getTime() - : (lastJobInRow.status === 'processing' ? Date.now() : new Date(lastJobInRow.started_at || lastJobInRow.created_at).getTime()); - - // If this job starts after the last job in this row ends, we can use this row - if (jobStart >= lastJobEnd) { - assignedRow = rowIndex; - break; - } - } - - // If no suitable row found, create a new one - if (assignedRow === -1) { - assignedRow = rows.length; - rows.push([]); - } - - rows[assignedRow].push(job); - job._assignedRow = assignedRow; - }); - - // Calculate height based on number of rows needed - const rowCount = rows.length; - const timelineHeight = Math.max(4, rowCount * 2); // At least 4rem, 2rem per row - - return ( -
-
Timeline:
-
- {/* Timeline grid lines */} -
- {[0, 25, 50, 75, 100].map(percent => ( -
- ))} -
- - {/* Job bars */} - {sortedJobs.map((job) => { - const jobStart = job.started_at ? new Date(job.started_at).getTime() : new Date(job.created_at).getTime(); - const jobEnd = job.completed_at || job.ended_at - ? new Date((job.completed_at || job.ended_at)!).getTime() - : (job.status === 'processing' ? Date.now() : jobStart); - - const startPercent = Math.max(0, ((jobStart - sessionStart) / totalDuration) * 100); - const duration = jobEnd - jobStart; - const widthPercent = Math.max(1, (duration / totalDuration) * 100); - - // Color based on job type - const { bgColor, borderColor } = getJobTypeColor(job.job_type, job.status); - - // Calculate position in assigned row - const rowIndex = job._assignedRow; - const rowHeight = 100 / rowCount; - const barHeight = Math.min(25, rowHeight * 0.6); // 60% of row height, max 25% - const topPercent = (rowIndex * rowHeight) + (rowHeight - barHeight) / 2; - - // Format duration for display - const durationMs = jobEnd - jobStart; - let durationStr = ''; - if (durationMs < 1000) durationStr = `${durationMs}ms`; - else if (durationMs < 60000) durationStr = `${(durationMs / 1000).toFixed(1)}s`; - else durationStr = `${Math.floor(durationMs / 60000)}m ${Math.floor((durationMs % 60000) / 1000)}s`; - - return ( -
-
-
- {getJobTypeIcon(job.job_type)} -
-
- - {/* Tooltip on hover - smart positioning to avoid viewport overflow */} -
80 ? 'auto' : '50%', - right: startPercent > 80 ? '0' : 'auto', - transform: startPercent >= 20 && startPercent <= 80 ? 'translateX(-50%)' : 'none' - }} - > -
{job.job_type}
-
{job.status} β€’ {durationStr}
-
-
- ); - })} -
- - {/* Timeline labels */} -
- 0s - {(totalDuration / 1000).toFixed(0)}s -
-
- ); - }; - - const getJobResult = (job: QueueJob) => { - if (job.status !== 'completed' || !job.result) { - return -; - } - - const result = job.result; - - // Show different results based on job type - if (job.job_type === 'reprocess_transcript') { - const segments = result.transcript_segments || 0; - const speakers = result.speakers_identified || 0; - - return ( -
-
{segments} segments
- {speakers > 0 && ( -
{speakers} speakers identified
- )} -
- ); - } - - if (job.job_type === 'reprocess_memory') { - const memories = result.memory_count || 0; - return ( -
- {memories} memories -
- ); - } - - return ( -
- βœ“ Success -
- ); - }; const flushJobs = async () => { setFlushing(true); @@ -796,6 +578,52 @@ const Queue: React.FC = () => { return new Date(dateString).toLocaleString(); }; + const getJobTypeShort = (jobType: string) => { + const typeMap: {[key: string]: string} = { + 'open_conversation_job': 'Open Conv', + 'stream_speech_detection_job': 'Speech Detect', + 'enroll_speakers_job': 'Speaker Enroll', + 'check_enrolled_speakers_job': 'Check Speakers', + 'audio_persistence_job': 'Audio Persist', + 'process_transcription_job': 'Transcribe', + 'process_memory_job': 'Memory', + 'crop_audio_job': 'Crop Audio' + }; + return typeMap[jobType] || jobType; + }; + + const retryJob = async (jobId: string) => { + try { + await queueApi.retryJob(jobId); + fetchData(); + } catch (error) { + console.error('Failed to retry job:', error); + } + }; + + const cancelJob = async (jobId: string) => { + try { + await queueApi.cancelJob(jobId); + fetchData(); + } catch (error) { + console.error('Failed to cancel job:', error); + } + }; + + const prevPage = () => { + setPagination(prev => ({ + ...prev, + offset: Math.max(0, prev.offset - prev.limit) + })); + }; + + const nextPage = () => { + setPagination(prev => ({ + ...prev, + offset: prev.offset + prev.limit + })); + }; + const formatDuration = (job: any) => { if (!job.started_at) return '-'; @@ -812,7 +640,23 @@ const Queue: React.FC = () => { return `${Math.floor(durationMs / 3600000)}h ${Math.floor((durationMs % 3600000) / 60000)}m`; }; - const toggleSessionExpansion = async (sessionId: string) => { + // Format seconds to readable time format (e.g., 3m34s or 1h22m32s) + const formatSeconds = (seconds: number): string => { + if (seconds < 60) { + return `${Math.floor(seconds)}s`; + } else if (seconds < 3600) { + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m${secs}s`; + } else { + const hours = Math.floor(seconds / 3600); + const mins = Math.floor((seconds % 3600) / 60); + const secs = 
Math.floor(seconds % 60); + return `${hours}h${mins}m${secs}s`; + } + }; + + const toggleSessionExpansion = (sessionId: string) => { const newExpanded = new Set(expandedSessions); if (newExpanded.has(sessionId)) { @@ -820,23 +664,27 @@ const Queue: React.FC = () => { newExpanded.delete(sessionId); setExpandedSessions(newExpanded); } else { - // Expand and fetch jobs if not already loaded + // Expand and trigger refresh to fetch jobs via dashboard endpoint newExpanded.add(sessionId); setExpandedSessions(newExpanded); + // Trigger a refresh if jobs not already loaded if (!sessionJobs[sessionId]) { - try { - const response = await queueApi.getJobsBySession(sessionId); - const data = response.data; - setSessionJobs(prev => ({ ...prev, [sessionId]: data.jobs })); - } catch (error) { - console.error(`❌ Failed to fetch jobs for session ${sessionId}:`, error); - setSessionJobs(prev => ({ ...prev, [sessionId]: [] })); - } + fetchData(); } } }; + const toggleJobExpansion = (jobId: string) => { + const newExpanded = new Set(expandedJobs); + if (newExpanded.has(jobId)) { + newExpanded.delete(jobId); + } else { + newExpanded.add(jobId); + } + setExpandedJobs(newExpanded); + }; + if (loading) { return (
@@ -977,7 +825,7 @@ const Queue: React.FC = () => { {streamingStatus && (
-

Audio Streaming Status

+

Audio Streaming & Conversations

- {streamingStatus?.active_sessions && streamingStatus.active_sessions.length > 0 && ( + {streamingStatus?.stream_health && Object.keys(streamingStatus.stream_health).length > 0 && ( )}
- {/* Active and Completed Sessions Grid */} + {/* Stream Workers Section - Shows audio streams + listen jobs */} +
+

Stream Workers (Client Sessions)

+
+ {streamingStatus?.stream_health && Object.entries(streamingStatus.stream_health).map(([streamKey, health]) => { + // Extract client_id from stream key (format: audio:stream:{client_id}) + const clientId = streamKey.replace('audio:stream:', ''); + + // Find all listen jobs for this client with deduplication + const allJobsRaw = Object.values(sessionJobs).flat().filter(job => job != null); + + // Deduplicate by job_id + const jobMap = new Map(); + allJobsRaw.forEach((job: any) => { + if (job && job.job_id) { + jobMap.set(job.job_id, job); + } + }); + const allJobs = Array.from(jobMap.values()); + + // Debug logging for listen job filtering + console.log(`πŸ” Stream ${streamKey}:`); + console.log(` - clientId extracted: ${clientId}`); + console.log(` - Total jobs available: ${allJobs.length}`); + const speechDetectionJobs = allJobs.filter((job: any) => job && job.job_type === 'stream_speech_detection_job'); + console.log(` - Speech detection jobs: ${speechDetectionJobs.length}`, speechDetectionJobs.map((j: any) => ({ job_id: j.job_id, meta_client_id: j.meta?.client_id }))); + + // Get all listen jobs for this client (only active/queued/processing, not completed) + const allListenJobs = allJobs.filter((job: any) => + job && job.job_type === 'stream_speech_detection_job' && + job.meta?.client_id === clientId && + job.status !== 'completed' && + job.status !== 'failed' + ); + + // Show only the LATEST active speech detection job (most recent created_at) + // Completed ones have already exited and shouldn't be shown here + const listenJobs = allListenJobs.length > 0 + ? [allListenJobs.sort((a, b) => + new Date(b.created_at).getTime() - new Date(a.created_at).getTime() + )[0]] + : []; + + console.log(` - All listen jobs (active): ${allListenJobs.length}, showing latest: ${listenJobs.length}`); + + return ( +
+
+ {streamKey} + Active +
+ +
+
+ Stream Length: + {health.stream_length} +
+
+ Age: + {(health.stream_age_seconds || 0).toFixed(0)}s +
+
+ Pending: + 0 ? 'text-yellow-600' : 'text-green-600'}`}> + {health.total_pending} + +
+ {health.consumer_groups && health.consumer_groups.map((group) => ( +
+
{group.name}:
+ {group.consumers.map((consumer) => ( +
+ {consumer.name} + 0 ? 'text-yellow-600' : 'text-green-600'}> + {consumer.pending} pending + +
+ ))} +
+ ))} + + {/* Current Speech Detection Job */} + {listenJobs.length > 0 && ( +
+
Current Speech Detection:
+ {listenJobs.map((job) => { + const runtime = job.started_at + ? Math.floor((Date.now() - new Date(job.started_at).getTime()) / 1000) + : 0; + const minutes = Math.floor(runtime / 60); + const seconds = runtime % 60; + + return ( +
+
+
+ {getStatusIcon(job.status)} + {job.job_type} + + {job.status} + +
+ +
+ + {/* Job metadata */} +
+
+ Job ID: + {job.job_id.substring(0, 12)}... +
+ {job.started_at && ( +
+ Runtime: + {minutes}m {seconds}s +
+ )} + {job.created_at && ( +
+ Created: + {new Date(job.created_at).toLocaleTimeString()} +
+ )} + {job.meta?.speech_detected_at && ( +
+ Speech Detected: + {new Date(job.meta.speech_detected_at).toLocaleString()} +
+ )} + {job.meta?.status && ( +
+ Status: + {job.meta.status.replace(/_/g, ' ')} +
+ )} +
+ + {/* Session Events */} + {(() => { + const session = streamingStatus?.active_sessions?.find((s: StreamingSession) => s.session_id === job.meta?.session_id); + if (!session) return null; + + return ( +
+
Speech Detection Events:
+ {session.last_event && ( +
+ Last Event: + {session.last_event.split(':')[0]} +
+ )} + {session.speaker_check_status && ( +
+ Speaker Check: + {session.speaker_check_status} +
+ )} + {session.identified_speakers && ( +
+ Speakers: + {session.identified_speakers} +
+ )} +
+ ); + })()} +
+ ); + })} +
+ )} +
+
+ ); + })} +
+
+ + {/* Active and Completed Conversations Grid */}
- {/* Active Sessions */} + {/* Active Conversations - Grouped by conversation_id */}
-

Active Streaming Sessions

- {streamingStatus?.active_sessions && streamingStatus.active_sessions.filter(s => s.status !== 'complete').length > 0 ? ( -
- {streamingStatus.active_sessions.filter(s => s.status !== 'complete').map((session) => { - const isExpanded = expandedSessions.has(session.session_id); - const jobs = sessionJobs[session.session_id] || []; - - return ( -
-
toggleSessionExpansion(session.session_id)} - > -
-
- {isExpanded ? ( - - ) : ( - +

Active Conversations

+ {(() => { + // Group all jobs by conversation_id with deduplication + const allJobsRaw = Object.values(sessionJobs).flat().filter(job => job != null); + + // Deduplicate by job_id + const jobMap = new Map(); + allJobsRaw.forEach((job: any) => { + if (job && job.job_id) { + jobMap.set(job.job_id, job); + } + }); + const allJobs = Array.from(jobMap.values()); + + // Group ALL jobs by conversation_id (regardless of status) + // Also link jobs by audio_uuid so persistence jobs get grouped with conversation + const allConversationJobs = new Map(); + const audioUuidToConversationId = new Map(); + + // First pass: collect conversation_id to audio_uuid mappings + allJobs.forEach(job => { + if (!job) return; + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + if (conversationId && audioUuid) { + audioUuidToConversationId.set(audioUuid, conversationId); + } + }); + + // Second pass: group jobs by conversation_id or audio_uuid + // EXCLUDE session-level jobs (like audio persistence) + allJobs.forEach(job => { + if (!job) return; + + // Skip session-level jobs (they run for entire session, not per conversation) + // Also skip audio persistence jobs by job_type (for backward compatibility with old jobs) + if (job.meta?.session_level === true || job.job_type === 'audio_streaming_persistence_job') { + return; + } + + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + // Determine the grouping key + let groupKey = conversationId; + if (!groupKey && audioUuid) { + // Try to find conversation_id via audio_uuid mapping + groupKey = audioUuidToConversationId.get(audioUuid); + } + + if (groupKey) { + if (!allConversationJobs.has(groupKey)) { + allConversationJobs.set(groupKey, []); + } + allConversationJobs.get(groupKey)!.push(job); + } + }); + + // Filter to only show conversations where at least one job is NOT completed + const conversationMap = new Map(); + allConversationJobs.forEach((jobs, 
conversationId) => { + const hasActiveJob = jobs.some(j => j.status !== 'completed' && j.status !== 'failed'); + if (hasActiveJob) { + conversationMap.set(conversationId, jobs); + } + }); + + if (conversationMap.size === 0) { + return ( +
+ No active conversations +
+ ); + } + + return ( +
+ {Array.from(conversationMap.entries()).map(([conversationId, jobs]) => { + const isExpanded = expandedSessions.has(conversationId); + + // Find the open_conversation_job for metadata + const openConvJob = jobs.find(j => j.job_type === 'open_conversation_job'); + const meta = openConvJob?.meta || {}; + + // Extract conversation info + const clientId = meta.client_id || 'Unknown'; + const transcript = meta.transcript || ''; + const speakers = meta.speakers || []; + const wordCount = meta.word_count || 0; + const lastUpdate = meta.last_update || ''; + const createdAt = openConvJob?.created_at || null; + + // Check if any jobs have failed + const hasFailedJob = jobs.some(j => j.status === 'failed'); + const failedJobCount = jobs.filter(j => j.status === 'failed').length; + + return ( +
+
toggleSessionExpansion(conversationId)} + > +
+
+ {isExpanded ? ( + + ) : ( + + )} + {hasFailedJob ? ( + + ) : ( + + )} + {clientId} + {hasFailedJob ? ( + + {failedJobCount} Error{failedJobCount > 1 ? 's' : ''} + + ) : ( + Active + )} + {speakers.length > 0 && ( + + {speakers.length} speaker{speakers.length > 1 ? 's' : ''} + + )} +
+
+ Conversation: {conversationId.substring(0, 8)}... β€’ + {createdAt && `Started: ${new Date(createdAt).toLocaleTimeString()} β€’ `} + Words: {wordCount} + {lastUpdate && ` β€’ Updated: ${new Date(lastUpdate).toLocaleTimeString()}`} +
+ {transcript && ( +
+ "{transcript.substring(0, 100)}{transcript.length > 100 ? '...' : ''}" +
)} - - {session.client_id} - {session.provider} - {session.status} -
-
- Session: {session.session_id.substring(0, 8)}... β€’ - Chunks: {session.chunks_published} β€’ - Duration: {Math.floor(session.age_seconds)}s β€’ - Idle: {session.idle_seconds.toFixed(1)}s
-
{/* Expanded Jobs Section */} {isExpanded && ( -
- {/* Timeline Visualization */} - {renderJobTimeline(jobs, session)} +
+ {/* Pipeline Timeline */} +
+
Pipeline Timeline:
+ {(() => { + // Helper function to get display name from job type + const getJobDisplayName = (jobType: string) => { + const nameMap: { [key: string]: string } = { + 'stream_speech_detection_job': 'Speech', + 'open_conversation_job': 'Open', + 'transcribe_full_audio_job': 'Transcript', + 'recognise_speakers_job': 'Speakers', + 'process_memory_job': 'Memory' + }; + return nameMap[jobType] || jobType.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase()); + }; + + // Helper function to get icon for job type + const getJobIcon = (jobType: string) => { + if (jobType.includes('speech') || jobType.includes('detect')) return Brain; + if (jobType.includes('conversation') || jobType.includes('open')) return Brain; + if (jobType.includes('transcribe')) return FileText; + if (jobType.includes('speaker') || jobType.includes('recognise')) return Brain; + if (jobType.includes('memory')) return Brain; + return Brain; // Default icon + }; + + // Build dynamic pipeline from actual jobs with timing data + // Sort by start time to show chronological order + const jobsWithTiming = jobs + .filter(j => j && j.started_at) + .map(job => { + const startTime = new Date(job.started_at!).getTime(); + const endTime = job.completed_at || job.ended_at + ? new Date((job.completed_at || job.ended_at)!).getTime() + : (job.status === 'processing' ? Date.now() : startTime); + + return { + job, + startTime, + endTime, + duration: (endTime - startTime) / 1000, + name: getJobDisplayName(job.job_type), + icon: getJobIcon(job.job_type) + }; + }) + .sort((a, b) => a.startTime - b.startTime); + + const jobTimes = jobsWithTiming; + + // Find earliest start and latest end + const validTimes = jobTimes.filter(t => t !== null); + if (validTimes.length === 0) { + return ( +
No job timing data available
+ ); + } + + const earliestStart = Math.min(...validTimes.map(t => t!.startTime)); + const latestEnd = Math.max(...validTimes.map(t => t!.endTime)); + const totalDuration = (latestEnd - earliestStart) / 1000; // in seconds + + // Format duration for display + const formatDuration = (seconds: number) => { + if (seconds < 1) return `${(seconds * 1000).toFixed(0)}ms`; + if (seconds < 60) return `${seconds.toFixed(1)}s`; + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m ${secs}s`; + }; + + // Generate time axis markers (0%, 25%, 50%, 75%, 100%) + const timeMarkers = [0, 0.25, 0.5, 0.75, 1].map(pct => ({ + percent: pct * 100, + time: formatDuration(totalDuration * pct) + })); + + return ( +
+ {/* Time axis */} +
+ {timeMarkers.map((marker, idx) => ( +
+
+
+ {marker.time} +
+
+ ))} +
+ + {/* Job timeline bars */} +
+ {jobTimes.map((jobTime) => { + const { job, startTime, endTime, duration, name, icon: Icon } = jobTime; + + // Calculate position and width as percentage of total timeline + const startPercent = ((startTime - earliestStart) / (latestEnd - earliestStart)) * 100; + const widthPercent = ((endTime - startTime) / (latestEnd - earliestStart)) * 100; + + // Use job type colors + const jobColors = getJobTypeColor(job.job_type, job.status); + const barColor = jobColors.bgColor; + const borderColor = jobColors.borderColor; + + return ( +
+ {/* Stage Icon */} +
+ +
+ + {/* Stage Name */} + {name} + + {/* Timeline Container */} +
+ {/* Job Bar */} +
+ + {formatDuration(duration)} + +
+
+
+ ); + })} +
+ + {/* Total Duration */} +
+ Total: {formatDuration(totalDuration)} +
+
+ ); + })()} +
-
Jobs for this session:
- {jobs.length > 0 ? ( +
Conversation Jobs:
+ {jobs.filter(j => j != null && j.job_id).length > 0 ? (
- {jobs.map((job, index) => ( -
-
-
- #{index + 1} - {getJobTypeIcon(job.job_type)} - {getStatusIcon(job.status)} - {job.job_type} - - {job.status} - - {job.queue} + {jobs + .filter(j => j != null && j.job_id) + .sort((a, b) => new Date(a.created_at).getTime() - new Date(b.created_at).getTime()) + .map((job, index) => ( +
+
toggleJobExpansion(job.job_id)} + > +
+
+ #{index + 1} + {getJobTypeIcon(job.job_type)} + {getStatusIcon(job.status)} + {job.job_type} + + {job.status} + + {job.queue} + {/* Show memory count badge on collapsed card */} + {!expandedJobs.has(job.job_id) && job.job_type === 'process_memory_job' && job.result?.memories_created !== undefined && ( + + {job.result.memories_created} memories + + )} +
-
- {job.started_at && ( - Started: {new Date(job.started_at).toLocaleTimeString()} - )} - {job.started_at && ( - β€’ Duration: {formatDuration(job)} +
+ + {/* Collapsible metadata section */} + {expandedJobs.has(job.job_id) && ( +
+
+ {job.started_at && ( + Started: {new Date(job.started_at).toLocaleTimeString()} + )} + {job.started_at && ( + β€’ Duration: {formatDuration(job)} + )} +
+ + {/* Show job-specific metadata */} + {job.meta && ( +
+ {/* open_conversation_job metadata */} + {job.job_type === 'open_conversation_job' && ( + <> + {job.meta.word_count !== undefined && ( +
Words: {job.meta.word_count}
+ )} + {job.meta.speakers && job.meta.speakers.length > 0 && ( +
Speakers: {job.meta.speakers.join(', ')}
+ )} + {job.meta.inactivity_seconds !== undefined && ( +
Idle: {Math.floor(job.meta.inactivity_seconds)}s
+ )} + {job.meta.transcript && ( +
+ "{job.meta.transcript.substring(0, 80)}..." +
+ )} + + )} + + {/* transcribe_full_audio_job metadata */} + {job.job_type === 'transcribe_full_audio_job' && job.result && ( + <> + {job.result.transcript && ( +
Transcript: {job.result.transcript.length} chars
+ )} + {job.result.processing_time_seconds && ( +
Processing: {job.result.processing_time_seconds.toFixed(1)}s
+ )} + + )} + + {/* recognise_speakers_job metadata */} + {job.job_type === 'recognise_speakers_job' && job.result && ( + <> + {job.result.identified_speakers && job.result.identified_speakers.length > 0 && ( +
Identified: {job.result.identified_speakers.join(', ')}
+ )} + {job.result.segment_count && ( +
Segments: {job.result.segment_count}
+ )} + + )} + + {/* process_memory_job metadata */} + {job.job_type === 'process_memory_job' && job.meta && ( + <> + {job.meta.memories_created !== undefined && ( +
Memories: {job.meta.memories_created} created
+ )} + {job.meta.processing_time && ( +
Processing: {job.meta.processing_time.toFixed(1)}s
+ )} + {job.meta.memory_details && job.meta.memory_details.length > 0 && ( +
+
Memories Created:
+ {job.meta.memory_details.map((memory: any, idx: number) => ( +
+ "{memory.text}" +
+ ))} +
+ )} + + )} + + {/* Show conversation_id if present */} + {job.meta.conversation_id && ( +
+ Conv: {job.meta.conversation_id.substring(0, 8)}... +
+ )} +
)}
-
+ )}
) : ( -
No jobs found for this session
+
No jobs found for this conversation
)}
)}
- ); - })} -
- ) : ( -
- No active sessions -
- )} + ); + })} +
+ ); + })()}
- {/* Completed Sessions */} + {/* Completed Conversations - Grouped by conversation_id */}
-

Completed Sessions (Last Hour)

- {streamingStatus?.completed_sessions && streamingStatus.completed_sessions.length > 0 ? ( -
- {streamingStatus.completed_sessions.map((session) => { - const isExpanded = expandedSessions.has(session.session_id); - const jobs = sessionJobs[session.session_id] || []; - - return ( -
-
toggleSessionExpansion(session.session_id)} - > -
-
- {isExpanded ? ( - - ) : ( - - )} - {session.has_conversation ? ( - - ) : ( - - )} - {session.client_id} - {session.has_conversation ? ( - Conversation +
+

Completed Conversations

+
+ + +
+
+ {(() => { + // Group all jobs by conversation_id for completed conversations with deduplication + const allJobsRaw = Object.values(sessionJobs).flat().filter(job => job != null); + + // Deduplicate by job_id + const jobMap = new Map(); + allJobsRaw.forEach((job: any) => { + if (job && job.job_id) { + jobMap.set(job.job_id, job); + } + }); + const allJobs = Array.from(jobMap.values()); + + // Group ALL jobs by conversation_id (regardless of status) + // Also link jobs by audio_uuid so persistence jobs get grouped with conversation + const allConversationJobs = new Map(); + const audioUuidToConversationId = new Map(); + + // First pass: collect conversation_id to audio_uuid mappings + allJobs.forEach(job => { + if (!job) return; + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + if (conversationId && audioUuid) { + audioUuidToConversationId.set(audioUuid, conversationId); + } + }); + + // Second pass: group jobs by conversation_id or audio_uuid + // EXCLUDE session-level jobs (like audio persistence) + allJobs.forEach(job => { + if (!job) return; + + // Skip session-level jobs (they run for entire session, not per conversation) + // Also skip audio persistence jobs by job_type (for backward compatibility with old jobs) + if (job.meta?.session_level === true || job.job_type === 'audio_streaming_persistence_job') { + return; + } + + const conversationId = job.meta?.conversation_id; + const audioUuid = job.meta?.audio_uuid; + + // Determine the grouping key + let groupKey = conversationId; + if (!groupKey && audioUuid) { + // Try to find conversation_id via audio_uuid mapping + groupKey = audioUuidToConversationId.get(audioUuid); + } + + if (groupKey) { + if (!allConversationJobs.has(groupKey)) { + allConversationJobs.set(groupKey, []); + } + allConversationJobs.get(groupKey)!.push(job); + } + }); + + // Filter to only show conversations where ALL jobs are completed or failed + const conversationMap = new Map(); + 
allConversationJobs.forEach((jobs, conversationId) => { + const allJobsComplete = jobs.every(j => j.status === 'completed' || j.status === 'failed'); + if (allJobsComplete) { + conversationMap.set(conversationId, jobs); + } + }); + + if (conversationMap.size === 0) { + return ( +
+ No completed conversations +
+ ); + } + + // Convert to array and filter by time range + const now = Date.now(); + const timeRangeMs = completedConvTimeRange * 60 * 60 * 1000; // hours to milliseconds + + let conversationsArray = Array.from(conversationMap.entries()) + .map(([conversationId, jobs]) => { + // Find the open_conversation_job for created_at + const openConvJob = jobs.find(j => j.job_type === 'open_conversation_job'); + const createdAt = openConvJob?.created_at ? new Date(openConvJob.created_at).getTime() : 0; + return { conversationId, jobs, createdAt }; + }) + .filter(({ createdAt }) => { + // Filter by time range + return createdAt > 0 && (now - createdAt) <= timeRangeMs; + }) + .sort((a, b) => b.createdAt - a.createdAt); // Most recent first + + // Apply pagination + const totalConversations = conversationsArray.length; + const totalPages = Math.ceil(totalConversations / completedConvItemsPerPage); + const startIndex = (completedConvPage - 1) * completedConvItemsPerPage; + const endIndex = startIndex + completedConvItemsPerPage; + const paginatedConversations = conversationsArray.slice(startIndex, endIndex); + + if (conversationsArray.length === 0) { + return ( +
+ No completed conversations in the selected time range +
+ ); + } + + return ( + <> +
+ {paginatedConversations.map(({ conversationId, jobs }) => { + const isExpanded = expandedSessions.has(conversationId); + + // Find the open_conversation_job for metadata + const openConvJob = jobs.find(j => j.job_type === 'open_conversation_job'); + const meta = openConvJob?.meta || {}; + + // Find transcription job for title/summary + const transcriptionJob = jobs.find(j => j.job_type === 'transcribe_full_audio_job'); + const transcriptionMeta = transcriptionJob?.meta || {}; + + // Extract conversation info from metadata + const clientId = meta.client_id || 'Unknown'; + const transcript = meta.transcript || ''; + const speakers = meta.speakers || []; + const wordCount = meta.word_count || 0; + const createdAt = openConvJob?.created_at || null; + const title = transcriptionMeta.title || null; + const summary = transcriptionMeta.summary || null; + + // Check job statuses + const allComplete = jobs.every(j => j.status === 'completed'); + const hasFailedJob = jobs.some(j => j.status === 'failed'); + const failedJobCount = jobs.filter(j => j.status === 'failed').length; + + // Determine status styling + let bgColor = 'bg-yellow-50 border-yellow-200'; + let hoverColor = 'hover:bg-yellow-100'; + let iconColor = 'text-yellow-600'; + let statusBadge = 'bg-yellow-100 text-yellow-700'; + let statusText = 'Processing'; + let StatusIcon = Clock; + + if (hasFailedJob) { + bgColor = 'bg-red-50 border-red-300'; + hoverColor = 'hover:bg-red-100'; + iconColor = 'text-red-600'; + statusBadge = 'bg-red-200 text-red-800'; + statusText = `${failedJobCount} Error${failedJobCount > 1 ? 's' : ''}`; + StatusIcon = AlertTriangle; + } else if (allComplete) { + bgColor = 'bg-green-50 border-green-200'; + hoverColor = 'hover:bg-green-100'; + iconColor = 'text-green-600'; + statusBadge = 'bg-green-100 text-green-700'; + statusText = 'Complete'; + StatusIcon = CheckCircle; + } + + return ( +
+
toggleSessionExpansion(conversationId)} + > +
+
+ {isExpanded ? ( + + ) : ( + + )} + + {clientId} + + {statusText} + + {speakers.length > 0 && ( + + {speakers.length} speaker{speakers.length > 1 ? 's' : ''} + + )} +
+
+ Conversation: {conversationId.substring(0, 8)}... β€’ + Words: {wordCount} + {createdAt && ( + <> β€’ Created: {new Date(createdAt).toLocaleString()} + )} +
+ {/* Show title/summary for completed, or transcript for in-progress or when no title exists */} + {allComplete ? ( + <> + {title ? ( +
+ {title} +
+ ) : transcript ? ( +
+ "{transcript.substring(0, 100)}{transcript.length > 100 ? '...' : ''}" +
+ ) : null} + {summary && ( +
+ {summary} +
+ )} + ) : ( - {session.reason || 'No speech'} + transcript && ( +
+ "{transcript.substring(0, 100)}{transcript.length > 100 ? '...' : ''}" +
+ ) )}
-
- Session: {session.session_id.substring(0, 8)}... β€’ - {new Date(session.completed_at * 1000).toLocaleTimeString()} -
-
- {/* Expanded Jobs Section */} - {isExpanded && ( -
- {/* Timeline Visualization */} - {renderJobTimeline(jobs, session)} - -
Jobs for this session:
- {jobs.length > 0 ? ( -
- {jobs.map((job, index) => ( -
-
-
- #{index + 1} - {getJobTypeIcon(job.job_type)} - {getStatusIcon(job.status)} - {job.job_type} - - {job.status} - - {job.queue} + {/* Expanded Jobs Section */} + {isExpanded && ( +
+ {/* Pipeline Timeline */} +
+
Pipeline Timeline:
+ {(() => { + // Helper function to get display name from job type + const getJobDisplayName = (jobType: string) => { + const nameMap: { [key: string]: string } = { + 'stream_speech_detection_job': 'Speech', + 'open_conversation_job': 'Open', + 'transcribe_full_audio_job': 'Transcript', + 'recognise_speakers_job': 'Speakers', + 'process_memory_job': 'Memory' + }; + return nameMap[jobType] || jobType.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase()); + }; + + // Helper function to get icon for job type + const getJobIcon = (jobType: string) => { + if (jobType.includes('speech') || jobType.includes('detect')) return Brain; + if (jobType.includes('conversation') || jobType.includes('open')) return Brain; + if (jobType.includes('transcribe')) return FileText; + if (jobType.includes('speaker') || jobType.includes('recognise')) return Brain; + if (jobType.includes('memory')) return Brain; + return Brain; // Default icon + }; + + // Build dynamic pipeline from actual jobs with timing data + // Sort by start time to show chronological order + const jobsWithTiming = jobs + .filter(j => j && j.started_at) + .map(job => { + const startTime = new Date(job.started_at!).getTime(); + const endTime = job.completed_at || job.ended_at + ? new Date((job.completed_at || job.ended_at)!).getTime() + : (job.status === 'processing' ? Date.now() : startTime); + + return { + job, + startTime, + endTime, + duration: (endTime - startTime) / 1000, + name: getJobDisplayName(job.job_type), + icon: getJobIcon(job.job_type) + }; + }) + .sort((a, b) => a.startTime - b.startTime); + + const jobTimes = jobsWithTiming; + + // Find earliest start and latest end + const validTimes = jobTimes.filter(t => t !== null); + if (validTimes.length === 0) { + return ( +
No job timing data available
+ ); + } + + const earliestStart = Math.min(...validTimes.map(t => t!.startTime)); + const latestEnd = Math.max(...validTimes.map(t => t!.endTime)); + const totalDuration = (latestEnd - earliestStart) / 1000; // in seconds + + // Format duration for display + const formatDuration = (seconds: number) => { + if (seconds < 1) return `${(seconds * 1000).toFixed(0)}ms`; + if (seconds < 60) return `${seconds.toFixed(1)}s`; + const mins = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${mins}m ${secs}s`; + }; + + // Generate time axis markers (0%, 25%, 50%, 75%, 100%) + const timeMarkers = [0, 0.25, 0.5, 0.75, 1].map(pct => ({ + percent: pct * 100, + time: formatDuration(totalDuration * pct) + })); + + return ( +
+ {/* Time axis */} +
+ {timeMarkers.map((marker, idx) => ( +
+
+
+ {marker.time} +
+
+ ))}
-
- {job.started_at && ( - Started: {new Date(job.started_at).toLocaleTimeString()} - )} - {job.started_at && ( - • Duration: {formatDuration(job)} - )} + + {/* Job timeline bars */} +
+ {jobTimes.map((jobTime) => { + const { job, startTime, endTime, duration, name, icon: Icon } = jobTime; + + // Calculate position and width as percentage of total timeline + const startPercent = ((startTime - earliestStart) / (latestEnd - earliestStart)) * 100; + const widthPercent = ((endTime - startTime) / (latestEnd - earliestStart)) * 100; + + // Use job type colors + const jobColors = getJobTypeColor(job.job_type, job.status); + const barColor = jobColors.bgColor; + const borderColor = jobColors.borderColor; + + return ( +
+ {/* Stage Icon */} +
+ +
+ + {/* Stage Name */} + {name} + + {/* Timeline Container */} +
+ {/* Job Bar */} +
+ + {formatDuration(duration)} + +
+
+
+ ); + })} +
+ + {/* Total Duration */} +
+ Total: {formatDuration(totalDuration)}
- -
- ))} + ); + })()}
- ) : ( -
No jobs found for this session
- )} -
- )} -
- ); - })} -
- ) : ( -
- No completed sessions -
- )} -
-
- {/* Stream Health */} -
-

Stream Workers

-
- {streamingStatus?.stream_health && Object.entries(streamingStatus.stream_health).map(([provider, health]) => ( -
-
- {provider} - {health.error ? ( - Inactive - ) : ( - Active - )} -
+
Conversation Jobs:
+ {jobs.filter(j => j != null && j.job_id).length > 0 ? ( +
+ {jobs + .filter(j => j != null && j.job_id) + .sort((a, b) => new Date(a.created_at).getTime() - new Date(b.created_at).getTime()) + .map((job, index) => ( +
+
+
toggleJobExpansion(job.job_id)} + > + #{index + 1} + {getJobTypeIcon(job.job_type)} + {getStatusIcon(job.status)} + {job.job_type} + + {job.status} + + {job.queue || job.data?.queue || 'unknown'} + {/* Show memory count badge on collapsed card */} + {!expandedJobs.has(job.job_id) && job.job_type === 'process_memory_job' && job.result?.memories_created !== undefined && ( + + {job.result.memories_created} memories + + )} +
+ +
- {health.error ? ( -

{health.error}

- ) : ( -
-
- Stream Length: - {health.stream_length} -
-
- Pending: - 0 ? 'text-yellow-600' : 'text-green-600'}`}> - {health.total_pending} - -
- {health.consumer_groups && health.consumer_groups.map((group) => ( -
-
Consumers:
- {group.consumers.map((consumer) => ( -
- {consumer.name} - 0 ? 'text-yellow-600' : 'text-green-600'}> - {consumer.pending} pending - + {/* Collapsible metadata section */} + {expandedJobs.has(job.job_id) && ( +
+
+ {job.started_at && ( + Started: {new Date(job.started_at).toLocaleTimeString()} + )} + {job.started_at && ( + • Duration: {formatDuration(job)} + )} +
+ + {/* Show job-specific metadata */} + {job.meta && ( +
+ {/* open_conversation_job metadata */} + {job.job_type === 'open_conversation_job' && ( + <> + {job.meta.word_count !== undefined && ( +
Words: {job.meta.word_count}
+ )} + {job.meta.speakers && job.meta.speakers.length > 0 && ( +
Speakers: {job.meta.speakers.join(', ')}
+ )} + {job.meta.inactivity_seconds !== undefined && ( +
Idle: {Math.floor(job.meta.inactivity_seconds)}s
+ )} + {job.meta.transcript && ( +
+ "{job.meta.transcript.substring(0, 80)}..." +
+ )} + + )} + + {/* transcribe_full_audio_job metadata */} + {job.job_type === 'transcribe_full_audio_job' && job.result && ( + <> + {job.result.transcript && ( +
Transcript: {job.result.transcript.length} chars
+ )} + {job.result.processing_time_seconds && ( +
Processing: {job.result.processing_time_seconds.toFixed(1)}s
+ )} + + )} + + {/* recognise_speakers_job metadata */} + {job.job_type === 'recognise_speakers_job' && job.result && ( + <> + {job.result.identified_speakers && job.result.identified_speakers.length > 0 && ( +
Identified: {job.result.identified_speakers.join(', ')}
+ )} + {job.result.segment_count && ( +
Segments: {job.result.segment_count}
+ )} + + )} + + {/* process_memory_job metadata */} + {job.job_type === 'process_memory_job' && job.result && ( + <> + {job.result.memories_created !== undefined && ( +
Memories: {job.result.memories_created} created
+ )} + {job.result.processing_time_seconds && ( +
Processing: {job.result.processing_time_seconds.toFixed(1)}s
+ )} + + )} + + {/* Show conversation_id if present */} + {job.meta.conversation_id && ( +
+ Conv: {job.meta.conversation_id.substring(0, 8)}... +
+ )} +
+ )} +
+ )} +
+ ))} +
+ ) : ( +
No jobs found for this conversation
+ )}
- ))} + )}
- ))} -
- )} -
- ))} + ); + })} +
+ + {/* Pagination Controls */} + {totalPages > 1 && ( +
+
+ Showing {startIndex + 1}-{Math.min(endIndex, totalConversations)} of {totalConversations} conversations +
+
+ + + Page {completedConvPage} of {totalPages} + + +
+
+ )} + + ); + })()}
@@ -1352,7 +2145,7 @@ const Queue: React.FC = () => {
- {/* Jobs Table */} + {/* Jobs Table */}

Jobs

@@ -1363,20 +2156,25 @@ const Queue: React.FC = () => { Date + Conversation ID Job ID Type Status Duration - Result Actions - {jobs.map((job) => ( + {jobs.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()).map((job) => ( {new Date(job.created_at).toLocaleString('en-US', { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })} + +
+ {job.meta?.conversation_id ? job.meta.conversation_id.substring(0, 8) : '—'} +
+
{job.job_id} @@ -1396,33 +2194,44 @@ const Queue: React.FC = () => { {formatDuration(job)}
- - {getJobResult(job)} - - - {job.status === 'failed' && ( - - )} - - {(job.status === 'queued' || job.status === 'processing') && ( + +
+ {job.status === 'failed' && ( + + )} - )} + {(job.status === 'queued' || job.status === 'processing') && ( + + )} + {job.status === 'completed' && ( + + )} +
))} @@ -1455,6 +2264,7 @@ const Queue: React.FC = () => {
)}
+ {/* Old Jobs Table and Pagination - Removed in favor of session-based view above */} {/* Job Details Modal */} {selectedJob && ( @@ -1514,7 +2324,7 @@ const Queue: React.FC = () => { {selectedJob.args && selectedJob.args.length > 0 && (
-
+                    
                       {JSON.stringify(selectedJob.args, null, 2)}
                     
@@ -1523,7 +2333,7 @@ const Queue: React.FC = () => { {selectedJob.kwargs && Object.keys(selectedJob.kwargs).length > 0 && (
-
+                    
                       {JSON.stringify(selectedJob.kwargs, null, 2)}
                     
@@ -1532,7 +2342,7 @@ const Queue: React.FC = () => { {selectedJob.error_message && (
-
+                    
                       {selectedJob.error_message}
                     
@@ -1541,11 +2351,164 @@ const Queue: React.FC = () => { {selectedJob.result && (
-
+                    
                       {JSON.stringify(selectedJob.result, null, 2)}
                     
)} + + {/* Formatted Job Metadata - Job-specific displays */} + {selectedJob.meta && Object.keys(selectedJob.meta).length > 0 && ( +
+ + + {/* open_conversation_job formatted metadata */} + {selectedJob.func_name?.includes('open_conversation_job') && ( +
+ {selectedJob.meta.word_count !== undefined && ( +
+ Word Count: {selectedJob.meta.word_count} +
+ )} + {selectedJob.meta.speakers && selectedJob.meta.speakers.length > 0 && ( +
+ Speakers: {selectedJob.meta.speakers.join(', ')} +
+ )} + {selectedJob.meta.transcript_length !== undefined && ( +
+ Transcript Length: {selectedJob.meta.transcript_length} chars +
+ )} + {selectedJob.meta.duration_seconds !== undefined && ( +
+ Duration: {selectedJob.meta.duration_seconds.toFixed(1)}s +
+ )} + {selectedJob.meta.inactivity_seconds !== undefined && ( +
+ Idle Time: {Math.floor(selectedJob.meta.inactivity_seconds)}s +
+ )} + {selectedJob.meta.chunks_processed !== undefined && ( +
+ Chunks Processed: {selectedJob.meta.chunks_processed} +
+ )} + {selectedJob.meta.transcript && ( +
+
Transcript:
+
+ "{selectedJob.meta.transcript}" +
+
+ )} +
+ )} + + {/* process_memory_job formatted metadata */} + {selectedJob.func_name?.includes('process_memory_job') && selectedJob.meta.memory_details && selectedJob.meta.memory_details.length > 0 && ( +
+
+ Memories Created: {selectedJob.meta.memories_created || selectedJob.meta.memory_details.length} +
+ {selectedJob.meta.processing_time !== undefined && ( +
+ Processing Time: {selectedJob.meta.processing_time.toFixed(1)}s +
+ )} +
+
Memory Details:
+
+ {selectedJob.meta.memory_details.map((mem: any, idx: number) => ( +
+ {mem.text} +
+ ))} +
+
+
+ )} + + {/* stream_speech_detection_job formatted metadata */} + {selectedJob.func_name?.includes('stream_speech_detection_job') && ( +
+ {selectedJob.meta.speech_detected_at && ( +
+ Speech Detected At: {new Date(selectedJob.meta.speech_detected_at).toLocaleString()} +
+ )} + {selectedJob.meta.detected_speakers && selectedJob.meta.detected_speakers.length > 0 && ( +
+ Detected Speakers: {selectedJob.meta.detected_speakers.join(', ')} +
+ )} + {selectedJob.meta.conversation_job_id && ( +
+ Conversation Job: {selectedJob.meta.conversation_job_id} +
+ )} +
+ )} + + {/* transcribe_full_audio_job formatted metadata */} + {selectedJob.func_name?.includes('transcribe_full_audio_job') && (selectedJob.meta.title || selectedJob.meta.summary) && ( +
+ {selectedJob.meta.title && ( +
+ Title: {selectedJob.meta.title} +
+ )} + {selectedJob.meta.summary && ( +
+ Summary: {selectedJob.meta.summary} +
+ )} + {selectedJob.meta.transcript_length !== undefined && ( +
+ Transcript Length: {selectedJob.meta.transcript_length} chars +
+ )} + {selectedJob.meta.word_count !== undefined && ( +
+ Word Count: {selectedJob.meta.word_count} +
+ )} + {selectedJob.meta.processing_time !== undefined && ( +
+ Processing Time: {selectedJob.meta.processing_time.toFixed(1)}s +
+ )} +
+ )} + + {/* process_cropping_job formatted metadata */} + {selectedJob.func_name?.includes('process_cropping_job') && ( +
+ {selectedJob.meta.cropped_duration_seconds !== undefined && ( +
+ Cropped Duration: {formatSeconds(selectedJob.meta.cropped_duration_seconds)} +
+ )} + {selectedJob.meta.segments_cropped !== undefined && ( +
+ Segments Cropped: {selectedJob.meta.segments_cropped} +
+ )} +
+ )} + + {/* Raw JSON metadata (collapsible) */} +
+ + Raw Metadata JSON + +
+                        {JSON.stringify(selectedJob.meta, null, 2)}
+                      
+
+
+ )}
)}
diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index f5996d97..3ca54a59 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -9,12 +9,6 @@ interface HealthData { services: Record timestamp?: string } @@ -279,15 +273,9 @@ export default function System() { )} {(status as any).provider && ( - ({(status as any).provider} - {service === 'audioai' && (status as any).model && ` - ${(status as any).model}`}) + ({(status as any).provider}) )} - {service === 'audioai' && (status as any).embedder_model && ( -
- Embedder: {(status as any).embedder_status} ({(status as any).embedder_model}) -
- )} {service === 'redis' && (status as any).worker_count !== undefined && (
Workers: {(status as any).worker_count} total diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index 41a1810d..abe7678c 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -142,16 +142,26 @@ export const systemApi = { } export const queueApi = { - getJobs: (params: URLSearchParams) => api.get(`/api/queue/jobs?${params}`), + // Consolidated dashboard endpoint - replaces individual getJobs, getStats, getStreamingStatus calls + getDashboard: (expandedSessions: string[] = []) => api.get('/api/queue/dashboard', { + params: { expanded_sessions: expandedSessions.join(',') } + }), + + // Individual endpoints (kept for debugging and specific use cases) getJob: (jobId: string) => api.get(`/api/queue/jobs/${jobId}`), - getJobsBySession: (sessionId: string) => api.get(`/api/queue/jobs/by-session/${sessionId}`), - getStats: () => api.get('/api/queue/stats'), - getStreamingStatus: () => api.get('/api/streaming/status'), - cleanupStuckWorkers: () => api.post('/api/streaming/cleanup'), - cleanupOldSessions: (maxAgeSeconds: number = 3600) => api.post(`/api/streaming/cleanup-sessions?max_age_seconds=${maxAgeSeconds}`), retryJob: (jobId: string, force: boolean = false) => api.post(`/api/queue/jobs/${jobId}/retry`, { force }), cancelJob: (jobId: string) => api.delete(`/api/queue/jobs/${jobId}`), + + // Cleanup operations + cleanupStuckWorkers: () => api.post('/api/streaming/cleanup'), + cleanupOldSessions: (maxAgeSeconds: number = 3600) => api.post(`/api/streaming/cleanup-sessions?max_age_seconds=${maxAgeSeconds}`), + + // Legacy endpoints - kept for backward compatibility but not used in Queue page + // getJobs: (params: URLSearchParams) => api.get(`/api/queue/jobs?${params}`), + // getJobsBySession: (sessionId: string) => api.get(`/api/queue/jobs/by-session/${sessionId}`), + // getStats: () => api.get('/api/queue/stats'), + // getStreamingStatus: () => 
api.get('/api/streaming/status'), } export const uploadApi = { diff --git a/backends/charts/advanced-backend/templates/deployment.yaml b/backends/charts/advanced-backend/templates/deployment.yaml index 45dc2031..4082bd65 100644 --- a/backends/charts/advanced-backend/templates/deployment.yaml +++ b/backends/charts/advanced-backend/templates/deployment.yaml @@ -15,6 +15,7 @@ spec: {{- include "advanced-backend.selectorLabels" . | nindent 8 }} spec: containers: + # Main FastAPI backend - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} @@ -60,6 +61,37 @@ spec: mountPath: /etc/ssl/certs/mem0-ca.crt subPath: ca.crt readOnly: true + + {{- if .Values.workers.sidecar.enabled }} + # Workers sidecar container (optional) + - name: {{ .Chart.Name }}-workers + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./start-workers.sh"] + envFrom: + - configMapRef: + name: friend-lite-config + - secretRef: + name: friend-lite-secrets + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.workers.resources | default .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: data-volume + mountPath: {{ .Values.persistence.mountPath }} + - name: data-volume + mountPath: /app/audio_chunks + {{- end }} + - name: mem0-ca-cert + mountPath: /etc/ssl/certs/mem0-ca.crt + subPath: ca.crt + readOnly: true + {{- end }} volumes: {{- if .Values.persistence.enabled }} - name: data-volume diff --git a/backends/charts/advanced-backend/templates/workers-deployment.yaml b/backends/charts/advanced-backend/templates/workers-deployment.yaml new file mode 100644 index 00000000..effcc10d --- /dev/null +++ b/backends/charts/advanced-backend/templates/workers-deployment.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: 
Deployment +metadata: + name: {{ include "advanced-backend.fullname" . }}-workers + labels: + {{- include "advanced-backend.labels" . | nindent 4 }} + app.kubernetes.io/component: workers +spec: + replicas: {{ .Values.workers.replicaCount | default 1 }} + selector: + matchLabels: + {{- include "advanced-backend.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: workers + template: + metadata: + labels: + {{- include "advanced-backend.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: workers + spec: + containers: + - name: {{ .Chart.Name }}-workers + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./start-workers.sh"] + envFrom: + - configMapRef: + name: friend-lite-config + - secretRef: + name: friend-lite-secrets + env: + # Override specific values from Kubernetes/Helm if needed + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.workers.resources | default .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: data-volume + mountPath: {{ .Values.persistence.mountPath }} + - name: data-volume + mountPath: /app/audio_chunks + {{- end }} + - name: mem0-ca-cert + mountPath: /etc/ssl/certs/mem0-ca.crt + subPath: ca.crt + readOnly: true + volumes: + {{- if .Values.persistence.enabled }} + - name: data-volume + persistentVolumeClaim: + claimName: {{ include "advanced-backend.fullname" . }}-data + {{- end }} + - name: mem0-ca-cert + configMap: + name: mem0-ca-cert + {{- with .Values.workers.nodeSelector | default .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.affinity | default .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.tolerations | default .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/backends/charts/advanced-backend/values.yaml b/backends/charts/advanced-backend/values.yaml index 5598c142..8827a62d 100644 --- a/backends/charts/advanced-backend/values.yaml +++ b/backends/charts/advanced-backend/values.yaml @@ -1,6 +1,26 @@ # Default values for advanced-backend replicaCount: 1 +# Workers configuration +workers: + # Option 1: Sidecar mode (workers in same pod as API) + # Simpler setup, shares volumes automatically, no separate deployment + sidecar: + enabled: true # Set to false to use separate deployment instead + + # Option 2: Separate deployment (only used if sidecar.enabled = false) + # Better for production - independent scaling and resource isolation + replicaCount: 1 + + # Worker resource limits (applies to both modes) + resources: + limits: + cpu: 2000m + memory: 4Gi + requests: + cpu: 50m # Reduced from 500m - actual usage is ~1-6m + memory: 2Gi + image: repository: advanced-backend pullPolicy: Always diff --git a/extras/speaker-recognition/docker-compose.yml b/extras/speaker-recognition/docker-compose.yml index 9118e732..c0821de5 100644 --- a/extras/speaker-recognition/docker-compose.yml +++ b/extras/speaker-recognition/docker-compose.yml @@ -108,4 +108,4 @@ services: networks: default: name: friend-network - external: true + external: true \ No newline at end of file diff --git a/extras/speaker-recognition/pyproject.toml b/extras/speaker-recognition/pyproject.toml index 673551d5..e9f6bbbf 100644 --- a/extras/speaker-recognition/pyproject.toml +++ b/extras/speaker-recognition/pyproject.toml @@ -125,4 +125,4 @@ test = [ ] [tool.isort] -profile = "black" +profile = "black" \ No newline at end of file