diff --git a/CLAUDE.md b/CLAUDE.md index 35bde2fc..8ee8193c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -178,17 +178,127 @@ Optional: 1. **Audio Ingestion**: OMI devices stream audio via WebSocket using Wyoming protocol with JWT auth 2. **Wyoming Protocol Session Management**: Clients send audio-start/audio-stop events for session boundaries 3. **Application-Level Processing**: Global queues and processors handle all audio/transcription/memory tasks -4. **Conversation Storage**: Transcripts saved to MongoDB `audio_chunks` collection with segments array -5. **Conversation Management**: Session-based conversation segmentation using Wyoming protocol events -6. **Memory Processing**: Pluggable providers (Friend-Lite native with individual facts or OpenMemory MCP delegation) -7. **Memory Storage**: Direct Qdrant (Friend-Lite) or OpenMemory server (MCP provider) -8. **Action Items**: Automatic task detection with "Simon says" trigger phrases -9. **Audio Optimization**: Speech segment extraction removes silence automatically -10. **Task Tracking**: BackgroundTaskManager ensures proper cleanup of all async operations +4. **Speech-Driven Conversation Creation**: User-facing conversations only created when speech is detected +5. **Dual Storage System**: Audio sessions always stored in `audio_chunks`, conversations created in `conversations` collection only with speech +6. **Versioned Processing**: Transcript and memory versions tracked with active version pointers +7. **Memory Processing**: Pluggable providers (Friend-Lite native with individual facts or OpenMemory MCP delegation) +8. **Memory Storage**: Direct Qdrant (Friend-Lite) or OpenMemory server (MCP provider) +9. **Action Items**: Automatic task detection with "Simon says" trigger phrases +10. **Audio Optimization**: Speech segment extraction removes silence automatically +11. 
**Task Tracking**: BackgroundTaskManager ensures proper cleanup of all async operations + +### Speech-Driven Architecture + +**Core Principle**: Conversations are only created when speech is detected, eliminating noise-only sessions from user interfaces. + +**Storage Architecture**: +- **`audio_chunks` Collection**: Always stores audio sessions by `audio_uuid` (raw audio capture) +- **`conversations` Collection**: Only created when speech is detected, identified by `conversation_id` +- **Speech Detection**: Analyzes transcript content, duration, and meaningfulness before conversation creation +- **Automatic Filtering**: No user-facing conversations for silence, noise, or brief audio without speech + +**Benefits**: +- Clean user experience with only meaningful conversations displayed +- Reduced noise in conversation lists and memory processing +- Efficient storage utilization for speech-only content +- Automatic quality filtering without manual intervention + +### Versioned Transcript and Memory System + +**Version Architecture**: +- **`transcript_versions`**: Array of transcript processing attempts with timestamps and providers +- **`memory_versions`**: Array of memory extraction attempts with different models/prompts +- **`active_transcript_version`**: Pointer to currently displayed transcript +- **`active_memory_version`**: Pointer to currently active memory extraction + +**Reprocessing Capabilities**: +- **Transcript Reprocessing**: Re-run speech-to-text with different providers or settings +- **Memory Reprocessing**: Re-extract memories using different LLM models or prompts +- **Version Management**: Switch between different processing results +- **Backward Compatibility**: Legacy fields auto-populated from active versions + +**Data Consistency**: +- All reprocessing operations use `conversation_id` (not `audio_uuid`) +- DateTime objects stored as ISO strings for MongoDB/JSON compatibility +- Legacy field support ensures existing integrations continue working 
### Database Schema Details -- **Conversations**: Stored in `audio_chunks` collection (not `conversations`) -- **Transcript Format**: Array of segment objects with `text`, `speaker`, `start`, `end` fields + +**Collections Overview**: +- **`audio_chunks`**: All audio sessions by `audio_uuid` (always created) +- **`conversations`**: Speech-detected conversations by `conversation_id` (created conditionally) +- **`users`**: User accounts and authentication data + +**Speech-Driven Schema**: +```javascript +// audio_chunks collection (always created) +{ + "_id": ObjectId, + "audio_uuid": "uuid", // Primary identifier + "user_id": ObjectId, + "client_id": "user_suffix-device_name", + "audio_file_path": "/path/to/audio.wav", + "created_at": ISODate, + "transcript": "fallback transcript", // For non-speech audio + "segments": [...], // Speaker segments + "has_speech": boolean, // Speech detection result + "speech_analysis": {...}, // Detection metadata + "conversation_id": "conv_id" | null // Link to conversations collection +} + +// conversations collection (speech-detected only) +{ + "_id": ObjectId, + "conversation_id": "conv_uuid", // Primary identifier for user-facing operations + "audio_uuid": "audio_uuid", // Link to audio_chunks + "user_id": ObjectId, + "client_id": "user_suffix-device_name", + "created_at": ISODate, + + // Versioned Transcript System + "transcript_versions": [ + { + "version_id": "uuid", + "transcript": "text content", + "segments": [...], // Speaker diarization + "provider": "deepgram|mistral|parakeet", + "model": "nova-3|voxtral-mini-2507", + "created_at": ISODate, + "processing_time_seconds": 12.5, + "metadata": {...} + } + ], + "active_transcript_version": "uuid", // Points to current version + + // Versioned Memory System + "memory_versions": [ + { + "version_id": "uuid", + "memory_count": 5, + "transcript_version_id": "uuid", // Which transcript was used + "provider": "friend_lite|openmemory_mcp", + "model": "gpt-4o-mini|ollama-llama3", + 
"created_at": ISODate, + "processing_time_seconds": 45.2, + "metadata": {...} + } + ], + "active_memory_version": "uuid", // Points to current version + + // Legacy Fields (auto-populated from active versions) + "transcript": "text", // From active_transcript_version + "segments": [...], // From active_transcript_version + "memories": [...], // From active_memory_version + "memory_count": 5 // From active_memory_version +} +``` + +**Key Architecture Benefits**: +- **Clean Separation**: Raw audio storage vs user-facing conversations +- **Speech Filtering**: Only meaningful conversations appear in UI +- **Version History**: Complete audit trail of processing attempts +- **Backward Compatibility**: Legacy fields ensure existing code works +- **Reprocessing Support**: Easy to re-run with different providers/models - **Service Decoupling**: Conversation creation independent of memory processing - **Error Isolation**: Memory service failures don't affect conversation storage @@ -225,6 +335,7 @@ MEMORY_PROVIDER=friend_lite # or openmemory_mcp # Database MONGODB_URI=mongodb://mongo:27017 +# Database name: friend-lite QDRANT_BASE_URL=qdrant # Network Configuration @@ -576,6 +687,88 @@ docker compose up --build -d - You want access to OpenMemory's web interface - You're already using OpenMemory in other tools +## Versioned Processing System + +### Overview + +Friend-Lite implements a comprehensive versioning system for both transcript and memory processing, allowing multiple processing attempts with different providers, models, or settings while maintaining a clean user experience. 
+ +### Version Data Structure + +**Transcript Versions**: +```json +{ + "transcript_versions": [ + { + "version_id": "uuid", + "transcript": "processed text", + "segments": [...], + "provider": "deepgram|mistral|parakeet", + "model": "nova-3|voxtral-mini-2507", + "created_at": "2025-01-15T10:30:00Z", + "processing_time_seconds": 12.5, + "metadata": { + "confidence_scores": [...], + "speaker_diarization": true + } + } + ], + "active_transcript_version": "uuid" +} +``` + +**Memory Versions**: +```json +{ + "memory_versions": [ + { + "version_id": "uuid", + "memory_count": 5, + "transcript_version_id": "uuid", + "provider": "friend_lite|openmemory_mcp", + "model": "gpt-4o-mini|ollama-llama3", + "created_at": "2025-01-15T10:32:00Z", + "processing_time_seconds": 45.2, + "metadata": { + "prompt_version": "v2.1", + "extraction_quality": "high" + } + } + ], + "active_memory_version": "uuid" +} +``` + +### Reprocessing Workflows + +**Transcript Reprocessing**: +1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-transcript` +2. System creates new transcript version with different provider/model +3. New version added to `transcript_versions` array +4. User can activate any version via `activate-transcript` endpoint +5. Legacy `transcript` field automatically updated from active version + +**Memory Reprocessing**: +1. Trigger via API: `POST /api/conversations/{conversation_id}/reprocess-memory` +2. Specify which transcript version to use as input +3. System creates new memory version using specified transcript +4. New version added to `memory_versions` array +5. User can activate any version via `activate-memory` endpoint +6. 
Legacy `memories` field automatically updated from active version + +### Legacy Field Compatibility + +**Automatic Population**: +- `transcript`: Auto-populated from active transcript version +- `segments`: Auto-populated from active transcript version +- `memories`: Auto-populated from active memory version +- `memory_count`: Auto-populated from active memory version + +**Backward Compatibility**: +- Existing API clients continue working without modification +- WebUI displays active versions by default +- Advanced users can access version history and switch between versions + ## Development Notes ### Package Management @@ -649,7 +842,12 @@ Project includes `.cursor/rules/always-plan-first.mdc` requiring understanding b - **GET /api/memories/unfiltered**: User's memories without filtering - **GET /api/memories/search**: Semantic memory search with relevance scoring - **GET /api/conversations**: User's conversations with transcripts -- **GET /api/conversations/{audio_uuid}**: Specific conversation details +- **GET /api/conversations/{conversation_id}**: Specific conversation details +- **POST /api/conversations/{conversation_id}/reprocess-transcript**: Re-run transcript processing +- **POST /api/conversations/{conversation_id}/reprocess-memory**: Re-extract memories with different parameters +- **GET /api/conversations/{conversation_id}/versions**: Get all transcript and memory versions +- **POST /api/conversations/{conversation_id}/activate-transcript**: Switch to a different transcript version +- **POST /api/conversations/{conversation_id}/activate-memory**: Switch to a different memory version ### Client Management - **GET /api/clients/active**: Currently active WebSocket clients @@ -661,12 +859,11 @@ Project includes `.cursor/rules/always-plan-first.mdc` requiring understanding b - Client timeout: 5 minutes, Server processing: up to 3x audio duration + 60s - Example usage: ```bash - # Get admin credentials from .env file first + # Step 1: Read .env file for 
ADMIN_EMAIL and ADMIN_PASSWORD + # Step 2: Get auth token + # Step 3: Use token in file upload curl -X POST \ - -H "Authorization: Bearer $(curl -s -X POST \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "username=ADMIN_EMAIL&password=ADMIN_PASSWORD" \ - http://localhost:8000/auth/jwt/login | jq -r '.access_token')" \ + -H "Authorization: Bearer YOUR_TOKEN_HERE" \ -F "files=@/path/to/audio.wav" \ -F "device_name=test-upload" \ http://localhost:8000/api/process-audio-files @@ -677,13 +874,92 @@ Project includes `.cursor/rules/always-plan-first.mdc` requiring understanding b - **GET /users/me**: Get current authenticated user - **GET /api/auth/config**: Authentication configuration +### Step-by-Step API Testing Guide + +When testing API endpoints that require authentication, follow these steps: + +#### Step 1: Read credentials from .env file +```bash +# Use the Read tool to view the .env file and identify credentials +# Look for: +# ADMIN_EMAIL=admin@example.com +# ADMIN_PASSWORD=your-password-here +``` + +#### Step 2: Get authentication token +```bash +curl -s -X POST \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin@example.com&password=your-password-here" \ + http://localhost:8000/auth/jwt/login +``` +This returns: +```json +{"access_token":"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...","token_type":"bearer"} +``` + +#### Step 3: Use the token in API calls +```bash +# Extract the token from the response above and use it: +curl -s -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ + http://localhost:8000/api/conversations + +# For reprocessing endpoints: +curl -s -X POST \ + -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." \ + -H "Content-Type: application/json" \ + http://localhost:8000/api/conversations/{conversation_id}/reprocess-transcript +``` + +**Important**: Always read the .env file first using the Read tool rather than using shell commands like `grep` or `cut`. 
This ensures you see the exact values and can copy them accurately. + +#### Step 4: Testing Reprocessing Endpoints +Once you have the auth token, you can test the reprocessing functionality: + +```bash +# Get list of conversations to find a conversation_id +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations + +# Test transcript reprocessing (uses conversation_id) +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-transcript + +# Test memory reprocessing (uses conversation_id and transcript_version_id) +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"transcript_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/reprocess-memory + +# Get transcript and memory versions +curl -s -H "Authorization: Bearer YOUR_TOKEN" \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/versions + +# Activate a specific transcript version +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"transcript_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-transcript + +# Activate a specific memory version +curl -s -X POST \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"memory_version_id": "VERSION_ID"}' \ + http://localhost:8000/api/conversations/YOUR_CONVERSATION_ID/activate-memory +``` + ### Development Reset Endpoints Useful endpoints for resetting state during development: #### Data Cleanup - **DELETE /api/admin/memory/delete-all**: Delete all memories for the current user - **DELETE /api/memories/{memory_id}**: Delete a specific memory -- **DELETE /api/conversations/{audio_uuid}**: Delete a specific conversation and its audio file +- **DELETE 
/api/conversations/{conversation_id}**: Delete a specific conversation (keeps original audio file in audio_chunks) - **DELETE /api/chat/sessions/{session_id}**: Delete a chat session and all its messages - **DELETE /api/users/{user_id}**: Delete a user (Admin only) - Optional query params: `delete_conversations=true`, `delete_memories=true` @@ -851,7 +1127,7 @@ OPENAI_BASE_URL=http://100.64.1.100:8080 # For vLLM/OpenAI-compatible APIs SPEAKER_SERVICE_URL=http://100.64.1.100:8085 # Database services (can be on separate machine) -MONGODB_URI=mongodb://100.64.1.200:27017 +MONGODB_URI=mongodb://100.64.1.200:27017 # Database name: friend-lite QDRANT_BASE_URL=http://100.64.1.200:6333 ``` diff --git a/backends/advanced/Docs/architecture.md b/backends/advanced/Docs/architecture.md index 78435ef1..c6cdf996 100644 --- a/backends/advanced/Docs/architecture.md +++ b/backends/advanced/Docs/architecture.md @@ -4,7 +4,7 @@ ## System Overview -Friend-Lite is a comprehensive real-time conversation processing system that captures audio streams, performs speech-to-text transcription, and extracts memories. The system features a FastAPI backend with WebSocket audio streaming, a modern React web dashboard with advanced search capabilities, and complete user authentication with role-based access control. +Friend-Lite is a comprehensive real-time conversation processing system that captures audio streams, performs speech-to-text transcription, and extracts memories using a speech-driven architecture. The system features a FastAPI backend with WebSocket audio streaming, versioned transcript and memory processing, a modern React web dashboard with advanced search capabilities, and complete user authentication with role-based access control. **Core Implementation**: The complete system is implemented in `src/advanced_omi_backend/main.py` with supporting services in dedicated modules, using a modular router/controller architecture pattern. 
@@ -67,13 +67,14 @@ graph TB %% Storage Systems subgraph "Storage" FS[File System
Audio Files] - + subgraph "MongoDB" - AC_COL[audio_chunks
conversations] + AC_COL[audio_chunks
All Audio Sessions] + CONV_COL[conversations
Speech-Detected Only] USERS[users] CONV_REPO[ConversationRepository
Clean Data Access] end - + subgraph "Qdrant" MEM[Memory Vectors] end @@ -769,13 +770,16 @@ flowchart TB ### Audio Ingestion & Processing 1. **Client Authentication**: JWT token validation for WebSocket connection (email or user_id based) -2. **Client ID Generation**: Automatic `user_id-device_name` format creation for client identification +2. **Client ID Generation**: Automatic `user_id-device_name` format creation for client identification 3. **Permission Registration**: Client-user relationship tracking in permission dictionaries 4. **Audio Streaming**: Real-time Opus/PCM packets over WebSocket with user context 5. **Per-Client Processing**: Isolated audio queues and state management per user -6. **Transcription Pipeline**: Configurable ASR service integration with user-scoped storage -7. **Conversation Lifecycle**: Automatic timeout handling and memory processing -8. **Audio Optimization**: Speech segment extraction and silence removal +6. **Audio Session Storage**: All audio sessions stored in `audio_chunks` collection by `audio_uuid` +7. **Speech Detection**: Automatic analysis of transcript content for meaningful speech +8. **Conversation Creation**: User-facing conversations created in `conversations` collection only when speech detected +9. **Transcription Pipeline**: Configurable ASR service integration with versioned storage +10. **Conversation Lifecycle**: Automatic timeout handling and memory processing +11. **Audio Optimization**: Speech segment extraction and silence removal #### Critical Timing Considerations **Transcription Manager Creation Race Condition**: When processing uploaded audio files, there's a timing dependency between: @@ -792,13 +796,16 @@ flowchart TB 4. Client audio closure triggers transcript completion ### Memory & Intelligence Processing -1. **Conversation Completion**: End-of-session trigger for memory extraction +1. **Conversation Completion**: End-of-session trigger for memory extraction (speech-detected conversations only) 2. 
**Transcript Validation**: Multi-layer validation prevents empty/short transcripts from reaching LLM implemented in memory controller 3. **User Resolution**: Client-ID to database user mapping for proper data association -4. **LLM Processing**: Ollama-based conversation summarization with user context (only for validated transcripts) -5. **Vector Storage**: Semantic embeddings stored in Qdrant keyed by user_id -6. **Metadata Enhancement**: Client information and user email stored in metadata -7. **Search & Retrieval**: User-scoped semantic memory search capabilities +4. **Versioned Processing**: Multiple transcript and memory versions with provider/model tracking +5. **LLM Processing**: Ollama-based conversation summarization with user context (only for validated transcripts) +6. **Vector Storage**: Semantic embeddings stored in Qdrant keyed by user_id +7. **Metadata Enhancement**: Client information and user email stored in metadata +8. **Reprocessing Capabilities**: Re-run transcript or memory extraction with different parameters +9. **Version Management**: Active version pointers with automatic legacy field population +10. **Search & Retrieval**: User-scoped semantic memory search capabilities ### User Management & Security 1. 
**Registration**: Admin-controlled user creation with email/password and auto-generated user_id @@ -958,7 +965,12 @@ src/advanced_omi_backend/ /api/ ├── /users # User management (admin only) ├── /clients/active # Active client monitoring -├── /conversations # Conversation CRUD operations +├── /conversations # Conversation CRUD operations (speech-detected only) +│ ├── /{conversation_id}/reprocess-transcript # Transcript reprocessing +│ ├── /{conversation_id}/reprocess-memory # Memory reprocessing +│ ├── /{conversation_id}/versions # Version history +│ ├── /{conversation_id}/activate-transcript # Switch transcript version +│ └── /{conversation_id}/activate-memory # Switch memory version ├── /memories # Memory management and search │ ├── /admin # Admin view (all users) │ └── /search # Semantic memory search @@ -984,7 +996,13 @@ src/advanced_omi_backend/ - `GET /api/memories/search?query=` - Semantic memory search #### Audio & Conversations -- `GET /api/conversations` - User conversations +- `GET /api/conversations` - User conversations (speech-detected only) +- `GET /api/conversations/{conversation_id}` - Specific conversation details +- `POST /api/conversations/{conversation_id}/reprocess-transcript` - Re-run transcript processing +- `POST /api/conversations/{conversation_id}/reprocess-memory` - Re-extract memories +- `GET /api/conversations/{conversation_id}/versions` - Get version history +- `POST /api/conversations/{conversation_id}/activate-transcript` - Switch transcript version +- `POST /api/conversations/{conversation_id}/activate-memory` - Switch memory version - `POST /api/process-audio-files` - Batch audio file processing - WebSocket `/ws_omi` - Real-time Opus audio streaming with Wyoming protocol (OMI devices) - WebSocket `/ws_pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps) @@ -1004,13 +1022,38 @@ src/advanced_omi_backend/ "client_id": "cd7994-laptop", "user_id": "507f1f77bcf86cd799439011", "connected_at": "2025-01-15T10:30:00Z", - 
"conversation_count": 3 + "conversation_count": 3 // speech-detected conversations only } ], "active_clients_count": 1, "total_count": 1 } +// Conversation with versions response +{ + "conversation_id": "conv_12345", + "audio_uuid": "audio_67890", + "transcript": "Active transcript content", // from active version + "active_transcript_version": "version_abc", + "active_memory_version": "version_def", + "transcript_versions": [ + { + "version_id": "version_abc", + "provider": "deepgram", + "created_at": "2025-01-15T10:30:00Z", + "transcript": "Processed content" + } + ], + "memory_versions": [ + { + "version_id": "version_def", + "provider": "friend_lite", + "created_at": "2025-01-15T10:32:00Z", + "memory_count": 5 + } + ] +} + // Admin memories response { "memories": [...], // Flat list for compatibility @@ -1039,4 +1082,4 @@ src/advanced_omi_backend/ - **Error Handling**: Graceful degradation with detailed logging - **System Tracking**: Debug tracking and monitoring via SystemTracker -This architecture supports a fully-featured conversation processing system with enterprise-grade authentication, real-time audio processing, and intelligent content analysis, all deployable via a single Docker Compose command. \ No newline at end of file +This architecture supports a fully-featured conversation processing system with enterprise-grade authentication, real-time audio processing, speech-driven conversation creation, versioned transcript and memory processing, and intelligent content analysis, all deployable via a single Docker Compose command. 
\ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 0f4ca289..0db39a98 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -3,6 +3,7 @@ """ import asyncio +import hashlib import logging import time from pathlib import Path @@ -16,15 +17,17 @@ client_belongs_to_user, get_user_clients_all, ) -from advanced_omi_backend.database import AudioChunksRepository, chunks_col, conversations_col, ConversationsRepository +from advanced_omi_backend.database import AudioChunksRepository, ProcessingRunsRepository, chunks_col, processing_runs_col, conversations_col, ConversationsRepository from advanced_omi_backend.users import User from fastapi.responses import JSONResponse logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") -# Initialize chunk repository +# Initialize repositories chunk_repo = AudioChunksRepository(chunks_col) +processing_runs_repo = ProcessingRunsRepository(processing_runs_col) +conversations_repo = ConversationsRepository(conversations_col) async def close_current_conversation(client_id: str, user: User, client_manager: ClientManager): @@ -244,14 +247,33 @@ async def reprocess_audio_cropping(audio_uuid: str, user: User): status_code=400, content={"error": "No audio file found for this conversation"} ) - # Check if file exists - if not Path(audio_path).exists(): - return JSONResponse(status_code=404, content={"error": "Audio file not found on disk"}) + # Check if file exists - try multiple possible locations + possible_paths = [ + Path("/app/data/audio_chunks") / audio_path, + Path("/app/audio_chunks") / audio_path, + Path(audio_path), # fallback to relative path + ] + + full_audio_path = None + for path in possible_paths: + if 
path.exists(): + full_audio_path = path + break + + if not full_audio_path: + return JSONResponse( + status_code=422, + content={ + "error": "Audio file not found on disk", + "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", + "searched_paths": [str(p) for p in possible_paths] + } + ) # Reprocess the audio cropping try: result = await asyncio.get_running_loop().run_in_executor( - None, _process_audio_cropping_with_relative_timestamps, audio_path, audio_uuid + None, _process_audio_cropping_with_relative_timestamps, str(full_audio_path), audio_uuid ) if result: @@ -375,27 +397,28 @@ async def delete_conversation(audio_uuid: str, user: User): logger.debug(f"Total conversations in collection: {total_count}") logger.debug(f"UUID length: {len(audio_uuid)}, type: {type(audio_uuid)}") - # First, get the conversation to check ownership - conversation = await chunks_col.find_one({"audio_uuid": audio_uuid}) + # First, get the audio chunk record to check ownership and get conversation_id + audio_chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Conversation lookup result: {'found' if conversation else 'not found'}") - if conversation: - logger.debug(f"Found conversation with client_id: {conversation.get('client_id')}") + logger.debug(f"Audio chunk lookup result: {'found' if audio_chunk else 'not found'}") + if audio_chunk: + logger.debug(f"Found audio chunk with client_id: {audio_chunk.get('client_id')}") + logger.debug(f"Audio chunk has conversation_id: {audio_chunk.get('conversation_id')}") else: # Try alternative queries for debugging regex_result = await chunks_col.find_one({"audio_uuid": {"$regex": f"^{audio_uuid}$", "$options": "i"}}) contains_result = await chunks_col.find_one({"audio_uuid": {"$regex": audio_uuid}}) logger.debug(f"Alternative query attempts - case insensitive: {'found' if regex_result else 'not found'}, substring: {'found' if 
contains_result else 'not found'}") - - if not conversation: + + if not audio_chunk: return JSONResponse( status_code=404, - content={"error": f"Conversation with audio_uuid '{audio_uuid}' not found"} + content={"error": f"Audio chunk with audio_uuid '{audio_uuid}' not found"} ) # Check if user has permission to delete this conversation - client_id = conversation.get("client_id") + client_id = audio_chunk.get("client_id") if not user.is_superuser and not client_belongs_to_user(client_id, user.user_id): logger.warning( f"User {user.user_id} attempted to delete conversation {audio_uuid} without permission" @@ -408,19 +431,37 @@ async def delete_conversation(audio_uuid: str, user: User): } ) - # Get audio file path for deletion - audio_path = conversation.get("audio_path") - cropped_audio_path = conversation.get("cropped_audio_path") - - # Delete the conversation from database - result = await chunks_col.delete_one({"audio_uuid": audio_uuid}) - - if result.deleted_count == 0: + # Get audio file paths for deletion + audio_path = audio_chunk.get("audio_path") + cropped_audio_path = audio_chunk.get("cropped_audio_path") + + # Get conversation_id if this audio chunk has an associated conversation + conversation_id = audio_chunk.get("conversation_id") + conversation_deleted = False + + # Delete from audio_chunks collection first + audio_result = await chunks_col.delete_one({"audio_uuid": audio_uuid}) + + if audio_result.deleted_count == 0: return JSONResponse( status_code=404, - content={"error": f"Conversation with audio_uuid '{audio_uuid}' not found"} + content={"error": f"Failed to delete audio chunk with audio_uuid '{audio_uuid}'"} ) + logger.info(f"Deleted audio chunk {audio_uuid}") + + # If this audio chunk has an associated conversation, delete it from conversations collection too + if conversation_id: + try: + conversation_result = await conversations_col.delete_one({"conversation_id": conversation_id}) + if conversation_result.deleted_count > 0: + 
conversation_deleted = True + logger.info(f"Deleted conversation {conversation_id} associated with audio chunk {audio_uuid}") + else: + logger.warning(f"Conversation {conversation_id} not found in conversations collection, but audio chunk was deleted") + except Exception as e: + logger.warning(f"Failed to delete conversation {conversation_id}: {e}") + # Delete associated audio files deleted_files = [] if audio_path: @@ -446,13 +487,23 @@ async def delete_conversation(audio_uuid: str, user: User): logger.warning(f"Failed to delete cropped audio file {cropped_audio_path}: {e}") logger.info(f"Successfully deleted conversation {audio_uuid} for user {user.user_id}") - + + # Prepare response message + delete_summary = [] + delete_summary.append("audio chunk") + if conversation_deleted: + delete_summary.append("conversation record") + if deleted_files: + delete_summary.append(f"{len(deleted_files)} audio file(s)") + return JSONResponse( status_code=200, content={ - "message": f"Conversation '{audio_uuid}' deleted successfully", + "message": f"Successfully deleted {', '.join(delete_summary)} for '{audio_uuid}'", "deleted_files": deleted_files, - "client_id": client_id + "client_id": client_id, + "conversation_id": conversation_id, + "conversation_deleted": conversation_deleted } ) @@ -462,3 +513,280 @@ async def delete_conversation(audio_uuid: str, user: User): status_code=500, content={"error": f"Failed to delete conversation: {str(e)}"} ) + + +async def reprocess_transcript(conversation_id: str, user: User): + """Reprocess transcript for a conversation. 
Users can only reprocess their own conversations.""" + try: + # Find the conversation in conversations collection + conversations_repo = ConversationsRepository(conversations_col) + conversation = await conversations_repo.get_conversation(conversation_id) + if not conversation: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser and conversation["user_id"] != str(user.user_id): + return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) + + # Get audio_uuid for file access + audio_uuid = conversation["audio_uuid"] + + # Get audio file path from audio_chunks collection + chunk = await chunks_col.find_one({"audio_uuid": audio_uuid}) + if not chunk: + return JSONResponse(status_code=404, content={"error": "Audio session not found"}) + + audio_path = chunk.get("audio_path") + if not audio_path: + return JSONResponse( + status_code=400, content={"error": "No audio file found for this conversation"} + ) + + # Check if file exists - try multiple possible locations + possible_paths = [ + Path("/app/data/audio_chunks") / audio_path, + Path("/app/audio_chunks") / audio_path, + Path(audio_path), # fallback to relative path + ] + + full_audio_path = None + for path in possible_paths: + if path.exists(): + full_audio_path = path + break + + if not full_audio_path: + return JSONResponse( + status_code=422, + content={ + "error": "Audio file not found on disk", + "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", + "searched_paths": [str(p) for p in possible_paths] + } + ) + + # Generate configuration hash for duplicate detection + config_data = { + "audio_path": str(full_audio_path), + "transcription_provider": "deepgram", # This would come from settings + "trigger": "manual_reprocess" + } + config_hash = hashlib.sha256(str(config_data).encode()).hexdigest()[:16] + + 
# Create processing run + run_id = await processing_runs_repo.create_run( + conversation_id=conversation_id, + audio_uuid=audio_uuid, + run_type="transcript", + user_id=user.user_id, + trigger="manual_reprocess", + config_hash=config_hash + ) + + # Create new transcript version in conversations collection + version_id = await conversations_repo.create_transcript_version( + conversation_id=conversation_id, + processing_run_id=run_id + ) + + if not version_id: + return JSONResponse( + status_code=500, content={"error": "Failed to create transcript version"} + ) + + # TODO: Queue audio for reprocessing with ProcessorManager + # This is where we would integrate with the existing processor + # For now, we'll return the version ID for the caller to handle + + logger.info(f"Created transcript reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}") + + return JSONResponse(content={ + "message": f"Transcript reprocessing started for conversation {conversation_id}", + "run_id": run_id, + "version_id": version_id, + "config_hash": config_hash, + "status": "PENDING" + }) + + except Exception as e: + logger.error(f"Error starting transcript reprocessing: {e}") + return JSONResponse(status_code=500, content={"error": "Error starting transcript reprocessing"}) + + +async def reprocess_memory(conversation_id: str, transcript_version_id: str, user: User): + """Reprocess memory extraction for a specific transcript version. 
Users can only reprocess their own conversations.""" + try: + # Find the conversation in conversations collection + conversations_repo = ConversationsRepository(conversations_col) + conversation = await conversations_repo.get_conversation(conversation_id) + if not conversation: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser and conversation["user_id"] != str(user.user_id): + return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) + + # Get audio_uuid for processing run tracking + audio_uuid = conversation["audio_uuid"] + + # Resolve transcript version ID + transcript_versions = conversation.get("transcript_versions", []) + + # Handle special "active" version ID + if transcript_version_id == "active": + active_version_id = conversation.get("active_transcript_version") + if not active_version_id: + return JSONResponse( + status_code=404, content={"error": "No active transcript version found"} + ) + transcript_version_id = active_version_id + + # Find the specific transcript version + transcript_version = None + for version in transcript_versions: + if version["version_id"] == transcript_version_id: + transcript_version = version + break + + if not transcript_version: + return JSONResponse( + status_code=404, content={"error": f"Transcript version '{transcript_version_id}' not found"} + ) + + # Generate configuration hash for duplicate detection + config_data = { + "transcript_version_id": transcript_version_id, + "memory_provider": "friend_lite", # This would come from settings + "trigger": "manual_reprocess" + } + config_hash = hashlib.sha256(str(config_data).encode()).hexdigest()[:16] + + # Create processing run + run_id = await processing_runs_repo.create_run( + conversation_id=conversation_id, + audio_uuid=audio_uuid, + run_type="memory", + user_id=user.user_id, + trigger="manual_reprocess", 
+ config_hash=config_hash + ) + + # Create new memory version in conversations collection + version_id = await conversations_repo.create_memory_version( + conversation_id=conversation_id, + transcript_version_id=transcript_version_id, + processing_run_id=run_id + ) + + if not version_id: + return JSONResponse( + status_code=500, content={"error": "Failed to create memory version"} + ) + + # TODO: Queue memory extraction for processing + # This is where we would integrate with the existing memory processor + + logger.info(f"Created memory reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}") + + return JSONResponse(content={ + "message": f"Memory reprocessing started for conversation {conversation_id}", + "run_id": run_id, + "version_id": version_id, + "transcript_version_id": transcript_version_id, + "config_hash": config_hash, + "status": "PENDING" + }) + + except Exception as e: + logger.error(f"Error starting memory reprocessing: {e}") + return JSONResponse(status_code=500, content={"error": "Error starting memory reprocessing"}) + + +async def activate_transcript_version(conversation_id: str, version_id: str, user: User): + """Activate a specific transcript version. Users can only modify their own conversations.""" + try: + # Find the conversation in conversations collection + conversations_repo = ConversationsRepository(conversations_col) + conversation = await conversations_repo.get_conversation(conversation_id) + if not conversation: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser and conversation["user_id"] != str(user.user_id): + return JSONResponse(status_code=403, content={"error": "Access forbidden. 
You can only modify your own conversations."}) + + # Activate the transcript version + success = await conversations_repo.activate_transcript_version(conversation_id, version_id) + if not success: + return JSONResponse( + status_code=400, content={"error": "Failed to activate transcript version"} + ) + + # TODO: Trigger speaker recognition if configured + # This would integrate with existing speaker recognition logic + + logger.info(f"Activated transcript version {version_id} for conversation {conversation_id} by user {user.user_id}") + + return JSONResponse(content={ + "message": f"Transcript version {version_id} activated successfully", + "active_transcript_version": version_id + }) + + except Exception as e: + logger.error(f"Error activating transcript version: {e}") + return JSONResponse(status_code=500, content={"error": "Error activating transcript version"}) + + +async def activate_memory_version(conversation_id: str, version_id: str, user: User): + """Activate a specific memory version. Users can only modify their own conversations.""" + try: + # Find the conversation in conversations collection + conversations_repo = ConversationsRepository(conversations_col) + conversation = await conversations_repo.get_conversation(conversation_id) + if not conversation: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser and conversation["user_id"] != str(user.user_id): + return JSONResponse(status_code=403, content={"error": "Access forbidden. 
You can only modify your own conversations."}) + + # Activate the memory version + success = await conversations_repo.activate_memory_version(conversation_id, version_id) + if not success: + return JSONResponse( + status_code=400, content={"error": "Failed to activate memory version"} + ) + + logger.info(f"Activated memory version {version_id} for conversation {conversation_id} by user {user.user_id}") + + return JSONResponse(content={ + "message": f"Memory version {version_id} activated successfully", + "active_memory_version": version_id + }) + + except Exception as e: + logger.error(f"Error activating memory version: {e}") + return JSONResponse(status_code=500, content={"error": "Error activating memory version"}) + + +async def get_conversation_version_history(conversation_id: str, user: User): + """Get version history for a conversation. Users can only access their own conversations.""" + try: + # Find the conversation in conversations collection to check ownership + conversations_repo = ConversationsRepository(conversations_col) + conversation = await conversations_repo.get_conversation(conversation_id) + if not conversation: + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) + + # Check ownership for non-admin users + if not user.is_superuser and conversation["user_id"] != str(user.user_id): + return JSONResponse(status_code=403, content={"error": "Access forbidden. 
You can only access your own conversations."}) + + # Get version history + history = await conversations_repo.get_version_history(conversation_id) + + return JSONResponse(content=history) + + except Exception as e: + logger.error(f"Error fetching version history: {e}") + return JSONResponse(status_code=500, content={"error": "Error fetching version history"}) diff --git a/backends/advanced/src/advanced_omi_backend/database.py b/backends/advanced/src/advanced_omi_backend/database.py index c75681d7..39f3b8a1 100644 --- a/backends/advanced/src/advanced_omi_backend/database.py +++ b/backends/advanced/src/advanced_omi_backend/database.py @@ -9,6 +9,8 @@ import os import time from datetime import UTC, datetime +from typing import Optional +import uuid from motor.motor_asyncio import AsyncIOMotorClient @@ -21,6 +23,7 @@ # Collection references chunks_col = db["audio_chunks"] +processing_runs_col = db["processing_runs"] users_col = db["users"] speakers_col = db["speakers"] conversations_col = db["conversations"] @@ -35,6 +38,7 @@ def get_collections(): """Get commonly used collection references.""" return { "chunks_col": chunks_col, + "processing_runs_col": processing_runs_col, "users_col": users_col, "speakers_col": speakers_col, "conversations_col": conversations_col, @@ -62,6 +66,40 @@ async def create_chunk( transcription_status="PENDING", memory_processing_status="PENDING", ): + # Create initial transcript version if provided + transcript_versions = [] + active_transcript_version = None + + if transcript: + version_id = str(uuid.uuid4()) + transcript_versions.append({ + "version_id": version_id, + "segments": transcript, + "status": transcription_status, + "provider": None, + "created_at": datetime.now(UTC).isoformat(), + "processing_run_id": None, + "raw_data": {}, + "speakers_identified": speakers_identified or [] + }) + active_transcript_version = version_id + + # Create initial memory version if provided + memory_versions = [] + active_memory_version = None + + if 
memories: + version_id = str(uuid.uuid4()) + memory_versions.append({ + "version_id": version_id, + "memories": memories, + "status": memory_processing_status, + "created_at": datetime.now(UTC).isoformat(), + "processing_run_id": None, + "transcript_version_id": active_transcript_version + }) + active_memory_version = version_id + doc = { "audio_uuid": audio_uuid, "audio_path": audio_path, @@ -69,20 +107,76 @@ async def create_chunk( "timestamp": timestamp, "user_id": user_id, "user_email": user_email, - "transcript": transcript or [], # List of conversation segments - "speakers_identified": speakers_identified or [], # List of identified speakers - "memories": memories or [], # List of memory references created from this audio - "transcription_status": transcription_status, # PENDING, COMPLETED, FAILED, EMPTY - "memory_processing_status": memory_processing_status, # PENDING, COMPLETED, FAILED, SKIPPED - "raw_transcript_data": {}, # Raw response from transcription provider + + # Versioned transcript data + "transcript_versions": transcript_versions, + "active_transcript_version": active_transcript_version, + + # Versioned memory data + "memory_versions": memory_versions, + "active_memory_version": active_memory_version, + + # Compatibility fields (computed from active versions) + "transcript": transcript or [], + "speakers_identified": speakers_identified or [], + "memories": memories or [], + "transcription_status": transcription_status, + "memory_processing_status": memory_processing_status, + "raw_transcript_data": {} } await self.col.insert_one(doc) async def add_transcript_segment(self, audio_uuid, transcript_segment): - """Add a single transcript segment to the conversation.""" - await self.col.update_one( - {"audio_uuid": audio_uuid}, {"$push": {"transcript": transcript_segment}} - ) + """Add a single transcript segment to the conversation. + + Interface compatibility method - adds to active transcript version. 
+ Creates first transcript version if none exists. + """ + chunk = await self.get_chunk(audio_uuid) + if not chunk: + return False + + active_version = chunk.get("active_transcript_version") + if not active_version: + # Create initial version if none exists + version_id = str(uuid.uuid4()) + version_data = { + "version_id": version_id, + "segments": [transcript_segment], + "status": "PENDING", + "provider": None, + "created_at": datetime.now(UTC).isoformat(), + "processing_run_id": None, + "raw_data": {}, + "speakers_identified": [] + } + + result = await self.col.update_one( + {"audio_uuid": audio_uuid}, + { + "$push": {"transcript_versions": version_data}, + "$set": { + "active_transcript_version": version_id, + # Update compatibility field too + "transcript": [transcript_segment] + } + } + ) + else: + # Add to existing active version + result = await self.col.update_one( + {"audio_uuid": audio_uuid}, + { + "$push": { + f"transcript_versions.$[version].segments": transcript_segment, + # Update compatibility field too + "transcript": transcript_segment + } + }, + array_filters=[{"version.version_id": active_version}] + ) + + return result.modified_count > 0 async def add_speaker(self, audio_uuid, speaker_id): """Add a speaker to the speakers_identified list if not already present.""" @@ -154,10 +248,26 @@ async def get_chunk_by_audio_uuid(self, audio_uuid: str): return await self.col.find_one({"audio_uuid": audio_uuid}) async def get_transcript_segments(self, audio_uuid: str): - """Get transcript segments for a specific audio UUID.""" - document = await self.col.find_one({"audio_uuid": audio_uuid}, {"transcript": 1}) - if document and "transcript" in document: + """Get transcript segments for a specific audio UUID from active version.""" + document = await self.col.find_one( + {"audio_uuid": audio_uuid}, + {"transcript_versions": 1, "active_transcript_version": 1, "transcript": 1} + ) + + if not document: + return [] + + # Try to get from active version first (new 
versioned approach) + active_version_id = document.get("active_transcript_version") + if active_version_id and "transcript_versions" in document: + for version in document["transcript_versions"]: + if version.get("version_id") == active_version_id: + return version.get("segments", []) + + # Fallback to legacy transcript field for backward compatibility + if "transcript" in document: return document["transcript"] + return [] async def update_transcript(self, audio_uuid, full_transcript): @@ -214,40 +324,64 @@ async def update_cropped_audio( logger.info(f"Updated cropped audio info for {audio_uuid}: {cropped_path}") return result.modified_count > 0 - async def update_transcription_status( - self, audio_uuid: str, status: str, provider: str = None, error_message: str = None - ): - """Update transcription status and completion timestamp.""" - update_doc = { - "transcription_status": status, - "transcription_updated_at": datetime.now(UTC).isoformat(), - } - if provider: - update_doc["transcription_provider"] = provider - if status == "COMPLETED": - update_doc["transcription_completed_at"] = datetime.now(UTC).isoformat() - if error_message: - update_doc["transcription_error"] = error_message - - result = await self.col.update_one({"audio_uuid": audio_uuid}, {"$set": update_doc}) - if result.modified_count > 0: - logger.info(f"Updated transcription status to {status} for {audio_uuid}") - return result.modified_count > 0 async def update_memory_processing_status( self, audio_uuid: str, status: str, error_message: str = None ): - """Update memory processing status and completion timestamp.""" - update_doc = { - "memory_processing_status": status, - "memory_processing_updated_at": datetime.now(UTC).isoformat(), - } - if status == "COMPLETED": - update_doc["memory_processing_completed_at"] = datetime.now(UTC).isoformat() - if error_message: - update_doc["memory_processing_error"] = error_message + """Update memory processing status and completion timestamp. 
+
+        Interface compatibility method - updates active memory version.
+        """
+        chunk = await self.get_chunk(audio_uuid)
+        if not chunk:
+            return False
+
+        active_version = chunk.get("active_memory_version")
+        if not active_version:
+            # Create initial memory version if none exists
+            version_id = str(uuid.uuid4())
+            version_data = {
+                "version_id": version_id,
+                "memories": [],
+                "status": status,
+                "created_at": datetime.now(UTC).isoformat(),
+                "processing_run_id": None,
+                "transcript_version_id": chunk.get("active_transcript_version")
+            }
+            if error_message:
+                version_data["error_message"] = error_message
+
+            result = await self.col.update_one(
+                {"audio_uuid": audio_uuid},
+                {
+                    "$push": {"memory_versions": version_data},
+                    "$set": {
+                        "active_memory_version": version_id,
+                        "memory_processing_status": status,
+                        "memory_processing_updated_at": datetime.now(UTC).isoformat(),
+                    }
+                }
+            )
+        else:
+            # Update existing active version
+            update_doc = {
+                f"memory_versions.$[version].status": status,
+                f"memory_versions.$[version].updated_at": datetime.now(UTC).isoformat(),
+                "memory_processing_status": status,
+                "memory_processing_updated_at": datetime.now(UTC).isoformat(),
+            }
+            if status == "COMPLETED":
+                update_doc["memory_processing_completed_at"] = datetime.now(UTC).isoformat()
+            if error_message:
+                update_doc[f"memory_versions.$[version].error_message"] = error_message
+                update_doc["memory_processing_error"] = error_message
+
+            result = await self.col.update_one(
+                {"audio_uuid": audio_uuid},
+                {"$set": update_doc},
+                array_filters=[{"version.version_id": active_version}]
+            )
 
-        result = await self.col.update_one({"audio_uuid": audio_uuid}, {"$set": update_doc})
         if result.modified_count > 0:
             logger.info(f"Updated memory processing status to {status} for {audio_uuid}")
         return result.modified_count > 0
@@ -269,7 +403,6 @@ async def add_audio_file_path(self, audio_uuid: str, file_path: str):
             logger.info(f"Added audio file path {file_path} to session {audio_uuid}")
             return 
result.modified_count > 0 - async def update_speech_detection(self, audio_uuid: str, **speech_data): """Update speech detection results.""" update_doc = { @@ -328,14 +461,49 @@ async def create_conversation(self, conversation_data: dict) -> str: result = await self.col.insert_one(conversation_data) return conversation_data["conversation_id"] + def _populate_legacy_fields(self, conversation): + """Auto-populate legacy fields from active versions for backward compatibility.""" + if not conversation: + return conversation + + # Auto-populate transcript from active transcript version + active_transcript_version_id = conversation.get("active_transcript_version") + if active_transcript_version_id: + for version in conversation.get("transcript_versions", []): + if version.get("version_id") == active_transcript_version_id: + conversation["transcript"] = version.get("segments", []) + conversation["speakers_identified"] = version.get("speakers_identified", []) + break + else: + # No active version - ensure empty transcript + conversation["transcript"] = [] + + # Auto-populate memories from active memory version + active_memory_version_id = conversation.get("active_memory_version") + if active_memory_version_id: + for version in conversation.get("memory_versions", []): + if version.get("version_id") == active_memory_version_id: + conversation["memories"] = version.get("memories", []) + conversation["memory_processing_status"] = version.get("status", "pending") + break + else: + # No active version - ensure empty memories + conversation["memories"] = [] + conversation["memory_processing_status"] = "pending" + + return conversation + async def get_conversation(self, conversation_id: str): - """Get conversation by conversation_id.""" - return await self.col.find_one({"conversation_id": conversation_id}) + """Get conversation by conversation_id with auto-populated legacy fields.""" + conversation = await self.col.find_one({"conversation_id": conversation_id}) + return 
self._populate_legacy_fields(conversation) async def get_user_conversations(self, user_id: str, limit=100): """Get all conversations for a user (only shows conversations with speech).""" cursor = self.col.find({"user_id": user_id}) - return await cursor.sort("created_at", -1).limit(limit).to_list() + conversations = await cursor.sort("created_at", -1).limit(limit).to_list() + # Auto-populate legacy fields for all conversations + return [self._populate_legacy_fields(conv) for conv in conversations] async def update_conversation(self, conversation_id: str, update_data: dict): """Update conversation data.""" @@ -368,3 +536,248 @@ async def update_memory_processing_status(self, conversation_id: str, status: st } ) return result.modified_count > 0 + + # ======================================== + # NEW: VERSIONING METHODS FOR REPROCESSING + # ======================================== + + async def create_transcript_version( + self, + conversation_id: str, + segments: list = None, + processing_run_id: str = None, + provider: str = None, + raw_data: dict = None + ) -> Optional[str]: + """Create a new transcript version in conversation.""" + version_id = str(uuid.uuid4()) + version_data = { + "version_id": version_id, + "segments": segments or [], + "status": "PENDING", + "provider": provider, + "created_at": datetime.now(UTC).isoformat(), + "processing_run_id": processing_run_id, + "raw_data": raw_data or {}, + "speakers_identified": [] + } + + result = await self.col.update_one( + {"conversation_id": conversation_id}, + {"$push": {"transcript_versions": version_data}} + ) + + if result.modified_count > 0: + logger.info(f"Created new transcript version {version_id} for conversation {conversation_id}") + return version_id + return None + + async def create_memory_version( + self, + conversation_id: str, + transcript_version_id: str, + memories: list = None, + processing_run_id: str = None + ) -> Optional[str]: + """Create a new memory version in conversation.""" + version_id 
= str(uuid.uuid4())
+        version_data = {
+            "version_id": version_id,
+            "memories": memories or [],
+            "status": "PENDING",
+            "created_at": datetime.now(UTC).isoformat(),
+            "processing_run_id": processing_run_id,
+            "transcript_version_id": transcript_version_id
+        }
+
+        result = await self.col.update_one(
+            {"conversation_id": conversation_id},
+            {"$push": {"memory_versions": version_data}}
+        )
+
+        if result.modified_count > 0:
+            logger.info(f"Created new memory version {version_id} for conversation {conversation_id}")
+            return version_id
+        return None
+
+    async def activate_transcript_version(self, conversation_id: str, version_id: str) -> bool:
+        """Activate a specific transcript version in conversation."""
+        # First verify the version exists
+        conversation = await self.col.find_one(
+            {"conversation_id": conversation_id, "transcript_versions.version_id": version_id}
+        )
+        if not conversation:
+            return False
+
+        # Find the version and update active fields
+        version_data = None
+        for version in conversation.get("transcript_versions", []):
+            if version["version_id"] == version_id:
+                version_data = version
+                break
+
+        if not version_data:
+            return False
+
+        result = await self.col.update_one(
+            {"conversation_id": conversation_id},
+            {
+                "$set": {
+                    "active_transcript_version": version_id,
+                    "transcript": version_data["segments"],
+                    "speakers_identified": version_data["speakers_identified"],
+                    "updated_at": datetime.now(UTC).isoformat()
+                }
+            }
+        )
+
+        if result.modified_count > 0:
+            logger.info(f"Activated transcript version {version_id} for conversation {conversation_id}")
+        return result.modified_count > 0
+
+    async def activate_memory_version(self, conversation_id: str, version_id: str) -> bool:
+        """Activate a specific memory version in conversation."""
+        # First verify the version exists
+        conversation = await self.col.find_one(
+            {"conversation_id": conversation_id, "memory_versions.version_id": version_id}
+        )
+        if not conversation:
+            return False
+
+        # Find the version and 
update active fields
+        version_data = None
+        for version in conversation.get("memory_versions", []):
+            if version["version_id"] == version_id:
+                version_data = version
+                break
+
+        if not version_data:
+            return False
+
+        result = await self.col.update_one(
+            {"conversation_id": conversation_id},
+            {
+                "$set": {
+                    "active_memory_version": version_id,
+                    "memories": version_data["memories"],
+                    "memory_processing_status": version_data["status"],
+                    "updated_at": datetime.now(UTC).isoformat()
+                }
+            }
+        )
+
+        if result.modified_count > 0:
+            logger.info(f"Activated memory version {version_id} for conversation {conversation_id}")
+        return result.modified_count > 0
+
+    async def get_version_history(self, conversation_id: str) -> dict:
+        """Get all version history for a conversation."""
+        conversation = await self.col.find_one({"conversation_id": conversation_id})
+        if not conversation:
+            return {}
+
+        return {
+            "conversation_id": conversation_id,
+            "active_transcript_version": conversation.get("active_transcript_version"),
+            "active_memory_version": conversation.get("active_memory_version"),
+            "transcript_versions": conversation.get("transcript_versions", []),
+            "memory_versions": conversation.get("memory_versions", [])
+        }
+
+    async def update_transcript_processing_status(
+        self,
+        conversation_id: str,
+        status: str,
+        provider: str = None,
+        error_message: str = None
+    ):
+        """Update transcript processing status for conversation."""
+        update_doc = {
+            "transcript_processing_status": status,
+            "transcript_processing_updated_at": datetime.now(UTC).isoformat(),
+            "updated_at": datetime.now(UTC).isoformat()
+        }
+        if provider:
+            update_doc["transcript_provider"] = provider
+        if error_message:
+            update_doc["transcript_processing_error"] = error_message
+
+        result = await self.col.update_one(
+            {"conversation_id": conversation_id},
+            {"$set": update_doc}
+        )
+        return result.modified_count > 0
+
+
+class ProcessingRunsRepository:
+    """Repository for processing run tracking (updated for conversation_id)."""
+
+    def 
__init__(self, collection): + self.col = collection + + async def create_run( + self, + *, + conversation_id: str, + audio_uuid: str, # Keep for audio file access + run_type: str, # 'transcript' or 'memory' + user_id: str, + trigger: str, # 'manual_reprocess', 'initial_processing', etc. + config_hash: str = None + ) -> str: + """Create a new processing run for conversation.""" + run_id = str(uuid.uuid4()) + doc = { + "run_id": run_id, + "conversation_id": conversation_id, + "audio_uuid": audio_uuid, # Keep for file access + "run_type": run_type, + "user_id": user_id, + "trigger": trigger, + "config_hash": config_hash, + "status": "PENDING", + "started_at": datetime.now(UTC), + "completed_at": None, + "error_message": None, + "result_version_id": None + } + await self.col.insert_one(doc) + logger.info(f"Created processing run {run_id} for conversation {conversation_id}") + return run_id + + async def update_run_status( + self, + run_id: str, + status: str, + error_message: str = None, + result_version_id: str = None + ) -> bool: + """Update processing run status.""" + update_doc = { + "status": status, + "updated_at": datetime.now(UTC) + } + if status in ["COMPLETED", "FAILED"]: + update_doc["completed_at"] = datetime.now(UTC) + if error_message: + update_doc["error_message"] = error_message + if result_version_id: + update_doc["result_version_id"] = result_version_id + + result = await self.col.update_one( + {"run_id": run_id}, + {"$set": update_doc} + ) + + if result.modified_count > 0: + logger.info(f"Updated processing run {run_id} status to {status}") + return result.modified_count > 0 + + async def get_run(self, run_id: str): + """Get a processing run by ID.""" + return await self.col.find_one({"run_id": run_id}) + + async def get_runs_for_conversation(self, conversation_id: str): + """Get all processing runs for a conversation.""" + cursor = self.col.find({"conversation_id": conversation_id}).sort("started_at", -1) + return await cursor.to_list(length=None) 
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py index c2b294de..02442a24 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py @@ -7,7 +7,7 @@ import logging from typing import Optional -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Query from advanced_omi_backend.auth import current_active_user from advanced_omi_backend.client_manager import ( @@ -89,3 +89,58 @@ async def update_transcript_segment( return await conversation_controller.update_transcript_segment( audio_uuid, segment_index, current_user, speaker_id, start_time, end_time ) + + +# New reprocessing endpoints +@router.post("/{conversation_id}/reprocess-transcript") +async def reprocess_transcript( + conversation_id: str, current_user: User = Depends(current_active_user) +): + """Reprocess transcript for a conversation. Users can only reprocess their own conversations.""" + return await conversation_controller.reprocess_transcript(conversation_id, current_user) + + +@router.post("/{conversation_id}/reprocess-memory") +async def reprocess_memory( + conversation_id: str, + current_user: User = Depends(current_active_user), + transcript_version_id: str = Query(default="active") +): + """Reprocess memory extraction for a specific transcript version. Users can only reprocess their own conversations.""" + return await conversation_controller.reprocess_memory(conversation_id, transcript_version_id, current_user) + + +@router.post("/{conversation_id}/activate-transcript/{version_id}") +async def activate_transcript_version( + conversation_id: str, + version_id: str, + current_user: User = Depends(current_active_user) +): + """Activate a specific transcript version. 
Users can only modify their own conversations.""" + return await conversation_controller.activate_transcript_version(conversation_id, version_id, current_user) + + +@router.post("/{conversation_id}/activate-memory/{version_id}") +async def activate_memory_version( + conversation_id: str, + version_id: str, + current_user: User = Depends(current_active_user) +): + """Activate a specific memory version. Users can only modify their own conversations.""" + return await conversation_controller.activate_memory_version(conversation_id, version_id, current_user) + + +@router.get("/{conversation_id}/versions") +async def get_conversation_version_history( + conversation_id: str, current_user: User = Depends(current_active_user) +): + """Get version history for a conversation. Users can only access their own conversations.""" + return await conversation_controller.get_conversation_version_history(conversation_id, current_user) + + +@router.delete("/{audio_uuid}") +async def delete_conversation( + audio_uuid: str, current_user: User = Depends(current_active_user) +): + """Delete a conversation and its associated audio file. 
Users can only delete their own conversations.""" + return await conversation_controller.delete_conversation(audio_uuid, current_user) diff --git a/backends/advanced/src/advanced_omi_backend/transcription.py b/backends/advanced/src/advanced_omi_backend/transcription.py index fec2a038..2a3f876a 100644 --- a/backends/advanced/src/advanced_omi_backend/transcription.py +++ b/backends/advanced/src/advanced_omi_backend/transcription.py @@ -281,10 +281,13 @@ async def _process_transcript_result(self, transcript_result): try: # Store raw transcript data provider_name = self.provider.name if self.provider else "unknown" + logger.info(f"🔍 DEBUG: transcript_result type={type(transcript_result)}, content preview: {str(transcript_result)[:200]}") if self.chunk_repo: + logger.info(f"🔍 DEBUG: About to store raw transcript data for {self._current_audio_uuid}") await self.chunk_repo.store_raw_transcript_data( self._current_audio_uuid, transcript_result, provider_name ) + logger.info(f"🔍 DEBUG: Successfully stored raw transcript data for {self._current_audio_uuid}") # Normalize transcript result normalized_result = self._normalize_transcript_result(transcript_result) @@ -307,7 +310,6 @@ async def _process_transcript_result(self, transcript_result): "words": normalized_result.get("words", []), "text": normalized_result.get("text", ""), } - # SPEECH DETECTION: Analyze transcript for meaningful speech speech_analysis = self._analyze_speech(transcript_data) logger.info(f"🎯 Speech analysis for {self._current_audio_uuid}: {speech_analysis}") @@ -402,6 +404,9 @@ async def _process_transcript_result(self, transcript_result): logger.info( f"🎤 Speaker service returned {len(final_segments)} segments with matched text" ) + # Debug: Log first few segments to see text content + for i, seg in enumerate(final_segments[:3]): + logger.info(f"🔍 DEBUG: Segment {i}: text='{seg.get('text', 'MISSING')}', speaker={seg.get('speaker', 'UNKNOWN')}") else: logger.info("🎤 Speaker service returned no segments") 
else: @@ -470,10 +475,23 @@ async def _process_transcript_result(self, transcript_result): try: conversations_repo = ConversationsRepository(conversations_col) - # Update conversation with transcript segments and speaker info + # Check if this is the first transcript for this conversation + conversation = await conversations_repo.get_conversation(conversation_id) + if conversation and not conversation.get("active_transcript_version"): + # This is the first transcript - create initial version + version_id = await conversations_repo.create_transcript_version( + conversation_id=conversation_id, + segments=segments_to_store, + provider="speech_detection", + raw_data={} + ) + if version_id: + # Activate this version + await conversations_repo.activate_transcript_version(conversation_id, version_id) + logger.info(f"✅ Created and activated initial transcript version {version_id} for conversation {conversation_id}") + + # Update conversation with speaker info and metadata (NOT transcript data - that's in versions) update_data = { - "transcript": segments_to_store, - "speakers_identified": list(speakers_found), "speaker_names": speaker_names, "updated_at": datetime.now(UTC) } @@ -497,7 +515,7 @@ async def _process_transcript_result(self, transcript_result): await self._queue_memory_processing(conversation_id) # Queue audio cropping if we have diarization segments and cropping is enabled - if final_segments and os.getenv("AUDIO_CROPPING_ENABLED", "false").lower() == "true": + if final_segments and os.getenv("AUDIO_CROPPING_ENABLED", "true").lower() == "true": await self._queue_diarization_based_cropping(final_segments) # Update database transcription status @@ -620,14 +638,20 @@ async def _create_conversation(self, audio_uuid: str, transcript_data: dict, spe "client_id": audio_session["client_id"], "title": title, "summary": summary, - "transcript": [], # Will be populated by existing segment processing + + # Versioned system (source of truth) + "transcript_versions": [], + 
"active_transcript_version": None, + "memory_versions": [], + "active_memory_version": None, + + # Legacy compatibility fields (auto-populated on read) + # Note: These will be auto-populated from active versions when retrieved + "duration_seconds": speech_analysis.get("duration", 0.0), "speech_start_time": speech_analysis.get("speech_start", 0.0), "speech_end_time": speech_analysis.get("speech_end", 0.0), - "speakers_identified": [], "speaker_names": {}, - "memories": [], - "memory_processing_status": "pending", "action_items": [], "created_at": datetime.now(UTC), "updated_at": datetime.now(UTC), diff --git a/backends/advanced/uv.lock b/backends/advanced/uv.lock index 75d8e7ae..9837aa0e 100644 --- a/backends/advanced/uv.lock +++ b/backends/advanced/uv.lock @@ -36,9 +36,6 @@ deepgram = [ local-audio = [ { name = "easy-audio-interfaces", extra = ["local-audio"] }, ] -webui = [ - { name = "streamlit" }, -] [package.dev-dependencies] dev = [ @@ -70,11 +67,10 @@ requires-dist = [ { name = "python-dotenv", specifier = ">=1.1.0" }, { name = "pyyaml", specifier = ">=6.0.1" }, { name = "spacy", specifier = ">=3.8.2" }, - { name = "streamlit", marker = "extra == 'webui'", specifier = ">=1.45.1" }, { name = "uvicorn", specifier = ">=0.34.2" }, { name = "wyoming", specifier = ">=1.6.1" }, ] -provides-extras = ["deepgram", "webui", "local-audio"] +provides-extras = ["deepgram", "local-audio"] [package.metadata.requires-dev] dev = [ @@ -177,22 +173,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597, upload_time = "2024-12-13T17:10:38.469Z" }, ] -[[package]] -name = "altair" -version = "5.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2" }, - { name = "jsonschema" }, - { name = "narwhals" }, - { name = "packaging" }, - { name 
= "typing-extensions", marker = "python_full_version < '3.14'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/16/b1/f2969c7bdb8ad8bbdda031687defdce2c19afba2aa2c8e1d2a17f78376d8/altair-5.5.0.tar.gz", hash = "sha256:d960ebe6178c56de3855a68c47b516be38640b73fb3b5111c2a9ca90546dd73d", size = 705305, upload_time = "2024-11-23T23:39:58.542Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200, upload_time = "2024-11-23T23:39:56.4Z" }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -380,15 +360,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/ce/3adf9e742bb22e4a4b3435f24111cb46a1d12731ba655ee00bb5ab0308cc/bleak-0.22.3-py3-none-any.whl", hash = "sha256:1e62a9f5e0c184826e6c906e341d8aca53793e4596eeaf4e0b191e7aca5c461c", size = 142719, upload_time = "2024-10-05T21:20:58.547Z" }, ] -[[package]] -name = "blinker" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload_time = "2024-11-08T17:25:47.436Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload_time = "2024-11-08T17:25:46.184Z" }, -] - [[package]] name = "blis" version = "1.3.0" @@ -418,15 +389,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/f1/8cc8118946dbb9cbd74f406d30d31ee8d2f723f6fb4c8245e2bc67175fd4/blis-1.3.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:91de2baf03da3a173cf62771f1d6b9236a27a8cbd0e0033be198f06ef6224986", size = 6258624, upload_time = "2025-04-03T15:09:46.056Z" }, ] -[[package]] -name = "cachetools" -version = "5.5.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload_time = "2025-02-20T21:01:19.524Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload_time = "2025-02-20T21:01:16.647Z" }, -] - [[package]] name = "catalogue" version = "2.0.10" @@ -912,30 +874,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862, upload_time = "2024-12-19T19:57:28.258Z" }, ] -[[package]] -name = "gitdb" -version = "4.0.12" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "smmap" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload_time = "2025-01-02T07:20:46.413Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload_time = "2025-01-02T07:20:43.624Z" }, -] - -[[package]] -name = "gitpython" -version = 
"3.1.44" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "gitdb" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196, upload_time = "2025-01-02T07:32:43.59Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload_time = "2025-01-02T07:32:40.731Z" }, -] - [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -1220,33 +1158,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload_time = "2024-06-10T19:24:40.698Z" }, ] -[[package]] -name = "jsonschema" -version = "4.24.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "jsonschema-specifications" }, - { name = "referencing" }, - { name = "rpds-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bf/d3/1cf5326b923a53515d8f3a2cd442e6d7e94fcc444716e879ea70a0ce3177/jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196", size = 353480, upload_time = "2025-05-26T18:48:10.459Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/3d/023389198f69c722d039351050738d6755376c8fd343e91dc493ea485905/jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d", size = 88709, upload_time = "2025-05-26T18:48:08.417Z" }, -] - -[[package]] -name = "jsonschema-specifications" 
-version = "2025.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "referencing" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bf/ce/46fbd9c8119cfc3581ee5643ea49464d168028cfb5caff5fc0596d0cf914/jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608", size = 15513, upload_time = "2025-04-23T12:34:07.418Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/01/0e/b27cdbaccf30b890c40ed1da9fd4a3593a5cf94dae54fb34f8a4b74fcd3f/jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af", size = 18437, upload_time = "2025-04-23T12:34:05.422Z" }, -] - [[package]] name = "langchain" version = "0.3.26" @@ -1629,15 +1540,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload_time = "2025-04-22T14:54:22.983Z" }, ] -[[package]] -name = "narwhals" -version = "1.42.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/7e/9484c2427453bd0024fd36cf7923de4367d749f0b216b9ca56b9dfc3c516/narwhals-1.42.0.tar.gz", hash = "sha256:a5e554782446d1197593312651352cd39b2025e995053d8e6bdfaa01a70a91d3", size = 490671, upload_time = "2025-06-09T09:20:27.794Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/0f/f9ae7c8c55f9078c852b13ea4a6e92e5f4d6d4c8fc0781ec2882957006bb/narwhals-1.42.0-py3-none-any.whl", hash = "sha256:ef6cedf7700dc22c09d17973b9ede11b53e25331e238b24ac73884a8c5e27c19", size = 359033, upload_time = "2025-06-09T09:20:25.668Z" }, -] - [[package]] name = "neo4j" version = "5.28.1" @@ -1887,40 +1789,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload_time = "2024-11-08T09:47:44.722Z" }, ] -[[package]] -name = "pandas" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/51/48f713c4c728d7c55ef7444ba5ea027c26998d96d1a40953b346438602fc/pandas-2.3.0.tar.gz", hash = "sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133", size = 4484490, upload_time = "2025-06-05T03:27:54.133Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/46/24192607058dd607dbfacdd060a2370f6afb19c2ccb617406469b9aeb8e7/pandas-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2eb4728a18dcd2908c7fccf74a982e241b467d178724545a48d0caf534b38ebf", size = 11573865, upload_time = "2025-06-05T03:26:46.774Z" }, - { url = "https://files.pythonhosted.org/packages/9f/cc/ae8ea3b800757a70c9fdccc68b67dc0280a6e814efcf74e4211fd5dea1ca/pandas-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9d8c3187be7479ea5c3d30c32a5d73d62a621166675063b2edd21bc47614027", size = 10702154, upload_time = "2025-06-05T16:50:14.439Z" }, - { url = "https://files.pythonhosted.org/packages/d8/ba/a7883d7aab3d24c6540a2768f679e7414582cc389876d469b40ec749d78b/pandas-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ff730713d4c4f2f1c860e36c005c7cefc1c7c80c21c0688fd605aa43c9fcf09", size = 11262180, upload_time = "2025-06-05T16:50:17.453Z" }, - { url = "https://files.pythonhosted.org/packages/01/a5/931fc3ad333d9d87b10107d948d757d67ebcfc33b1988d5faccc39c6845c/pandas-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ba24af48643b12ffe49b27065d3babd52702d95ab70f50e1b34f71ca703e2c0d", size = 11991493, upload_time = "2025-06-05T03:26:51.813Z" }, - { url = "https://files.pythonhosted.org/packages/d7/bf/0213986830a92d44d55153c1d69b509431a972eb73f204242988c4e66e86/pandas-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:404d681c698e3c8a40a61d0cd9412cc7364ab9a9cc6e144ae2992e11a2e77a20", size = 12470733, upload_time = "2025-06-06T00:00:18.651Z" }, - { url = "https://files.pythonhosted.org/packages/a4/0e/21eb48a3a34a7d4bac982afc2c4eb5ab09f2d988bdf29d92ba9ae8e90a79/pandas-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6021910b086b3ca756755e86ddc64e0ddafd5e58e076c72cb1585162e5ad259b", size = 13212406, upload_time = "2025-06-05T03:26:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/1f/d9/74017c4eec7a28892d8d6e31ae9de3baef71f5a5286e74e6b7aad7f8c837/pandas-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:094e271a15b579650ebf4c5155c05dcd2a14fd4fdd72cf4854b2f7ad31ea30be", size = 10976199, upload_time = "2025-06-05T03:26:59.594Z" }, - { url = "https://files.pythonhosted.org/packages/d3/57/5cb75a56a4842bbd0511c3d1c79186d8315b82dac802118322b2de1194fe/pandas-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983", size = 11518913, upload_time = "2025-06-05T03:27:02.757Z" }, - { url = "https://files.pythonhosted.org/packages/05/01/0c8785610e465e4948a01a059562176e4c8088aa257e2e074db868f86d4e/pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd", size = 10655249, upload_time = "2025-06-05T16:50:20.17Z" }, - { url = "https://files.pythonhosted.org/packages/e8/6a/47fd7517cd8abe72a58706aab2b99e9438360d36dcdb052cf917b7bf3bdc/pandas-2.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f", size = 11328359, upload_time = 
"2025-06-05T03:27:06.431Z" }, - { url = "https://files.pythonhosted.org/packages/2a/b3/463bfe819ed60fb7e7ddffb4ae2ee04b887b3444feee6c19437b8f834837/pandas-2.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3", size = 12024789, upload_time = "2025-06-05T03:27:09.875Z" }, - { url = "https://files.pythonhosted.org/packages/04/0c/e0704ccdb0ac40aeb3434d1c641c43d05f75c92e67525df39575ace35468/pandas-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8", size = 12480734, upload_time = "2025-06-06T00:00:22.246Z" }, - { url = "https://files.pythonhosted.org/packages/e9/df/815d6583967001153bb27f5cf075653d69d51ad887ebbf4cfe1173a1ac58/pandas-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9", size = 13223381, upload_time = "2025-06-05T03:27:15.641Z" }, - { url = "https://files.pythonhosted.org/packages/79/88/ca5973ed07b7f484c493e941dbff990861ca55291ff7ac67c815ce347395/pandas-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390", size = 10970135, upload_time = "2025-06-05T03:27:24.131Z" }, - { url = "https://files.pythonhosted.org/packages/24/fb/0994c14d1f7909ce83f0b1fb27958135513c4f3f2528bde216180aa73bfc/pandas-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575", size = 12141356, upload_time = "2025-06-05T03:27:34.547Z" }, - { url = "https://files.pythonhosted.org/packages/9d/a2/9b903e5962134497ac4f8a96f862ee3081cb2506f69f8e4778ce3d9c9d82/pandas-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042", size = 11474674, upload_time = "2025-06-05T03:27:39.448Z" }, - { url = 
"https://files.pythonhosted.org/packages/81/3a/3806d041bce032f8de44380f866059437fb79e36d6b22c82c187e65f765b/pandas-2.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c", size = 11439876, upload_time = "2025-06-05T03:27:43.652Z" }, - { url = "https://files.pythonhosted.org/packages/15/aa/3fc3181d12b95da71f5c2537c3e3b3af6ab3a8c392ab41ebb766e0929bc6/pandas-2.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67", size = 11966182, upload_time = "2025-06-05T03:27:47.652Z" }, - { url = "https://files.pythonhosted.org/packages/37/e7/e12f2d9b0a2c4a2cc86e2aabff7ccfd24f03e597d770abfa2acd313ee46b/pandas-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f", size = 12547686, upload_time = "2025-06-06T00:00:26.142Z" }, - { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847, upload_time = "2025-06-05T03:27:51.465Z" }, -] - [[package]] name = "pathspec" version = "0.12.1" @@ -1930,47 +1798,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload_time = "2023-12-10T22:30:43.14Z" }, ] -[[package]] -name = "pillow" -version = "11.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/af/cb/bb5c01fcd2a69335b86c22142b2bccfc3464087efb7fd382eee5ffc7fdf7/pillow-11.2.1.tar.gz", hash = 
"sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6", size = 47026707, upload_time = "2025-04-12T17:50:03.289Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/40/052610b15a1b8961f52537cc8326ca6a881408bc2bdad0d852edeb6ed33b/pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f", size = 3190185, upload_time = "2025-04-12T17:48:00.417Z" }, - { url = "https://files.pythonhosted.org/packages/e5/7e/b86dbd35a5f938632093dc40d1682874c33dcfe832558fc80ca56bfcb774/pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b", size = 3030306, upload_time = "2025-04-12T17:48:02.391Z" }, - { url = "https://files.pythonhosted.org/packages/a4/5c/467a161f9ed53e5eab51a42923c33051bf8d1a2af4626ac04f5166e58e0c/pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d", size = 4416121, upload_time = "2025-04-12T17:48:04.554Z" }, - { url = "https://files.pythonhosted.org/packages/62/73/972b7742e38ae0e2ac76ab137ca6005dcf877480da0d9d61d93b613065b4/pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4", size = 4501707, upload_time = "2025-04-12T17:48:06.831Z" }, - { url = "https://files.pythonhosted.org/packages/e4/3a/427e4cb0b9e177efbc1a84798ed20498c4f233abde003c06d2650a6d60cb/pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d", size = 4522921, upload_time = "2025-04-12T17:48:09.229Z" }, - { url = "https://files.pythonhosted.org/packages/fe/7c/d8b1330458e4d2f3f45d9508796d7caf0c0d3764c00c823d10f6f1a3b76d/pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = 
"sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4", size = 4612523, upload_time = "2025-04-12T17:48:11.631Z" }, - { url = "https://files.pythonhosted.org/packages/b3/2f/65738384e0b1acf451de5a573d8153fe84103772d139e1e0bdf1596be2ea/pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443", size = 4587836, upload_time = "2025-04-12T17:48:13.592Z" }, - { url = "https://files.pythonhosted.org/packages/6a/c5/e795c9f2ddf3debb2dedd0df889f2fe4b053308bb59a3cc02a0cd144d641/pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c", size = 4669390, upload_time = "2025-04-12T17:48:15.938Z" }, - { url = "https://files.pythonhosted.org/packages/96/ae/ca0099a3995976a9fce2f423166f7bff9b12244afdc7520f6ed38911539a/pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3", size = 2332309, upload_time = "2025-04-12T17:48:17.885Z" }, - { url = "https://files.pythonhosted.org/packages/7c/18/24bff2ad716257fc03da964c5e8f05d9790a779a8895d6566e493ccf0189/pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941", size = 2676768, upload_time = "2025-04-12T17:48:19.655Z" }, - { url = "https://files.pythonhosted.org/packages/da/bb/e8d656c9543276517ee40184aaa39dcb41e683bca121022f9323ae11b39d/pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb", size = 2415087, upload_time = "2025-04-12T17:48:21.991Z" }, - { url = "https://files.pythonhosted.org/packages/36/9c/447528ee3776e7ab8897fe33697a7ff3f0475bb490c5ac1456a03dc57956/pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28", size = 3190098, upload_time = "2025-04-12T17:48:23.915Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/09/29d5cd052f7566a63e5b506fac9c60526e9ecc553825551333e1e18a4858/pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830", size = 3030166, upload_time = "2025-04-12T17:48:25.738Z" }, - { url = "https://files.pythonhosted.org/packages/71/5d/446ee132ad35e7600652133f9c2840b4799bbd8e4adba881284860da0a36/pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0", size = 4408674, upload_time = "2025-04-12T17:48:27.908Z" }, - { url = "https://files.pythonhosted.org/packages/69/5f/cbe509c0ddf91cc3a03bbacf40e5c2339c4912d16458fcb797bb47bcb269/pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1", size = 4496005, upload_time = "2025-04-12T17:48:29.888Z" }, - { url = "https://files.pythonhosted.org/packages/f9/b3/dd4338d8fb8a5f312021f2977fb8198a1184893f9b00b02b75d565c33b51/pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f", size = 4518707, upload_time = "2025-04-12T17:48:31.874Z" }, - { url = "https://files.pythonhosted.org/packages/13/eb/2552ecebc0b887f539111c2cd241f538b8ff5891b8903dfe672e997529be/pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155", size = 4610008, upload_time = "2025-04-12T17:48:34.422Z" }, - { url = "https://files.pythonhosted.org/packages/72/d1/924ce51bea494cb6e7959522d69d7b1c7e74f6821d84c63c3dc430cbbf3b/pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14", size = 4585420, upload_time = "2025-04-12T17:48:37.641Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/ab/8f81312d255d713b99ca37479a4cb4b0f48195e530cdc1611990eb8fd04b/pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b", size = 4667655, upload_time = "2025-04-12T17:48:39.652Z" }, - { url = "https://files.pythonhosted.org/packages/94/86/8f2e9d2dc3d308dfd137a07fe1cc478df0a23d42a6c4093b087e738e4827/pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2", size = 2332329, upload_time = "2025-04-12T17:48:41.765Z" }, - { url = "https://files.pythonhosted.org/packages/6d/ec/1179083b8d6067a613e4d595359b5fdea65d0a3b7ad623fee906e1b3c4d2/pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691", size = 2676388, upload_time = "2025-04-12T17:48:43.625Z" }, - { url = "https://files.pythonhosted.org/packages/23/f1/2fc1e1e294de897df39fa8622d829b8828ddad938b0eaea256d65b84dd72/pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c", size = 2414950, upload_time = "2025-04-12T17:48:45.475Z" }, - { url = "https://files.pythonhosted.org/packages/c4/3e/c328c48b3f0ead7bab765a84b4977acb29f101d10e4ef57a5e3400447c03/pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22", size = 3192759, upload_time = "2025-04-12T17:48:47.866Z" }, - { url = "https://files.pythonhosted.org/packages/18/0e/1c68532d833fc8b9f404d3a642991441d9058eccd5606eab31617f29b6d4/pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7", size = 3033284, upload_time = "2025-04-12T17:48:50.189Z" }, - { url = 
"https://files.pythonhosted.org/packages/b7/cb/6faf3fb1e7705fd2db74e070f3bf6f88693601b0ed8e81049a8266de4754/pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16", size = 4445826, upload_time = "2025-04-12T17:48:52.346Z" }, - { url = "https://files.pythonhosted.org/packages/07/94/8be03d50b70ca47fb434a358919d6a8d6580f282bbb7af7e4aa40103461d/pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b", size = 4527329, upload_time = "2025-04-12T17:48:54.403Z" }, - { url = "https://files.pythonhosted.org/packages/fd/a4/bfe78777076dc405e3bd2080bc32da5ab3945b5a25dc5d8acaa9de64a162/pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406", size = 4549049, upload_time = "2025-04-12T17:48:56.383Z" }, - { url = "https://files.pythonhosted.org/packages/65/4d/eaf9068dc687c24979e977ce5677e253624bd8b616b286f543f0c1b91662/pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91", size = 4635408, upload_time = "2025-04-12T17:48:58.782Z" }, - { url = "https://files.pythonhosted.org/packages/1d/26/0fd443365d9c63bc79feb219f97d935cd4b93af28353cba78d8e77b61719/pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751", size = 4614863, upload_time = "2025-04-12T17:49:00.709Z" }, - { url = "https://files.pythonhosted.org/packages/49/65/dca4d2506be482c2c6641cacdba5c602bc76d8ceb618fd37de855653a419/pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9", size = 4692938, upload_time = "2025-04-12T17:49:02.946Z" }, - { url = 
"https://files.pythonhosted.org/packages/b3/92/1ca0c3f09233bd7decf8f7105a1c4e3162fb9142128c74adad0fb361b7eb/pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd", size = 2335774, upload_time = "2025-04-12T17:49:04.889Z" }, - { url = "https://files.pythonhosted.org/packages/a5/ac/77525347cb43b83ae905ffe257bbe2cc6fd23acb9796639a1f56aa59d191/pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e", size = 2681895, upload_time = "2025-04-12T17:49:06.635Z" }, - { url = "https://files.pythonhosted.org/packages/67/32/32dc030cfa91ca0fc52baebbba2e009bb001122a1daa8b6a79ad830b38d3/pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681", size = 2417234, upload_time = "2025-04-12T17:49:08.399Z" }, -] - [[package]] name = "platformdirs" version = "4.3.8" @@ -2160,41 +1987,6 @@ bcrypt = [ { name = "bcrypt" }, ] -[[package]] -name = "pyarrow" -version = "20.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload_time = "2025-04-27T12:34:23.264Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload_time = "2025-04-27T12:29:44.384Z" }, - { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, 
upload_time = "2025-04-27T12:29:52.038Z" }, - { url = "https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload_time = "2025-04-27T12:29:59.452Z" }, - { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload_time = "2025-04-27T12:30:06.875Z" }, - { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload_time = "2025-04-27T12:30:13.954Z" }, - { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload_time = "2025-04-27T12:30:21.949Z" }, - { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload_time = "2025-04-27T12:30:29.551Z" }, - { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload_time = "2025-04-27T12:30:36.977Z" }, - { url = 
"https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload_time = "2025-04-27T12:30:42.809Z" }, - { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501, upload_time = "2025-04-27T12:30:48.351Z" }, - { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895, upload_time = "2025-04-27T12:30:55.238Z" }, - { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322, upload_time = "2025-04-27T12:31:05.587Z" }, - { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441, upload_time = "2025-04-27T12:31:15.675Z" }, - { url = "https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027, upload_time = "2025-04-27T12:31:24.631Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473, upload_time = "2025-04-27T12:31:31.311Z" }, - { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897, upload_time = "2025-04-27T12:31:39.406Z" }, - { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847, upload_time = "2025-04-27T12:31:45.997Z" }, - { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219, upload_time = "2025-04-27T12:31:54.11Z" }, - { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957, upload_time = "2025-04-27T12:31:59.215Z" }, - { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972, upload_time = "2025-04-27T12:32:05.369Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434, upload_time = "2025-04-27T12:32:11.814Z" }, - { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648, upload_time = "2025-04-27T12:32:20.766Z" }, - { url = "https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853, upload_time = "2025-04-27T12:32:28.1Z" }, - { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743, upload_time = "2025-04-27T12:32:35.792Z" }, - { url = "https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441, upload_time = "2025-04-27T12:32:46.64Z" }, - { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279, upload_time = "2025-04-27T12:32:56.503Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982, upload_time = "2025-04-27T12:33:04.72Z" }, -] - [[package]] name = "pyaudio" version = "0.2.14" @@ -2273,19 +2065,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload_time = "2025-04-23T18:32:25.088Z" }, ] -[[package]] -name = "pydeck" -version = "0.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2" }, - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605", size = 3832240, upload_time = "2024-05-10T15:36:21.153Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038", size = 6900403, upload_time = "2024-05-10T15:36:17.36Z" }, -] - [[package]] name = "pygments" version = "2.19.1" @@ -2537,20 +2316,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/52/f49b0aa96253010f57cf80315edecec4f469e7a39c1ed92bf727fa290e57/qdrant_client-1.14.2-py3-none-any.whl", hash = "sha256:7c283b1f0e71db9c21b85d898fb395791caca2a6d56ee751da96d797b001410c", size = 327691, upload_time = "2025-04-24T14:44:41.794Z" }, ] -[[package]] -name = "referencing" -version = "0.36.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = 
"rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload_time = "2025-01-25T08:48:16.138Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload_time = "2025-01-25T08:48:14.241Z" }, -] - [[package]] name = "requests" version = "2.32.4" @@ -2591,55 +2356,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424, upload_time = "2024-11-01T16:43:55.817Z" }, ] -[[package]] -name = "rpds-py" -version = "0.25.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/a6/60184b7fc00dd3ca80ac635dd5b8577d444c57e8e8742cecabfacb829921/rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3", size = 27304, upload_time = "2025-05-21T12:46:12.502Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/81/28ab0408391b1dc57393653b6a0cf2014cc282cc2909e4615e63e58262be/rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c", size = 364647, upload_time = "2025-05-21T12:43:28.559Z" }, - { url = "https://files.pythonhosted.org/packages/2c/9a/7797f04cad0d5e56310e1238434f71fc6939d0bc517192a18bb99a72a95f/rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b", size = 350454, upload_time = "2025-05-21T12:43:30.615Z" }, - { url = "https://files.pythonhosted.org/packages/69/3c/93d2ef941b04898011e5d6eaa56a1acf46a3b4c9f4b3ad1bbcbafa0bee1f/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa", size = 389665, upload_time = "2025-05-21T12:43:32.629Z" }, - { url = "https://files.pythonhosted.org/packages/c1/57/ad0e31e928751dde8903a11102559628d24173428a0f85e25e187defb2c1/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda", size = 403873, upload_time = "2025-05-21T12:43:34.576Z" }, - { url = "https://files.pythonhosted.org/packages/16/ad/c0c652fa9bba778b4f54980a02962748479dc09632e1fd34e5282cf2556c/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309", size = 525866, upload_time = "2025-05-21T12:43:36.123Z" }, - { url = "https://files.pythonhosted.org/packages/2a/39/3e1839bc527e6fcf48d5fec4770070f872cdee6c6fbc9b259932f4e88a38/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b", size = 416886, upload_time = "2025-05-21T12:43:38.034Z" }, - { url = "https://files.pythonhosted.org/packages/7a/95/dd6b91cd4560da41df9d7030a038298a67d24f8ca38e150562644c829c48/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea", size = 390666, upload_time = "2025-05-21T12:43:40.065Z" }, - { url = "https://files.pythonhosted.org/packages/64/48/1be88a820e7494ce0a15c2d390ccb7c52212370badabf128e6a7bb4cb802/rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", 
hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65", size = 425109, upload_time = "2025-05-21T12:43:42.263Z" }, - { url = "https://files.pythonhosted.org/packages/cf/07/3e2a17927ef6d7720b9949ec1b37d1e963b829ad0387f7af18d923d5cfa5/rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c", size = 567244, upload_time = "2025-05-21T12:43:43.846Z" }, - { url = "https://files.pythonhosted.org/packages/d2/e5/76cf010998deccc4f95305d827847e2eae9c568099c06b405cf96384762b/rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd", size = 596023, upload_time = "2025-05-21T12:43:45.932Z" }, - { url = "https://files.pythonhosted.org/packages/52/9a/df55efd84403736ba37a5a6377b70aad0fd1cb469a9109ee8a1e21299a1c/rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb", size = 561634, upload_time = "2025-05-21T12:43:48.263Z" }, - { url = "https://files.pythonhosted.org/packages/ab/aa/dc3620dd8db84454aaf9374bd318f1aa02578bba5e567f5bf6b79492aca4/rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe", size = 222713, upload_time = "2025-05-21T12:43:49.897Z" }, - { url = "https://files.pythonhosted.org/packages/a3/7f/7cef485269a50ed5b4e9bae145f512d2a111ca638ae70cc101f661b4defd/rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192", size = 235280, upload_time = "2025-05-21T12:43:51.893Z" }, - { url = "https://files.pythonhosted.org/packages/99/f2/c2d64f6564f32af913bf5f3f7ae41c7c263c5ae4c4e8f1a17af8af66cd46/rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728", size = 225399, upload_time = "2025-05-21T12:43:53.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/da/323848a2b62abe6a0fec16ebe199dc6889c5d0a332458da8985b2980dffe/rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559", size = 364498, upload_time = "2025-05-21T12:43:54.841Z" }, - { url = "https://files.pythonhosted.org/packages/1f/b4/4d3820f731c80fd0cd823b3e95b9963fec681ae45ba35b5281a42382c67d/rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1", size = 350083, upload_time = "2025-05-21T12:43:56.428Z" }, - { url = "https://files.pythonhosted.org/packages/d5/b1/3a8ee1c9d480e8493619a437dec685d005f706b69253286f50f498cbdbcf/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c", size = 389023, upload_time = "2025-05-21T12:43:57.995Z" }, - { url = "https://files.pythonhosted.org/packages/3b/31/17293edcfc934dc62c3bf74a0cb449ecd549531f956b72287203e6880b87/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb", size = 403283, upload_time = "2025-05-21T12:43:59.546Z" }, - { url = "https://files.pythonhosted.org/packages/d1/ca/e0f0bc1a75a8925024f343258c8ecbd8828f8997ea2ac71e02f67b6f5299/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40", size = 524634, upload_time = "2025-05-21T12:44:01.087Z" }, - { url = "https://files.pythonhosted.org/packages/3e/03/5d0be919037178fff33a6672ffc0afa04ea1cfcb61afd4119d1b5280ff0f/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79", size = 416233, upload_time = "2025-05-21T12:44:02.604Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/7c/8abb70f9017a231c6c961a8941403ed6557664c0913e1bf413cbdc039e75/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325", size = 390375, upload_time = "2025-05-21T12:44:04.162Z" }, - { url = "https://files.pythonhosted.org/packages/7a/ac/a87f339f0e066b9535074a9f403b9313fd3892d4a164d5d5f5875ac9f29f/rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295", size = 424537, upload_time = "2025-05-21T12:44:06.175Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8f/8d5c1567eaf8c8afe98a838dd24de5013ce6e8f53a01bd47fe8bb06b5533/rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b", size = 566425, upload_time = "2025-05-21T12:44:08.242Z" }, - { url = "https://files.pythonhosted.org/packages/95/33/03016a6be5663b389c8ab0bbbcca68d9e96af14faeff0a04affcb587e776/rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98", size = 595197, upload_time = "2025-05-21T12:44:10.449Z" }, - { url = "https://files.pythonhosted.org/packages/33/8d/da9f4d3e208c82fda311bff0cf0a19579afceb77cf456e46c559a1c075ba/rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd", size = 561244, upload_time = "2025-05-21T12:44:12.387Z" }, - { url = "https://files.pythonhosted.org/packages/e2/b3/39d5dcf7c5f742ecd6dbc88f6f84ae54184b92f5f387a4053be2107b17f1/rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31", size = 222254, upload_time = "2025-05-21T12:44:14.261Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/19/2d6772c8eeb8302c5f834e6d0dfd83935a884e7c5ce16340c7eaf89ce925/rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500", size = 234741, upload_time = "2025-05-21T12:44:16.236Z" }, - { url = "https://files.pythonhosted.org/packages/5b/5a/145ada26cfaf86018d0eb304fe55eafdd4f0b6b84530246bb4a7c4fb5c4b/rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5", size = 224830, upload_time = "2025-05-21T12:44:17.749Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ca/d435844829c384fd2c22754ff65889c5c556a675d2ed9eb0e148435c6690/rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129", size = 359668, upload_time = "2025-05-21T12:44:19.322Z" }, - { url = "https://files.pythonhosted.org/packages/1f/01/b056f21db3a09f89410d493d2f6614d87bb162499f98b649d1dbd2a81988/rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d", size = 345649, upload_time = "2025-05-21T12:44:20.962Z" }, - { url = "https://files.pythonhosted.org/packages/e0/0f/e0d00dc991e3d40e03ca36383b44995126c36b3eafa0ccbbd19664709c88/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72", size = 384776, upload_time = "2025-05-21T12:44:22.516Z" }, - { url = "https://files.pythonhosted.org/packages/9f/a2/59374837f105f2ca79bde3c3cd1065b2f8c01678900924949f6392eab66d/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34", size = 395131, upload_time = "2025-05-21T12:44:24.147Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/dc/48e8d84887627a0fe0bac53f0b4631e90976fd5d35fff8be66b8e4f3916b/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9", size = 520942, upload_time = "2025-05-21T12:44:25.915Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f5/ee056966aeae401913d37befeeab57a4a43a4f00099e0a20297f17b8f00c/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5", size = 411330, upload_time = "2025-05-21T12:44:27.638Z" }, - { url = "https://files.pythonhosted.org/packages/ab/74/b2cffb46a097cefe5d17f94ede7a174184b9d158a0aeb195f39f2c0361e8/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194", size = 387339, upload_time = "2025-05-21T12:44:29.292Z" }, - { url = "https://files.pythonhosted.org/packages/7f/9a/0ff0b375dcb5161c2b7054e7d0b7575f1680127505945f5cabaac890bc07/rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6", size = 418077, upload_time = "2025-05-21T12:44:30.877Z" }, - { url = "https://files.pythonhosted.org/packages/0d/a1/fda629bf20d6b698ae84c7c840cfb0e9e4200f664fc96e1f456f00e4ad6e/rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78", size = 562441, upload_time = "2025-05-21T12:44:32.541Z" }, - { url = "https://files.pythonhosted.org/packages/20/15/ce4b5257f654132f326f4acd87268e1006cc071e2c59794c5bdf4bebbb51/rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72", size = 590750, upload_time = "2025-05-21T12:44:34.557Z" }, - { url = 
"https://files.pythonhosted.org/packages/fb/ab/e04bf58a8d375aeedb5268edcc835c6a660ebf79d4384d8e0889439448b0/rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66", size = 558891, upload_time = "2025-05-21T12:44:37.358Z" }, - { url = "https://files.pythonhosted.org/packages/90/82/cb8c6028a6ef6cd2b7991e2e4ced01c854b6236ecf51e81b64b569c43d73/rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523", size = 218718, upload_time = "2025-05-21T12:44:38.969Z" }, - { url = "https://files.pythonhosted.org/packages/b6/97/5a4b59697111c89477d20ba8a44df9ca16b41e737fa569d5ae8bff99e650/rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763", size = 232218, upload_time = "2025-05-21T12:44:40.512Z" }, -] - [[package]] name = "scipy" version = "1.15.3" @@ -2717,15 +2433,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/5b/a2a3d4514c64818925f4e886d39981f1926eeb5288a4549c6b3c17ed66bb/smart_open-7.3.0.post1-py3-none-any.whl", hash = "sha256:c73661a2c24bf045c1e04e08fffc585b59af023fe783d57896f590489db66fb4", size = 61946, upload_time = "2025-07-03T10:06:29.599Z" }, ] -[[package]] -name = "smmap" -version = "5.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload_time = "2025-01-02T07:14:40.909Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload_time = "2025-01-02T07:14:38.724Z" }, -] - [[package]] name = 
"sniffio" version = "1.3.1" @@ -2878,35 +2585,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload_time = "2025-04-13T13:56:16.21Z" }, ] -[[package]] -name = "streamlit" -version = "1.45.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "altair" }, - { name = "blinker" }, - { name = "cachetools" }, - { name = "click" }, - { name = "gitpython" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "protobuf" }, - { name = "pyarrow" }, - { name = "pydeck" }, - { name = "requests" }, - { name = "tenacity" }, - { name = "toml" }, - { name = "tornado" }, - { name = "typing-extensions" }, - { name = "watchdog", marker = "sys_platform != 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f0/46/9b3f73886f82d27849ce1e7a74ae7c39f5323e46da0b6e8847ad4c25f44c/streamlit-1.45.1.tar.gz", hash = "sha256:e37d56c0af5240dbc240976880e81366689c290a559376417246f9b3f51b4217", size = 9463953, upload_time = "2025-05-12T20:40:30.562Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/e6/69fcbae3dd2fcb2f54283a7cbe03c8b944b79997f1b526984f91d4796a02/streamlit-1.45.1-py3-none-any.whl", hash = "sha256:9ab6951585e9444672dd650850f81767b01bba5d87c8dac9bc2e1c859d6cc254", size = 9856294, upload_time = "2025-05-12T20:40:27.875Z" }, -] - [[package]] name = "tenacity" version = "9.1.2" @@ -2961,34 +2639,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/26/f77ef4bd174bfeac491237a4ca3f74ba2ee2f672004f76cff90f8407a489/thinc-8.3.6-cp313-cp313-win_amd64.whl", hash = "sha256:ddd7041946a427f6a9b0b49419353d02ad7eb43fe16724bfcc3bdeb9562040b1", size = 1746883, upload_time = "2025-04-04T11:50:33.038Z" }, ] -[[package]] -name = "toml" -version = "0.10.2" 
-source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload_time = "2020-11-01T01:40:22.204Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" }, -] - -[[package]] -name = "tornado" -version = "6.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/89/c72771c81d25d53fe33e3dca61c233b665b2780f21820ba6fd2c6793c12b/tornado-6.5.1.tar.gz", hash = "sha256:84ceece391e8eb9b2b95578db65e920d2a61070260594819589609ba9bc6308c", size = 509934, upload_time = "2025-05-22T18:15:38.788Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/89/f4532dee6843c9e0ebc4e28d4be04c67f54f60813e4bf73d595fe7567452/tornado-6.5.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d50065ba7fd11d3bd41bcad0825227cc9a95154bad83239357094c36708001f7", size = 441948, upload_time = "2025-05-22T18:15:20.862Z" }, - { url = "https://files.pythonhosted.org/packages/15/9a/557406b62cffa395d18772e0cdcf03bed2fff03b374677348eef9f6a3792/tornado-6.5.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9e9ca370f717997cb85606d074b0e5b247282cf5e2e1611568b8821afe0342d6", size = 440112, upload_time = "2025-05-22T18:15:22.591Z" }, - { url = "https://files.pythonhosted.org/packages/55/82/7721b7319013a3cf881f4dffa4f60ceff07b31b394e459984e7a36dc99ec/tornado-6.5.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b77e9dfa7ed69754a54c89d82ef746398be82f749df69c4d3abe75c4d1ff4888", size = 443672, upload_time = "2025-05-22T18:15:24.027Z" 
}, - { url = "https://files.pythonhosted.org/packages/7d/42/d11c4376e7d101171b94e03cef0cbce43e823ed6567ceda571f54cf6e3ce/tornado-6.5.1-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:253b76040ee3bab8bcf7ba9feb136436a3787208717a1fb9f2c16b744fba7331", size = 443019, upload_time = "2025-05-22T18:15:25.735Z" }, - { url = "https://files.pythonhosted.org/packages/7d/f7/0c48ba992d875521ac761e6e04b0a1750f8150ae42ea26df1852d6a98942/tornado-6.5.1-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:308473f4cc5a76227157cdf904de33ac268af770b2c5f05ca6c1161d82fdd95e", size = 443252, upload_time = "2025-05-22T18:15:27.499Z" }, - { url = "https://files.pythonhosted.org/packages/89/46/d8d7413d11987e316df4ad42e16023cd62666a3c0dfa1518ffa30b8df06c/tornado-6.5.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:caec6314ce8a81cf69bd89909f4b633b9f523834dc1a352021775d45e51d9401", size = 443930, upload_time = "2025-05-22T18:15:29.299Z" }, - { url = "https://files.pythonhosted.org/packages/78/b2/f8049221c96a06df89bed68260e8ca94beca5ea532ffc63b1175ad31f9cc/tornado-6.5.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:13ce6e3396c24e2808774741331638ee6c2f50b114b97a55c5b442df65fd9692", size = 443351, upload_time = "2025-05-22T18:15:31.038Z" }, - { url = "https://files.pythonhosted.org/packages/76/ff/6a0079e65b326cc222a54720a748e04a4db246870c4da54ece4577bfa702/tornado-6.5.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5cae6145f4cdf5ab24744526cc0f55a17d76f02c98f4cff9daa08ae9a217448a", size = 443328, upload_time = "2025-05-22T18:15:32.426Z" }, - { url = "https://files.pythonhosted.org/packages/49/18/e3f902a1d21f14035b5bc6246a8c0f51e0eef562ace3a2cea403c1fb7021/tornado-6.5.1-cp39-abi3-win32.whl", hash = "sha256:e0a36e1bc684dca10b1aa75a31df8bdfed656831489bc1e6a6ebed05dc1ec365", size = 444396, upload_time = "2025-05-22T18:15:34.205Z" }, - { url = 
"https://files.pythonhosted.org/packages/7b/09/6526e32bf1049ee7de3bebba81572673b19a2a8541f795d887e92af1a8bc/tornado-6.5.1-cp39-abi3-win_amd64.whl", hash = "sha256:908e7d64567cecd4c2b458075589a775063453aeb1d2a1853eedb806922f568b", size = 444840, upload_time = "2025-05-22T18:15:36.1Z" }, - { url = "https://files.pythonhosted.org/packages/55/a7/535c44c7bea4578e48281d83c615219f3ab19e6abc67625ef637c73987be/tornado-6.5.1-cp39-abi3-win_arm64.whl", hash = "sha256:02420a0eb7bf617257b9935e2b754d1b63897525d8a289c9d65690d580b4dcf7", size = 443596, upload_time = "2025-05-22T18:15:37.433Z" }, -] - [[package]] name = "tqdm" version = "4.67.1" @@ -3059,15 +2709,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload_time = "2025-05-21T18:55:22.152Z" }, ] -[[package]] -name = "tzdata" -version = "2025.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload_time = "2025-03-23T13:54:43.652Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload_time = "2025-03-23T13:54:41.845Z" }, -] - [[package]] name = "urllib3" version = "2.4.0" @@ -3141,24 +2782,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/06/7c/34330a89da55610daa5f245ddce5aab81244321101614751e7537f125133/wasabi-1.1.3-py3-none-any.whl", hash = "sha256:f76e16e8f7e79f8c4c8be49b4024ac725713ab10cd7f19350ad18a8e3f71728c", size = 27880, 
upload_time = "2024-05-31T16:56:16.699Z" }, ] -[[package]] -name = "watchdog" -version = "6.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload_time = "2024-11-01T14:07:13.037Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload_time = "2024-11-01T14:06:59.472Z" }, - { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload_time = "2024-11-01T14:07:01.431Z" }, - { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload_time = "2024-11-01T14:07:02.568Z" }, - { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload_time = "2024-11-01T14:07:03.893Z" }, - { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload_time = "2024-11-01T14:07:05.189Z" }, - { 
url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload_time = "2024-11-01T14:07:06.376Z" }, - { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload_time = "2024-11-01T14:07:07.547Z" }, - { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload_time = "2024-11-01T14:07:09.525Z" }, - { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" }, - { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" }, -] - [[package]] name = "weasel" version = "0.4.1" diff --git a/backends/advanced/webui/src/pages/Conversations.tsx b/backends/advanced/webui/src/pages/Conversations.tsx index cfb30375..79698be3 100644 --- a/backends/advanced/webui/src/pages/Conversations.tsx +++ b/backends/advanced/webui/src/pages/Conversations.tsx @@ -1,5 +1,5 @@ import { useState, useEffect, useRef } from 'react' -import { MessageSquare, RefreshCw, Calendar, User, Play, Pause, ChevronDown, ChevronUp } from 'lucide-react' 
+import { MessageSquare, RefreshCw, Calendar, User, Play, Pause, MoreVertical, RotateCcw, Zap, ChevronDown, ChevronUp, Trash2 } from 'lucide-react' import { conversationsApi, BACKEND_URL } from '../services/api' interface Conversation { @@ -52,12 +52,17 @@ export default function Conversations() { // Transcript expand/collapse state const [expandedTranscripts, setExpandedTranscripts] = useState>(new Set()) - // Audio playback state const [playingSegment, setPlayingSegment] = useState(null) // Format: "audioUuid-segmentIndex" const audioRefs = useRef<{ [key: string]: HTMLAudioElement }>({}) const segmentTimerRef = useRef(null) + // Reprocessing state + const [openDropdown, setOpenDropdown] = useState(null) + const [reprocessingTranscript, setReprocessingTranscript] = useState>(new Set()) + const [reprocessingMemory, setReprocessingMemory] = useState>(new Set()) + const [deletingConversation, setDeletingConversation] = useState>(new Set()) + const loadConversations = async () => { try { setLoading(true) @@ -80,6 +85,13 @@ export default function Conversations() { loadConversations() }, []) + // Close dropdown when clicking outside + useEffect(() => { + const handleClickOutside = () => setOpenDropdown(null) + document.addEventListener('click', handleClickOutside) + return () => document.removeEventListener('click', handleClickOutside) + }, []) + const formatDate = (timestamp: number) => { return new Date(timestamp * 1000).toLocaleString() } @@ -91,6 +103,96 @@ export default function Conversations() { return `${minutes}:${seconds.toString().padStart(2, '0')}` } + const handleReprocessTranscript = async (conversation: Conversation) => { + try { + if (!conversation.conversation_id) { + setError('Cannot reprocess transcript: Conversation ID is missing. 
This conversation may be from an older format.') + return + } + + setReprocessingTranscript(prev => new Set(prev).add(conversation.conversation_id!)) + setOpenDropdown(null) + + const response = await conversationsApi.reprocessTranscript(conversation.conversation_id) + + if (response.status === 200) { + // Refresh conversations to show updated data + await loadConversations() + } else { + setError(`Failed to start transcript reprocessing: ${response.data?.error || 'Unknown error'}`) + } + } catch (err: any) { + setError(`Error starting transcript reprocessing: ${err.message || 'Unknown error'}`) + } finally { + if (conversation.conversation_id) { + setReprocessingTranscript(prev => { + const newSet = new Set(prev) + newSet.delete(conversation.conversation_id!) + return newSet + }) + } + } + } + + const handleReprocessMemory = async (conversation: Conversation, transcriptVersionId?: string) => { + try { + if (!conversation.conversation_id) { + setError('Cannot reprocess memory: Conversation ID is missing. This conversation may be from an older format.') + return + } + + setReprocessingMemory(prev => new Set(prev).add(conversation.conversation_id!)) + setOpenDropdown(null) + + // For now, use active transcript version. In future, this could be selected from UI + const response = await conversationsApi.reprocessMemory(conversation.conversation_id, transcriptVersionId || 'active') + + if (response.status === 200) { + // Refresh conversations to show updated data + await loadConversations() + } else { + setError(`Failed to start memory reprocessing: ${response.data?.error || 'Unknown error'}`) + } + } catch (err: any) { + setError(`Error starting memory reprocessing: ${err.message || 'Unknown error'}`) + } finally { + if (conversation.conversation_id) { + setReprocessingMemory(prev => { + const newSet = new Set(prev) + newSet.delete(conversation.conversation_id!) 
+ return newSet + }) + } + } + } + + const handleDeleteConversation = async (audioUuid: string) => { + try { + const confirmed = window.confirm('Are you sure you want to delete this conversation? This action cannot be undone.') + if (!confirmed) return + + setDeletingConversation(prev => new Set(prev).add(audioUuid)) + setOpenDropdown(null) + + const response = await conversationsApi.delete(audioUuid) + + if (response.status === 200) { + // Refresh conversations to show updated data + await loadConversations() + } else { + setError(`Failed to delete conversation: ${response.data?.error || 'Unknown error'}`) + } + } catch (err: any) { + setError(`Error deleting conversation: ${err.message || 'Unknown error'}`) + } finally { + setDeletingConversation(prev => { + const newSet = new Set(prev) + newSet.delete(audioUuid) + return newSet + }) + } + } + const toggleTranscriptExpansion = (audioUuid: string) => { setExpandedTranscripts(prev => { const newSet = new Set(prev) @@ -275,8 +377,73 @@ export default function Conversations() { )} - -{/* Audio Player */} + + {/* Hamburger Menu */} +
+ + + {/* Dropdown Menu */} + {openDropdown === conversation.audio_uuid && ( +
+ + +
+ +
+ )} +
+ + + {/* Audio Player */} +
{(conversation.audio_path || conversation.cropped_audio_path) && ( <> @@ -395,8 +562,8 @@ export default function Conversations() {
{debugMode && ( - - [{formatDuration(segment.start, segment.end)}] + + [start: {segment.start.toFixed(1)}s, end: {segment.end.toFixed(1)}s, duration: {formatDuration(segment.start, segment.end)}] )} @@ -443,6 +610,7 @@ export default function Conversations() {

🔧 Debug Info:

+
Conversation ID: {conversation.conversation_id || 'N/A'}
Audio UUID: {conversation.audio_uuid}
Original Audio: {conversation.audio_path || 'N/A'}
Cropped Audio: {conversation.cropped_audio_path || 'N/A'}
diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index de3e2831..5c9d82f0 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -74,6 +74,17 @@ export const conversationsApi = { getAll: () => api.get('/api/conversations'), getById: (id: string) => api.get(`/api/conversations/${id}`), delete: (id: string) => api.delete(`/api/conversations/${id}`), + + // Reprocessing endpoints + reprocessTranscript: (conversationId: string) => api.post(`/api/conversations/${conversationId}/reprocess-transcript`), + reprocessMemory: (conversationId: string, transcriptVersionId: string = 'active') => api.post(`/api/conversations/${conversationId}/reprocess-memory`, null, { + params: { transcript_version_id: transcriptVersionId } + }), + + // Version management + activateTranscriptVersion: (conversationId: string, versionId: string) => api.post(`/api/conversations/${conversationId}/activate-transcript/${versionId}`), + activateMemoryVersion: (conversationId: string, versionId: string) => api.post(`/api/conversations/${conversationId}/activate-memory/${versionId}`), + getVersionHistory: (conversationId: string) => api.get(`/api/conversations/${conversationId}/versions`), } export const memoriesApi = { diff --git a/extras/asr-services/enhanced_chunking.py b/extras/asr-services/enhanced_chunking.py index f35e04a0..fab9d927 100644 --- a/extras/asr-services/enhanced_chunking.py +++ b/extras/asr-services/enhanced_chunking.py @@ -12,6 +12,7 @@ from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchChunkedRNNT from nemo.collections.asr.parts.utils import rnnt_utils from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis +from nemo.collections.asr.parts.utils.timestamp_utils import process_timestamp_outputs logger = logging.getLogger(__name__) @@ -30,17 +31,25 @@ def __init__(self, asr_model, frame_len=4, total_buffer=4, batch_size=4): self.chunk_offsets = [0] 
# Track chunk offsets like NeMo's FrameBatchMultiTaskAED self.merged_hypothesis = None - # Get subsampling factor for timestamp calculations (following FrameBatchMultiTaskAED) + # Get model parameters for timestamp calculations (following FrameBatchMultiTaskAED) self.subsampling_factor = getattr(asr_model._cfg.encoder, 'subsampling_factor', 4) + self.window_stride = getattr(asr_model._cfg.preprocessor, 'window_stride', 0.01) # Ensure model is in eval mode and timestamps enabled self.asr_model.eval() - if hasattr(self.asr_model, 'decoding') and hasattr(self.asr_model.decoding, 'compute_timestamps'): - original_value = self.asr_model.decoding.compute_timestamps - self.asr_model.decoding.compute_timestamps = True - logger.info(f"🔧 TIMESTAMP CONFIG: Set compute_timestamps=True (was: {original_value})") + if hasattr(self.asr_model, 'decoding'): + # Enable word timestamps but not char timestamps to avoid issues + if hasattr(self.asr_model.decoding, 'compute_timestamps'): + original_value = self.asr_model.decoding.compute_timestamps + self.asr_model.decoding.compute_timestamps = True + logger.debug(f"Set compute_timestamps=True (was: {original_value})") + + # Set timestamp type to word only to avoid char_offsets issues + if hasattr(self.asr_model.decoding, 'rnnt_timestamp_type'): + self.asr_model.decoding.rnnt_timestamp_type = 'word' + logger.debug("Set rnnt_timestamp_type='word' to avoid char offset issues") else: - logger.warning("🚨 TIMESTAMP CONFIG: Model does not have compute_timestamps attribute!") + logger.warning("Model does not have decoding attribute!") def reset(self): """Reset the chunked inference state and clear accumulated hypotheses.""" @@ -71,25 +80,33 @@ def _get_batch_preds(self, keep_logits=False): ) # KEY CHANGE: Get full Hypothesis objects instead of just text + # Temporarily disable timestamps to avoid char_offsets error + old_compute_timestamps = getattr(self.asr_model.decoding, 'compute_timestamps', False) + 
self.asr_model.decoding.compute_timestamps = False + hypotheses = self.asr_model.decoding.rnnt_decoder_predictions_tensor( encoder_output=encoded, encoded_lengths=encoded_len, - return_hypotheses=True # CRITICAL: Get timestamps and confidence + return_hypotheses=True # Get hypothesis objects even without timestamps ) + # Restore original setting + self.asr_model.decoding.compute_timestamps = old_compute_timestamps + # Store hypotheses with chunk offset tracking self.all_hypotheses.extend(hypotheses) + logger.debug(f"Got {len(hypotheses)} hypotheses from chunk {len(self.chunk_offsets)}") - # Update chunk offsets for timestamp joining (following FrameBatchMultiTaskAED pattern) - if hypotheses: - # Calculate frame-based offset for proper timestamp alignment - # Each chunk processes audio frames, track cumulative frame offset - if len(self.chunk_offsets) > 1: - # Add frame offset based on processed frames - frame_offset = feat_signal.shape[1] # Number of frames in current chunk - self.chunk_offsets.append(self.chunk_offsets[-1] + frame_offset) + # Update chunk offsets for ALL chunks (following FrameBatchMultiTaskAED pattern) + for length in feat_signal_len: + current_length = length.item() + if len(self.chunk_offsets) == 1: + self.chunk_offsets.append(current_length) else: - # First chunk beyond initialization - self.chunk_offsets.append(feat_signal.shape[1]) + old_offset = self.chunk_offsets[-1] + new_offset = old_offset + current_length + self.chunk_offsets.append(new_offset) + + logger.debug(f"Chunk offsets updated: {self.chunk_offsets}") # Extract text for parent class compatibility best_hyp_text = [hyp.text if hyp.text else "" for hyp in hypotheses] @@ -103,13 +120,32 @@ def get_timestamped_results(self): if not self.all_hypotheses: return [] + logger.info(f"Joining {len(self.all_hypotheses)} hypotheses") + # Join hypotheses using NeMo's FrameBatchMultiTaskAED pattern try: self.merged_hypothesis = self._join_hypotheses(self.all_hypotheses) + + # Check merged 
hypothesis results + if self.merged_hypothesis and hasattr(self.merged_hypothesis, 'timestamp'): + words = self.merged_hypothesis.timestamp.get('word', []) + if words and len(words) > 0: + first_word = words[0] + last_word = words[-1] + first_start = first_word.get('start', 0) + last_end = last_word.get('end', 0) + + logger.info(f"Merged {len(words)} words: {first_start:.2f}s to {last_end:.2f}s") + + if first_start > 1.0: + logger.warning(f"First word starts at {first_start:.2f}s (may be chunk-relative)") + else: + logger.warning("No word timestamps found in merged hypothesis") + return [self.merged_hypothesis] if self.merged_hypothesis else self.all_hypotheses except Exception as e: - logger.warning(f"Hypothesis joining failed: {e}, returning raw hypotheses") - return self.all_hypotheses + logger.error(f"Hypothesis joining FAILED: {e}") + raise e # Don't silently fall back def _join_hypotheses(self, hypotheses): """Join multiple hypotheses with proper timestamp alignment following NeMo's FrameBatchMultiTaskAED.""" @@ -149,48 +185,177 @@ def _join_y_sequence(self, merged_hypothesis, hypotheses): def _join_timestamp(self, merged_hypothesis, hypotheses): """Join timestamps from multiple hypotheses with proper offset handling.""" cumulative_offset = 0 + logger.debug(f"Processing {len(hypotheses)} hypotheses with chunk_offsets: {self.chunk_offsets}") for i, h in enumerate(hypotheses): + # Calculate cumulative offset for this hypothesis if i < len(self.chunk_offsets): cumulative_offset = self.chunk_offsets[i] + else: + logger.warning(f"Hypothesis {i}: No chunk offset available, using previous offset {cumulative_offset}") + + logger.debug(f"Hypothesis {i}: using cumulative_offset {cumulative_offset}") + + # Process word-level timestamps using h.words and h.timestamp tensor + if hasattr(h, 'words') and h.words: + word_list = h.words + timestamp_tensor = getattr(h, 'timestamp', None) + word_confidence = getattr(h, 'word_confidence', []) or [] + + logger.debug(f"Hypothesis 
{i}: Processing {len(word_list)} words") - # Process word-level timestamps if available - if hasattr(h, 'timestamp') and h.timestamp and 'word' in h.timestamp: - word_timestamps = h.timestamp['word'] updated_timestamps = [] - for word in word_timestamps: - if isinstance(word, dict): - updated_word = word.copy() - # Apply frame offset with subsampling factor - if 'start_offset' in word: - updated_word['start_offset'] = word['start_offset'] + cumulative_offset // self.subsampling_factor - if 'end_offset' in word: - updated_word['end_offset'] = word['end_offset'] + cumulative_offset // self.subsampling_factor + for j, word_text in enumerate(word_list): + if word_text and word_text.strip(): # Skip empty words + # Calculate frame indices for this word from the tensor + if timestamp_tensor is not None and j < len(timestamp_tensor): + frame_start = timestamp_tensor[j].item() + frame_end = timestamp_tensor[j + 1].item() if j + 1 < len(timestamp_tensor) else frame_start + 1 + else: + # Fallback: estimate frames + frame_start = j + frame_end = j + 1 + + # Apply cumulative offset for absolute timestamps + absolute_frame_start = frame_start + cumulative_offset // self.subsampling_factor + absolute_frame_end = frame_end + cumulative_offset // self.subsampling_factor + + # Convert frames to time using model parameters + start_time = absolute_frame_start * self.window_stride * self.subsampling_factor + end_time = absolute_frame_end * self.window_stride * self.subsampling_factor + + # Get confidence if available + confidence = word_confidence[j] if j < len(word_confidence) else 1.0 + + # Create word timestamp entry in NeMo format + updated_word = { + 'word': word_text, + 'start_offset': absolute_frame_start, + 'end_offset': absolute_frame_end, + 'start': start_time, + 'end': end_time, + 'confidence': confidence + } + updated_timestamps.append(updated_word) - merged_hypothesis.timestamp['word'].extend(updated_timestamps) + if updated_timestamps: + logger.debug(f"Hypothesis {i}: 
processed {len(updated_timestamps)} words") + merged_hypothesis.timestamp['word'].extend(updated_timestamps) # Process segment-level timestamps if available - if hasattr(h, 'timestamp') and h.timestamp and 'segment' in h.timestamp: - segment_timestamps = h.timestamp['segment'] - updated_timestamps = [] + if hasattr(h, 'timestamp') and hasattr(h.timestamp, 'get'): + segment_timestamps = h.timestamp.get('segment', None) + if segment_timestamps: + updated_timestamps = [] + + for segment in segment_timestamps: + if isinstance(segment, dict): + updated_segment = segment.copy() + # Apply frame offset with subsampling factor + if 'start_offset' in segment: + updated_segment['start_offset'] = segment['start_offset'] + cumulative_offset // self.subsampling_factor + if 'end_offset' in segment: + updated_segment['end_offset'] = segment['end_offset'] + cumulative_offset // self.subsampling_factor + + # Convert to absolute time using model parameters + if 'start_offset' in updated_segment: + updated_segment['start'] = updated_segment['start_offset'] * self.window_stride * self.subsampling_factor + if 'end_offset' in updated_segment: + updated_segment['end'] = updated_segment['end_offset'] * self.window_stride * self.subsampling_factor + + updated_timestamps.append(updated_segment) + + merged_hypothesis.timestamp['segment'].extend(updated_timestamps) - for segment in segment_timestamps: - if isinstance(segment, dict): - updated_segment = segment.copy() - # Apply frame offset with subsampling factor - if 'start_offset' in segment: - updated_segment['start_offset'] = segment['start_offset'] + cumulative_offset // self.subsampling_factor - if 'end_offset' in segment: - updated_segment['end_offset'] = segment['end_offset'] + cumulative_offset // self.subsampling_factor - updated_timestamps.append(updated_segment) + return merged_hypothesis - merged_hypothesis.timestamp['segment'].extend(updated_timestamps) - return merged_hypothesis +def 
extract_timestamps_from_hypotheses_native(hypotheses: List[Hypothesis], chunk_start_time: float = 0.0, model=None) -> List[Dict[str, Any]]: + """ + Extract word-level timestamps using NeMo's native process_timestamp_outputs. + + This is the recommended approach using NeMo's official utilities. + """ + try: + if not hypotheses: + return [] + + logger.debug(f"Processing {len(hypotheses)} hypotheses with chunk_start_time={chunk_start_time}") + + # Get model parameters + window_stride = getattr(model._cfg.preprocessor, 'window_stride', 0.01) if model else 0.01 + subsampling_factor = getattr(model._cfg.encoder, 'subsampling_factor', 4) if model else 4 + + logger.debug(f"Model params: window_stride={window_stride}, subsampling_factor={subsampling_factor}") + + words = [] + for i, hyp in enumerate(hypotheses): + # Check if hypothesis already has processed timestamp dict (from joining) + if hasattr(hyp, 'timestamp') and isinstance(hyp.timestamp, dict) and 'word' in hyp.timestamp: + word_timestamps = hyp.timestamp['word'] + for word_data in word_timestamps: + if isinstance(word_data, dict) and word_data.get('word'): + # Already processed by joining - just add chunk offset if needed + final_start = float(word_data.get('start', 0)) + chunk_start_time + final_end = float(word_data.get('end', 0)) + chunk_start_time + + word_dict = { + 'word': word_data['word'], + 'start': final_start, + 'end': final_end, + 'confidence': float(word_data.get('confidence', 1.0)) + } + words.append(word_dict) + + elif hasattr(hyp, 'words') and hyp.words: + # Original tensor processing for raw hypotheses + word_list = hyp.words + timestamp_tensor = getattr(hyp, 'timestamp', None) if hasattr(hyp, 'timestamp') and hasattr(hyp.timestamp, 'shape') else None + word_confidence = getattr(hyp, 'word_confidence', []) or [] + + for j, word_text in enumerate(word_list): + if word_text and word_text.strip(): # Skip empty words + # Calculate frame indices for this word + if timestamp_tensor is not None and j < 
len(timestamp_tensor): + frame_start = timestamp_tensor[j].item() + frame_end = timestamp_tensor[j + 1].item() if j + 1 < len(timestamp_tensor) else frame_start + 1 + else: + # Fallback: estimate frames + frame_start = j + frame_end = j + 1 + + # Convert frames to time using model parameters + start_time = frame_start * window_stride * subsampling_factor + end_time = frame_end * window_stride * subsampling_factor + + # Get confidence + confidence = word_confidence[j] if j < len(word_confidence) else 1.0 + + # Add chunk start time for absolute positioning + final_start = start_time + chunk_start_time + final_end = end_time + chunk_start_time + + word_dict = { + 'word': word_text, + 'start': final_start, + 'end': final_end, + 'confidence': float(confidence) + } + words.append(word_dict) + + logger.info(f"Extracted {len(words)} words") + if words: + logger.info(f"Time range: {words[0]['start']:.2f}s to {words[-1]['end']:.2f}s") + + return words + + except Exception as e: + logger.error(f"Native timestamp extraction FAILED: {e}") + raise e # Don't silently fall back def extract_timestamps_from_hypotheses(hypotheses: List[Hypothesis], chunk_start_time: float = 0.0, model=None) -> List[Dict[str, Any]]: @@ -207,107 +372,65 @@ def extract_timestamps_from_hypotheses(hypotheses: List[Hypothesis], chunk_start """ try: words = [] - - print(f"🔍 LEN CHECK 1: hypotheses type: {type(hypotheses)}") if hypotheses is None: - print("🔍 LEN CHECK 1: hypotheses is None - returning empty list") return [] - print(f"🔍 LEN CHECK 1: About to call len(hypotheses)") - logger.info(f"Processing {len(hypotheses)} hypotheses for timestamp extraction") - print(f"🔍 LEN CHECK 1: Successfully called len(hypotheses) = {len(hypotheses)}") + logger.debug(f"Processing {len(hypotheses)} hypotheses for timestamp extraction") for i, hyp in enumerate(hypotheses): - print(f"🔍 LEN CHECK 2: Processing hypothesis {i}, hyp type: {type(hyp)}") + logger.debug(f"Processing hypothesis {i}: {type(hyp)}") + try: - # 
Extract timestamps from NeMo Hypothesis structure: - # timestamp={'timestep': [], 'char': [], 'word': [], 'segment': []} - if hasattr(hyp, 'timestamp') and isinstance(hyp.timestamp, dict): - timestamp_dict = hyp.timestamp - print(f"🔍 LEN CHECK 3: timestamp_dict type: {type(timestamp_dict)}") - - # Get word-level timestamps - word_timestamps = timestamp_dict.get('word', []) - print(f"🔍 DEBUG: word_timestamps type: {type(word_timestamps)}") - if word_timestamps is None: - logger.warning(f"Hypothesis {i}: word_timestamps is None, using empty list") - word_timestamps = [] - - word_confidence = getattr(hyp, 'word_confidence', []) - print(f"🔍 DEBUG: word_confidence type: {type(word_confidence)}") - if word_confidence is None: - logger.warning(f"Hypothesis {i}: word_confidence is None, using empty list") - word_confidence = [] - - print(f"🔍 DEBUG: word_timestamps length: {len(word_timestamps)}") - logger.info(f"Hypothesis {i}: Found {len(word_timestamps)} word timestamps") - - # 🔍 CRITICAL DEBUG: Log structure of first word timestamp - if word_timestamps: - print(f"🔍 TIMESTAMP STRUCTURE: First word_timestamp: {word_timestamps[0]}") - print(f"🔍 TIMESTAMP STRUCTURE: Type: {type(word_timestamps[0])}") - if isinstance(word_timestamps[0], dict): - print(f"🔍 TIMESTAMP STRUCTURE: Keys: {list(word_timestamps[0].keys())}") - logger.info(f"🔍 TIMESTAMP STRUCTURE: {word_timestamps[0]}") - - # Process word-level timing data - for j, word_timing in enumerate(word_timestamps): - print(f"🔍 DEBUG: Processing word {j}, word_timing: {word_timing}") + # Use h.words instead of h.timestamp['word'] + if hasattr(hyp, 'words') and hyp.words: + word_list = hyp.words + timestamp_tensor = getattr(hyp, 'timestamp', None) + word_confidence = getattr(hyp, 'word_confidence', []) or [] + + # Convert frame indices to word timestamps using model parameters + window_stride = getattr(model._cfg.preprocessor, 'window_stride', 0.01) if model else 0.01 + subsampling_factor = getattr(model._cfg.encoder, 
'subsampling_factor', 4) if model else 4 + + # Process each word with its corresponding frame indices + for j, word_text in enumerate(word_list): try: - logger.info(f"🔍 WORD PROCESSING: word {j}: {word_timing}") - if isinstance(word_timing, dict): - # Word timing is a dictionary with timing info - word_text = word_timing.get('word', '') - - # Extract timestamps using NeMo's offset fields - start_offset = word_timing.get('start_offset', 0) - end_offset = word_timing.get('end_offset', 0) - - # Convert frame offsets to time using NeMo's formula - window_stride = 0.01 # NeMo default - subsampling_factor = 4 # NeMo default - start_time = start_offset * window_stride * subsampling_factor - end_time = end_offset * window_stride * subsampling_factor - - print(f"🔧 TIMESTAMP: {word_text} [{start_offset},{end_offset}] -> [{start_time:.3f}s,{end_time:.3f}s]") - - print(f"🔍 LEN CHECK 9: word_confidence before len check: {type(word_confidence)}") - if word_confidence is not None: - print(f"🔍 LEN CHECK 10: About to call len(word_confidence)") - confidence_len = len(word_confidence) - print(f"🔍 LEN CHECK 10: len(word_confidence) = {confidence_len}") - logger.info(f"🔍 ISOLATE: word_confidence type: {type(word_confidence)}, len: {confidence_len}") - confidence = word_confidence[j] if j < confidence_len else 1.0 + if word_text and word_text.strip(): # Skip empty words + # Calculate frame indices for this word + # For word j, we typically use frames [j, j+1] or similar pattern + if timestamp_tensor is not None and j < len(timestamp_tensor): + # Use frame index from tensor + frame_start = timestamp_tensor[j].item() if j < len(timestamp_tensor) else j + frame_end = timestamp_tensor[j + 1].item() if j + 1 < len(timestamp_tensor) else frame_start + 1 else: - print(f"🔍 LEN CHECK 10: word_confidence is None, using default confidence") - confidence = 1.0 - - if word_text: # Only add non-empty words - word_dict = { - 'word': word_text, - 'start': float(start_time) + chunk_start_time, - 'end': 
float(end_time) + chunk_start_time, - 'confidence': float(confidence) - } - words.append(word_dict) - print(f"🔍 LEN CHECK 11: Added word {j}: {word_text}") + # Fallback: estimate frame indices + frame_start = j + frame_end = j + 1 + + # Convert frame indices to time using model parameters + start_time = frame_start * window_stride * subsampling_factor + end_time = frame_end * window_stride * subsampling_factor + + # Get confidence for this word + confidence = word_confidence[j] if j < len(word_confidence) else 1.0 + + word_dict = { + 'word': word_text, + 'start': float(start_time) + chunk_start_time, + 'end': float(end_time) + chunk_start_time, + 'confidence': float(confidence) + } + words.append(word_dict) + except Exception as word_error: - print(f"🔍 LEN CHECK ERROR: Error processing word {j}: {word_error}") - logger.error(f"🔍 ISOLATE: Error processing word {j}: {word_error}", exc_info=True) + logger.error(f"Error processing word {j} '{word_text}': {word_error}") raise # Fallback: if no word timestamps but we have text, create words from text elif hasattr(hyp, 'text') and hyp.text: - print(f"🔍 LEN CHECK 12: Using text fallback for hyp {i}") try: - # Split text into words and estimate timing text_words = hyp.text.split() - print(f"🔍 LEN CHECK 13: text_words type: {type(text_words)}") if text_words: - print(f"🔍 LEN CHECK 14: About to call len(text_words)") - # Estimate timing: spread words evenly across a reasonable duration estimated_duration = max(len(text_words) * 0.5, 1.0) # 0.5s per word minimum - print(f"🔍 LEN CHECK 14: len(text_words) = {len(text_words)}") word_duration = estimated_duration / len(text_words) for j, word in enumerate(text_words): @@ -322,14 +445,12 @@ def extract_timestamps_from_hypotheses(hypotheses: List[Hypothesis], chunk_start } words.append(word_dict) - logger.info(f"Hypothesis {i}: Created {len(text_words)} estimated word timings from text") + logger.debug(f"Hypothesis {i}: Created {len(text_words)} estimated word timings from text") 
except Exception as text_error: - print(f"🔍 LEN CHECK ERROR: Text fallback error for hyp {i}: {text_error}") - logger.error(f"Error processing text fallback for hypothesis {i}: {text_error}", exc_info=True) + logger.error(f"Error processing text fallback for hypothesis {i}: {text_error}") # Create empty word entries for silence/non-speech if hypothesis is empty else: - print(f"🔍 LEN CHECK 15: Creating empty word entry for hyp {i}") # This represents silence or non-speech audio words.append({ 'word': '', @@ -339,18 +460,13 @@ def extract_timestamps_from_hypotheses(hypotheses: List[Hypothesis], chunk_start }) except Exception as hyp_error: - print(f"🔍 LEN CHECK ERROR: Error processing hypothesis {i}: {hyp_error}") - logger.error(f"Error processing hypothesis {i}: {hyp_error}", exc_info=True) + logger.error(f"Error processing hypothesis {i}: {hyp_error}") - print(f"🔍 LEN CHECK 16: About to call len(words) at end") - logger.info(f"Extracted {len(words)} total words with timestamps") - print(f"🔍 LEN CHECK 16: Successfully called len(words) = {len(words)}") - print(f"🔍 LEN CHECK 17: About to return words") + logger.debug(f"Extracted {len(words)} total words with timestamps") return words except Exception as e: - print(f"🔍 LEN CHECK ERROR: Critical error in extract_timestamps_from_hypotheses: {e}") - logger.error(f"Critical error in extract_timestamps_from_hypotheses: {e}", exc_info=True) + logger.error(f"Critical error in extract_timestamps_from_hypotheses: {e}") return [] @@ -369,20 +485,18 @@ async def transcribe_with_enhanced_chunking(model, audio_file_path: str, Returns: Dictionary with transcription results including word-level timestamps """ - # 📊 TIMING: Start overall timing + # Start timing overall_start = time.time() - logger.info(f"📊 TIMING: Starting enhanced chunking transcription for {audio_file_path}") - print(f"📊 TIMING: 🎯 PHASE START: Audio Upload/Processing at {time.strftime('%H:%M:%S')}") + logger.info(f"Starting enhanced chunking transcription for 
{audio_file_path}") # Ensure model is in eval mode for inference model.eval() try: with torch.no_grad(): - # 📊 TIMING: Initialization phase + # Initialization phase init_start = time.time() - logger.info(f"📊 TIMING: Initializing NeMo chunked processor...") - print(f"📊 TIMING: 🔧 PHASE START: Initialization at {time.strftime('%H:%M:%S')}") + logger.debug(f"Initializing NeMo chunked processor...") # Initialize NeMo's chunked processor with timestamp preservation chunker = TimestampedFrameBatchChunkedRNNT( @@ -394,13 +508,11 @@ async def transcribe_with_enhanced_chunking(model, audio_file_path: str, init_end = time.time() init_duration = init_end - init_start - logger.info(f"📊 TIMING: ✅ Initialization completed in {init_duration:.3f}s") - print(f"📊 TIMING: ✅ PHASE END: Initialization completed in {init_duration:.3f}s") + logger.debug(f"Initialization completed in {init_duration:.3f}s") - # 📊 TIMING: Audio loading phase + # Audio loading phase loading_start = time.time() - logger.info(f"📊 TIMING: Loading audio file into chunker...") - print(f"📊 TIMING: 📁 PHASE START: Audio Loading at {time.strftime('%H:%M:%S')}") + logger.debug(f"Loading audio file into chunker...") # Process the audio file using NeMo's built-in chunking chunker.read_audio_file( @@ -411,50 +523,43 @@ async def transcribe_with_enhanced_chunking(model, audio_file_path: str, loading_end = time.time() loading_duration = loading_end - loading_start - logger.info(f"📊 TIMING: ✅ Audio loading completed in {loading_duration:.3f}s") - print(f"📊 TIMING: ✅ PHASE END: Audio Loading completed in {loading_duration:.3f}s") + logger.debug(f"Audio loading completed in {loading_duration:.3f}s") - # 📊 TIMING: Processing phase + # Processing phase processing_start = time.time() - logger.info(f"📊 TIMING: Running enhanced chunking inference...") - print(f"📊 TIMING: 🚀 PHASE START: Processing at {time.strftime('%H:%M:%S')}") + logger.info(f"Running enhanced chunking inference...") # Run inference with NeMo's chunked processing 
result_text = chunker.transcribe() processing_end = time.time() processing_duration = processing_end - processing_start - logger.info(f"📊 TIMING: ✅ Processing completed in {processing_duration:.3f}s") - print(f"📊 TIMING: ✅ PHASE END: Processing completed in {processing_duration:.3f}s") + logger.info(f"Processing completed in {processing_duration:.3f}s") - # 📊 TIMING: Reconcile phase (hypothesis extraction and merging) + # Reconcile phase (hypothesis extraction and merging) reconcile_start = time.time() - logger.info(f"📊 TIMING: Extracting and reconciling hypotheses...") - print(f"📊 TIMING: 🔀 PHASE START: Reconcile at {time.strftime('%H:%M:%S')}") + logger.debug(f"Extracting and reconciling hypotheses...") hypotheses = chunker.get_timestamped_results() reconcile_end = time.time() reconcile_duration = reconcile_end - reconcile_start - logger.info(f"📊 TIMING: ✅ Reconcile completed in {reconcile_duration:.3f}s") - print(f"📊 TIMING: ✅ PHASE END: Reconcile completed in {reconcile_duration:.3f}s") + logger.debug(f"Reconcile completed in {reconcile_duration:.3f}s") - # 📊 TIMING: Timestamp extraction phase + # Timestamp extraction phase timestamp_start = time.time() - logger.info(f"📊 TIMING: Extracting word-level timestamps...") - print(f"📊 TIMING: 📝 PHASE START: Timestamp Extraction at {time.strftime('%H:%M:%S')}") + logger.debug(f"Extracting word-level timestamps...") - words = extract_timestamps_from_hypotheses(hypotheses, chunk_start_time=0.0, model=model) + # Try using native NeMo processing first, fall back to manual if needed + words = extract_timestamps_from_hypotheses_native(hypotheses, chunk_start_time=0.0, model=model) timestamp_end = time.time() timestamp_duration = timestamp_end - timestamp_start - logger.info(f"📊 TIMING: ✅ Timestamp extraction completed in {timestamp_duration:.3f}s") - print(f"📊 TIMING: ✅ PHASE END: Timestamp Extraction completed in {timestamp_duration:.3f}s") + logger.debug(f"Timestamp extraction completed in {timestamp_duration:.3f}s") - # 📊 
TIMING: Final formatting phase + # Final formatting phase format_start = time.time() - logger.info(f"📊 TIMING: Formatting final response...") - print(f"📊 TIMING: 📄 PHASE START: Final Formatting at {time.strftime('%H:%M:%S')}") + logger.debug(f"Formatting final response...") if words is None: logger.warning("Words extraction returned None, using empty list") @@ -478,34 +583,14 @@ async def transcribe_with_enhanced_chunking(model, audio_file_path: str, format_end = time.time() format_duration = format_end - format_start - logger.info(f"📊 TIMING: ✅ Final formatting completed in {format_duration:.3f}s") - print(f"📊 TIMING: ✅ PHASE END: Final Formatting completed in {format_duration:.3f}s") + logger.debug(f"Final formatting completed in {format_duration:.3f}s") - # 📊 TIMING: Overall completion summary + # Overall completion summary overall_end = time.time() overall_duration = overall_end - overall_start - logger.info(f"📊 TIMING: =================== COMPLETE TIMING SUMMARY ===================") - logger.info(f"📊 TIMING: 🔧 Initialization: {init_duration:.3f}s") - logger.info(f"📊 TIMING: 📁 Audio Loading: {loading_duration:.3f}s") - logger.info(f"📊 TIMING: 🚀 Processing: {processing_duration:.3f}s") - logger.info(f"📊 TIMING: 🔀 Reconcile: {reconcile_duration:.3f}s") - logger.info(f"📊 TIMING: 📝 Timestamp Extraction: {timestamp_duration:.3f}s") - logger.info(f"📊 TIMING: 📄 Final Formatting: {format_duration:.3f}s") - logger.info(f"📊 TIMING: 🎯 TOTAL END-TO-END: {overall_duration:.3f}s") - logger.info(f"📊 TIMING: Enhanced chunking completed. 
Transcribed {words_count} words") - logger.info(f"📊 TIMING: ================================================================") - - print(f"📊 TIMING: =================== COMPLETE TIMING SUMMARY ===================") - print(f"📊 TIMING: 🔧 Initialization: {init_duration:.3f}s") - print(f"📊 TIMING: 📁 Audio Loading: {loading_duration:.3f}s") - print(f"📊 TIMING: 🚀 Processing: {processing_duration:.3f}s") - print(f"📊 TIMING: 🔀 Reconcile: {reconcile_duration:.3f}s") - print(f"📊 TIMING: 📝 Timestamp Extraction: {timestamp_duration:.3f}s") - print(f"📊 TIMING: 📄 Final Formatting: {format_duration:.3f}s") - print(f"📊 TIMING: 🎯 TOTAL END-TO-END: {overall_duration:.3f}s") - print(f"📊 TIMING: Enhanced chunking completed. Transcribed {words_count} words") - print(f"📊 TIMING: ================================================================") + logger.info(f"Enhanced chunking completed in {overall_duration:.3f}s - {words_count} words") + logger.debug(f"Timing breakdown: init={init_duration:.3f}s, load={loading_duration:.3f}s, process={processing_duration:.3f}s, reconcile={reconcile_duration:.3f}s, extract={timestamp_duration:.3f}s, format={format_duration:.3f}s") return response