Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
927 changes: 34 additions & 893 deletions CLAUDE.md

Large diffs are not rendered by default.

69 changes: 66 additions & 3 deletions backends/advanced/src/advanced_omi_backend/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
import logging
import os
import time
import wave
import io
import numpy as np
from pathlib import Path

# Type import to avoid circular imports
from typing import TYPE_CHECKING, Optional
Expand Down Expand Up @@ -83,9 +87,68 @@ async def process_audio_chunk(

await processor_manager.queue_audio(processing_item)

# Update client state if provided
if client_state is not None:
client_state.update_audio_received(chunk)

async def load_audio_file_as_chunk(audio_path: Path) -> AudioChunk:
    """Load an existing audio file into Wyoming AudioChunk format for reprocessing.

    Reads a WAV file from disk, downmixes 16-bit stereo to mono if needed,
    validates that the result matches the pipeline's expected format
    (16kHz, mono, 16-bit PCM), and wraps the raw PCM in an AudioChunk.

    Args:
        audio_path: Path to the audio file on disk

    Returns:
        AudioChunk object ready for processing

    Raises:
        FileNotFoundError: If audio file doesn't exist
        ValueError: If audio file format is invalid or unsupported
    """
    try:
        # Read the audio file
        with open(audio_path, 'rb') as f:
            file_content = f.read()

        # Process WAV file using existing pattern from system_controller.py
        with wave.open(io.BytesIO(file_content), "rb") as wav_file:
            sample_rate = wav_file.getframerate()
            sample_width = wav_file.getsampwidth()
            channels = wav_file.getnchannels()
            audio_data = wav_file.readframes(wav_file.getnframes())

        # Convert to mono if stereo. Accumulate in int32 before averaging:
        # np.mean(..., dtype=np.int16) sums in int16 and can overflow,
        # corrupting the PCM samples. Clip back to the int16 range.
        if channels == 2:
            if sample_width == 2:
                audio_array = np.frombuffer(audio_data, dtype=np.int16).reshape(-1, 2)
                mono = audio_array.astype(np.int32).sum(axis=1) // 2
                audio_data = np.clip(mono, -32768, 32767).astype(np.int16).tobytes()
                channels = 1
            else:
                raise ValueError(f"Unsupported sample width for stereo: {sample_width}")

        # Validate format matches expected (16kHz, mono, 16-bit)
        if sample_rate != 16000:
            raise ValueError(f"Audio file has sample rate {sample_rate}Hz, expected 16kHz")
        if channels != 1:
            raise ValueError(f"Audio file has {channels} channels, expected mono")
        if sample_width != 2:
            raise ValueError(f"Audio file has {sample_width}-byte samples, expected 2 bytes")

        # Create AudioChunk stamped with the current wall-clock time (ms)
        chunk = AudioChunk(
            audio=audio_data,
            rate=sample_rate,
            width=sample_width,
            channels=channels,
            timestamp=int(time.time() * 1000)
        )

        logger.info(f"Loaded audio file {audio_path} as AudioChunk ({len(audio_data)} bytes)")
        return chunk

    except FileNotFoundError:
        logger.error(f"Audio file not found: {audio_path}")
        raise
    except ValueError:
        # Propagate our own validation errors unchanged instead of
        # re-wrapping them in a generic "Invalid audio file format" message.
        raise
    except Exception as e:
        logger.error(f"Error loading audio file {audio_path}: {e}")
        raise ValueError(f"Invalid audio file format: {e}") from e


async def _process_audio_cropping_with_relative_timestamps(
Expand Down
38 changes: 1 addition & 37 deletions backends/advanced/src/advanced_omi_backend/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,18 @@
application level by the ProcessorManager.
"""

import asyncio
import logging
import os
import time
from pathlib import Path
from typing import Dict, List, Optional, Tuple

from advanced_omi_backend.conversation_manager import get_conversation_manager
from advanced_omi_backend.database import AudioChunksRepository
from advanced_omi_backend.task_manager import get_task_manager
from wyoming.audio import AudioChunk

# Get loggers
audio_logger = logging.getLogger("audio_processing")

# Configuration constants
NEW_CONVERSATION_TIMEOUT_MINUTES = float(os.getenv("NEW_CONVERSATION_TIMEOUT_MINUTES", "1.5"))


class ClientState:
"""Manages conversation state for a single client connection."""
Expand Down Expand Up @@ -67,11 +61,6 @@ def __init__(

audio_logger.info(f"Created client state for {client_id}")

def update_audio_received(self, chunk: AudioChunk):
"""Update state when audio is received."""
# Check if we should start a new conversation
if self.should_start_new_conversation():
asyncio.create_task(self.start_new_conversation())

def set_current_audio_uuid(self, audio_uuid: str):
"""Set the current audio UUID when processor creates a new file."""
Expand Down Expand Up @@ -104,20 +93,9 @@ def record_speech_end(self, audio_uuid: str, timestamp: float):
audio_logger.warning(f"Speech end recorded for {audio_uuid} but no start time found")

def update_transcript_received(self):
"""Update timestamp when transcript is received (for timeout detection)."""
"""Update timestamp when transcript is received."""
self.last_transcript_time = time.time()

def should_start_new_conversation(self) -> bool:
"""Check if we should start a new conversation based on timeout."""
if self.last_transcript_time is None:
return False

current_time = time.time()
time_since_last_transcript = current_time - self.last_transcript_time
timeout_seconds = NEW_CONVERSATION_TIMEOUT_MINUTES * 60

return time_since_last_transcript > timeout_seconds

async def close_current_conversation(self):
"""Close the current conversation and queue necessary processing."""
# Prevent double closure
Expand Down Expand Up @@ -161,20 +139,6 @@ async def close_current_conversation(self):
else:
audio_logger.warning(f"⚠️ Conversation closure had issues for {self.current_audio_uuid}")

async def start_new_conversation(self):
"""Start a new conversation by closing current and resetting state."""
await self.close_current_conversation()

# Reset conversation state
self.current_audio_uuid = None
self.conversation_start_time = time.time()
self.last_transcript_time = None
self.conversation_closed = False

audio_logger.info(
f"Client {self.client_id}: Started new conversation due to "
f"{NEW_CONVERSATION_TIMEOUT_MINUTES}min timeout"
)

async def disconnect(self):
"""Clean disconnect of client state."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,23 @@
import asyncio
import hashlib
import logging
import os
import time
from pathlib import Path
from typing import Optional

from advanced_omi_backend.audio_utils import (
_process_audio_cropping_with_relative_timestamps,
load_audio_file_as_chunk,
)
from advanced_omi_backend.client_manager import (
ClientManager,
client_belongs_to_user,
get_user_clients_all,
)
from advanced_omi_backend.database import AudioChunksRepository, ProcessingRunsRepository, chunks_col, processing_runs_col, conversations_col, ConversationsRepository
from advanced_omi_backend.users import User
from advanced_omi_backend.processors import get_processor_manager, TranscriptionItem, MemoryProcessingItem
from advanced_omi_backend.users import User, get_user_by_id
from fastapi.responses import JSONResponse

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -585,9 +588,10 @@ async def reprocess_transcript(conversation_id: str, user: User):
)

# Generate configuration hash for duplicate detection
transcription_provider = os.getenv("TRANSCRIPTION_PROVIDER", "deepgram")
config_data = {
"audio_path": str(full_audio_path),
"transcription_provider": "deepgram", # This would come from settings
"transcription_provider": transcription_provider,
"trigger": "manual_reprocess"
}
config_hash = hashlib.sha256(str(config_data).encode()).hexdigest()[:16]
Expand All @@ -613,18 +617,37 @@ async def reprocess_transcript(conversation_id: str, user: User):
status_code=500, content={"error": "Failed to create transcript version"}
)

# TODO: Queue audio for reprocessing with ProcessorManager
# This is where we would integrate with the existing processor
# For now, we'll return the version ID for the caller to handle
# NEW: Load audio file and queue for transcription processing
try:
# Load audio file as AudioChunk
audio_chunk = await load_audio_file_as_chunk(full_audio_path)

# Create TranscriptionItem for reprocessing
transcription_item = TranscriptionItem(
client_id=f"reprocess-{conversation_id}",
user_id=str(user.user_id),
audio_uuid=audio_uuid,
audio_chunk=audio_chunk
)

# Queue for transcription processing
processor_manager = get_processor_manager()
await processor_manager.queue_transcription(transcription_item)

logger.info(f"Queued transcript reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}")

logger.info(f"Created transcript reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}")
except Exception as e:
logger.error(f"Error queuing transcript reprocessing: {e}")
return JSONResponse(
status_code=500, content={"error": f"Failed to queue reprocessing: {str(e)}"}
)

return JSONResponse(content={
"message": f"Transcript reprocessing started for conversation {conversation_id}",
"run_id": run_id,
"version_id": version_id,
"config_hash": config_hash,
"status": "PENDING"
"status": "QUEUED"
})

except Exception as e:
Expand Down Expand Up @@ -673,9 +696,10 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use
)

# Generate configuration hash for duplicate detection
memory_provider = os.getenv("MEMORY_PROVIDER", "friend_lite")
config_data = {
"transcript_version_id": transcript_version_id,
"memory_provider": "friend_lite", # This would come from settings
"memory_provider": memory_provider,
"trigger": "manual_reprocess"
}
config_hash = hashlib.sha256(str(config_data).encode()).hexdigest()[:16]
Expand All @@ -702,18 +726,42 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use
status_code=500, content={"error": "Failed to create memory version"}
)

# TODO: Queue memory extraction for processing
# This is where we would integrate with the existing memory processor
# NEW: Queue memory processing
try:
# Get user email for memory processing
user_obj = await get_user_by_id(str(user.user_id))
if not user_obj:
return JSONResponse(
status_code=500, content={"error": "User not found for memory processing"}
)

# Create MemoryProcessingItem for reprocessing
memory_item = MemoryProcessingItem(
client_id=f"reprocess-{conversation_id}",
user_id=str(user.user_id),
user_email=user_obj.email,
conversation_id=conversation_id
)

Comment on lines +739 to +745
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

🧩 Analysis chain

Bug: transcript_version_id is not passed to memory reprocess job

Queue item lacks transcript_version_id, so the worker may process the wrong transcript version.

Apply:

             memory_item = MemoryProcessingItem(
                 client_id=f"reprocess-{conversation_id}",
                 user_id=str(user.user_id),
                 user_email=user_obj.email,
-                conversation_id=conversation_id
+                conversation_id=conversation_id,
+                transcript_version_id=transcript_version_id
             )

🏁 Script executed:

#!/bin/bash
# Verify MemoryProcessingItem supports transcript_version_id, and where it is consumed.
rg -nP --type=py -C3 '\bclass\s+MemoryProcessingItem\b|@dataclass'
rg -nP --type=py -C3 '\bMemoryProcessingItem\('
rg -nP --type=py -C5 '\bdef\s+queue_memory\b'

Length of output: 9376


🏁 Script executed:

#!/bin/bash
set -euo pipefail

proc_file="backends/advanced/src/advanced_omi_backend/processors.py"
trans_file="backends/advanced/src/advanced_omi_backend/transcription.py"
conv_file="backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py"

echo "=== processors.py: MemoryProcessingItem area ==="
line=$(rg -nP --line-number 'class\s+MemoryProcessingItem' "$proc_file" | head -n1 | cut -d: -f1 || true)
if [ -z "$line" ]; then
  echo "MemoryProcessingItem class not found in $proc_file"
else
  start=$(( line - 10 ))
  if [ $start -lt 1 ]; then start=1; fi
  end=$(( line + 60 ))
  sed -n "${start},${end}p" "$proc_file" || true
fi

echo
echo "=== processors.py: queue_memory function ==="
line_q=$(rg -nP --line-number 'def\s+queue_memory\s*\(' "$proc_file" | head -n1 | cut -d: -f1 || true)
if [ -z "$line_q" ]; then
  echo "queue_memory not found in $proc_file"
else
  start=$(( line_q - 8 ))
  if [ $start -lt 1 ]; then start=1; fi
  end=$(( line_q + 80 ))
  sed -n "${start},${end}p" "$proc_file" || true
fi

echo
echo "=== transcription.py: MemoryProcessingItem instantiation area ==="
line_t=$(rg -nP --line-number 'MemoryProcessingItem\(' "$trans_file" | head -n1 | cut -d: -f1 || true)
if [ -z "$line_t" ]; then
  echo "MemoryProcessingItem instantiation not found in $trans_file"
else
  start=$(( line_t - 8 ))
  if [ $start -lt 1 ]; then start=1; fi
  end=$(( line_t + 40 ))
  sed -n "${start},${end}p" "$trans_file" || true
fi

echo
echo "=== conversation_controller.py: reprocess area ==="
sed -n '720,760p' "$conv_file" || true

Length of output: 10658


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== search for transcript_version_id ==="
rg -n --hidden --no-ignore --type=py 'transcript_version_id' || true

echo
echo "=== search for transcript_version (broader) ==="
rg -n --hidden --no-ignore --type=py 'transcript_version' || true

echo
echo "=== search for attribute access .transcript_version_id ==="
rg -n --hidden --no-ignore --type=py '\.transcript_version_id\b' || true

echo
echo "=== search for MemoryProcessingItem instantiations ==="
rg -n --hidden --no-ignore --type=py 'MemoryProcessingItem\(' || true

echo
echo "=== search for memory queue consumer / processor ==="
rg -n --hidden --no-ignore --type=py 'memory_queue' || true
rg -n --hidden --no-ignore --type=py 'memory_processor' || true
rg -n --hidden --no-ignore --type=py 'process_memory' || true
rg -n --hidden --no-ignore --type=py 'def\s+.*memory.*\(' -n --pcre2 || true

Length of output: 23917


🏁 Script executed:

#!/bin/bash
set -euo pipefail

proc="backends/advanced/src/advanced_omi_backend/processors.py"
mem_service="backends/advanced/src/advanced_omi_backend/memory/memory_service.py"

echo "=== processors.py: _memory_processor and _process_memory_item ==="
rg -n --line-number 'async def _memory_processor\(|async def _process_memory_item\(|async def _should_process_memory\(' "$proc" || true
start_line=$(rg -n --line-number 'async def _memory_processor\(' "$proc" | head -n1 | cut -d: -f1 || true)
if [ -n "$start_line" ]; then
  sed -n "$((start_line-6)),$((start_line+120))p" "$proc"
fi

echo
echo "=== processors.py: _process_memory_item exact area ==="
line_proc=$(rg -n --line-number 'async def _process_memory_item\(' "$proc" | head -n1 | cut -d: -f1 || true)
if [ -n "$line_proc" ]; then
  sed -n "$((line_proc-4)),$((line_proc+200))p" "$proc"
fi

echo
echo "=== memory_service.py: functions referencing transcript_version_id ==="
rg -n --line-number 'transcript_version_id' "$mem_service" || true
rg -n --line-number 'def\s+_process_memory_updates\(' "$mem_service" || true
line_ms=$(rg -n --line-number 'def\s+_process_memory_updates\(' "$mem_service" | head -n1 | cut -d: -f1 || true)
if [ -n "$line_ms" ]; then
  sed -n "$((line_ms-6)),$((line_ms+120))p" "$mem_service"
fi

echo
echo "=== memory_service.py: top area ==="
sed -n '1,240p' "$mem_service" || true

Length of output: 30988


🏁 Script executed:

#!/bin/bash
set -euo pipefail

cc="backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py"
db="backends/advanced/src/advanced_omi_backend/database.py"

echo "=== conversation_controller.reprocess_memory ==="
rg -n --line-number 'async def reprocess_memory\(' "$cc" || true
start=$(rg -n --line-number 'async def reprocess_memory\(' "$cc" | head -n1 | cut -d: -f1 || true)
if [ -n "$start" ]; then
  sed -n "$((start-4)),$((start+240))p" "$cc" || true
fi

echo
echo "=== database.create_transcript_version ==="
rg -n --line-number 'def\s+create_transcript_version\(' "$db" || true
startdb=$(rg -n --line-number 'def\s+create_transcript_version\(' "$db" | head -n1 | cut -d: -f1 || true)
if [ -n "$startdb" ]; then
  sed -n "$((startdb-4)),$((startdb+180))p" "$db" || true
fi

echo
echo "=== search for ConversationsRepository class/definition ==="
rg -n --hidden --no-ignore --type=py 'class\s+ConversationsRepository\b' || true
rg -n --hidden --no-ignore --type=py 'def\s+get_conversation\(' || true
rg -n --hidden --no-ignore --type=py 'class\s+Conversation' || true

Length of output: 16938


Critical: pass transcript_version_id through the memory queue and use it in the worker

Memory reprocess currently creates a memory_version tied to a specific transcript_version_id but queues a MemoryProcessingItem without that id; the memory worker reads conversation["transcript"] (the active transcript) and can therefore process the wrong version.

  • Add transcript_version_id: Optional[str] = None to MemoryProcessingItem in backends/advanced/src/advanced_omi_backend/processors.py (import Optional).
  • Update _process_memory_item in the same file to prefer item.transcript_version_id: lookup the matching entry in conversation["transcript_versions"] and use its "segments" to build the transcript; fall back to conversation["transcript"] if not found.
  • Pass transcript_version_id when creating the item in backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py (and optionally include it when queuing from transcription.py for consistency).
  • Verify end-to-end: queued item contains the version id and the memory worker extracts the correct segments for that version.
🤖 Prompt for AI Agents
In
backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
around lines 739-745, the MemoryProcessingItem is queued without the
transcript_version_id causing the memory worker to possibly reprocess the wrong
transcript; add transcript_version_id to the queued item by passing the correct
version id used to create the memory_version. Also update
backends/advanced/src/advanced_omi_backend/processors.py to: (1) add
"transcript_version_id: Optional[str] = None" to the MemoryProcessingItem
dataclass (import Optional), and (2) modify _process_memory_item to prefer
item.transcript_version_id — locate the matching entry in
conversation["transcript_versions"] and use its "segments" to build the
transcript, falling back to conversation["transcript"] if no match; optionally
propagate transcript_version_id when enqueueing from transcription.py for
consistency and verify end-to-end that the queued item contains the id and the
worker extracts the correct segments.

# Queue for memory processing
processor_manager = get_processor_manager()
await processor_manager.queue_memory(memory_item)

logger.info(f"Created memory reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}")
logger.info(f"Queued memory reprocessing job {run_id} (version {version_id}) for conversation {conversation_id}")

except Exception as e:
logger.error(f"Error queuing memory reprocessing: {e}")
return JSONResponse(
status_code=500, content={"error": f"Failed to queue memory reprocessing: {str(e)}"}
)

return JSONResponse(content={
"message": f"Memory reprocessing started for conversation {conversation_id}",
"run_id": run_id,
"version_id": version_id,
"transcript_version_id": transcript_version_id,
"config_hash": config_hash,
"status": "PENDING"
"status": "QUEUED"
})

except Exception as e:
Expand Down
Loading