Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions backends/advanced/src/advanced_omi_backend/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""
Configuration management for Friend-Lite backend.

Currently contains diarization settings because they were used in multiple places
causing circular imports. Other configurations can be moved here as needed.
"""

import json
import logging
import shutil
from pathlib import Path

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Default diarization settings, used when no config file is present or the
# file cannot be parsed. Keys mirror what the diarization/speaker services
# consume downstream.
DEFAULT_DIARIZATION_SETTINGS = {
    # Which diarization backend to use; "pyannote" or "deepgram".
    "diarization_source": "pyannote",
    # Minimum embedding similarity for a speaker-identity match.
    "similarity_threshold": 0.15,
    # Minimum segment duration (seconds) to keep.
    "min_duration": 0.5,
    # Collar (seconds) around segment boundaries (pyannote-specific).
    "collar": 2.0,
    # Minimum silence (seconds) before splitting segments (pyannote-specific).
    "min_duration_off": 1.5,
    # Expected bounds on the number of distinct speakers (pyannote-specific).
    "min_speakers": 2,
    "max_speakers": 6
}

# Global in-process cache of the effective settings; populated by
# load_diarization_settings_from_file() and updated on save.
_diarization_settings = None


def get_diarization_config_path(filename="diarization_config.json"):
    """Return the preferred path for the diarization config file.

    Candidate locations are tried in order of preference:
      1. /app/data/<filename> — data directory, persisted across container
         restarts.
      2. /app/<filename> — container app root.
      3. ./<filename> — local development fallback (returned last whether or
         not it exists).

    Args:
        filename: Name of the config file. Defaults to the standard
            "diarization_config.json"; the parameter is a backward-compatible
            generalization for reuse and testing.

    Returns:
        pathlib.Path: The first candidate whose parent directory exists,
        otherwise a path relative to the current working directory.
    """
    for candidate in (Path("/app/data") / filename, Path("/app") / filename):
        # Only choose a containerized location when its directory is present.
        if candidate.parent.exists():
            return candidate
    # Local development path (relative to the current working directory).
    return Path(filename)


def load_diarization_settings_from_file(config_path=None):
    """Load diarization settings from file, seeding from a template if needed.

    Resolution order:
      1. If the config file is missing, try to create it by copying the
         template (/app/diarization_config.json.template, falling back to a
         local template path). Failure here is non-fatal.
      2. If the config file exists and parses as JSON, use it — merged over
         DEFAULT_DIARIZATION_SETTINGS so a partial file still yields every
         expected key.
      3. Otherwise fall back to a copy of the defaults.

    Args:
        config_path: Optional explicit path to the config file. When None
            (the default, preserving the original zero-argument call), the
            path is resolved via get_diarization_config_path().

    Returns:
        dict: The effective diarization settings. The result is also stored
        in the module-level ``_diarization_settings`` cache.
    """
    global _diarization_settings

    if config_path is None:
        config_path = get_diarization_config_path()
    config_path = Path(config_path)

    template_path = Path("/app/diarization_config.json.template")
    if not template_path.exists():
        # Local development fallback for the template.
        template_path = Path("diarization_config.json.template")

    # If the config doesn't exist yet, try to seed it from the template.
    if not config_path.exists() and template_path.exists():
        try:
            # Ensure the destination directory exists before copying.
            config_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(template_path, config_path)
            logger.info(f"Created diarization config from template at {config_path}")
        except Exception as e:
            # Non-fatal: we can still fall through to the defaults below.
            logger.warning(f"Could not copy template to {config_path}: {e}")

    # Load from file if it exists (or was just created from the template).
    if config_path.exists():
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            # Merge over defaults so missing keys get sane values; a non-dict
            # payload raises here and drops us to the defaults branch.
            _diarization_settings = {**DEFAULT_DIARIZATION_SETTINGS, **loaded}
            logger.info(f"Loaded diarization settings from {config_path}")
            return _diarization_settings
        except Exception as e:
            logger.error(f"Error loading diarization settings from {config_path}: {e}")

    # Fall back to a copy of the defaults (copy, so callers cannot mutate
    # the shared DEFAULT_DIARIZATION_SETTINGS dict).
    _diarization_settings = DEFAULT_DIARIZATION_SETTINGS.copy()
    logger.info("Using default diarization settings")
    return _diarization_settings


def save_diarization_settings_to_file(settings, config_path=None):
    """Persist diarization settings to disk and update the module cache.

    The file is written atomically: the JSON is first written to a sibling
    temp file and then renamed over the target, so a crash mid-write cannot
    leave a truncated or corrupt config behind.

    Args:
        settings: Dict of diarization settings to persist.
        config_path: Optional explicit destination path. Defaults to
            get_diarization_config_path(); the parameter is a
            backward-compatible generalization for reuse and testing.

    Returns:
        bool: True on success, False if the file could not be written. The
        in-memory cache is only updated after a successful write.
    """
    global _diarization_settings

    if config_path is None:
        config_path = get_diarization_config_path()
    config_path = Path(config_path)

    tmp_path = config_path.with_name(config_path.name + ".tmp")
    try:
        # Ensure the destination directory exists.
        config_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to a temp file, then rename into place (atomic on POSIX),
        # so readers never observe a partially written config.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(settings, f, indent=2)
        tmp_path.replace(config_path)

        # Update the in-process cache only once the file is safely on disk.
        _diarization_settings = settings

        logger.info(f"Saved diarization settings to {config_path}")
        return True
    except Exception as e:
        logger.error(f"Error saving diarization settings to {config_path}: {e}")
        return False


# Initialize settings on module load so importers can read the cached
# settings immediately. NOTE: this performs file I/O at import time (and may
# copy the template into place) — a deliberate trade-off for simplicity.
_diarization_settings = load_diarization_settings_from_file()
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from wyoming.audio import AudioChunk

from advanced_omi_backend.client_manager import generate_client_id
from advanced_omi_backend.config import load_diarization_settings_from_file, save_diarization_settings_to_file
from advanced_omi_backend.database import chunks_col
from advanced_omi_backend.job_tracker import FileStatus, JobStatus, get_job_tracker
from advanced_omi_backend.processors import AudioProcessingItem, get_processor_manager
Expand Down Expand Up @@ -748,103 +749,7 @@ async def process_files_with_content(
await job_tracker.update_job_status(job_id, JobStatus.FAILED, error_msg)


# Default diarization settings
DEFAULT_DIARIZATION_SETTINGS = {
"diarization_source": "pyannote",
"similarity_threshold": 0.15,
"min_duration": 0.5,
"collar": 2.0,
"min_duration_off": 1.5,
"min_speakers": 2,
"max_speakers": 6
}

# Global cache for diarization settings
_diarization_settings = None


def get_diarization_config_path():
"""Get the path to the diarization config file."""
# Try different locations in order of preference
# 1. Data directory (for persistence across container restarts)
data_path = Path("/app/data/diarization_config.json")
if data_path.parent.exists():
return data_path

# 2. App root directory
app_path = Path("/app/diarization_config.json")
if app_path.parent.exists():
return app_path

# 3. Local development path
local_path = Path("diarization_config.json")
return local_path


def load_diarization_settings_from_file():
"""Load diarization settings from file or create from template."""
global _diarization_settings

config_path = get_diarization_config_path()
template_path = Path("/app/diarization_config.json.template")

# If no template, try local development path
if not template_path.exists():
template_path = Path("diarization_config.json.template")

# If config doesn't exist, try to copy from template
if not config_path.exists():
if template_path.exists():
try:
# Ensure parent directory exists
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy(template_path, config_path)
logger.info(f"Created diarization config from template at {config_path}")
except Exception as e:
logger.warning(f"Could not copy template to {config_path}: {e}")

# Load from file if it exists
if config_path.exists():
try:
with open(config_path, 'r') as f:
_diarization_settings = json.load(f)
logger.info(f"Loaded diarization settings from {config_path}")
return _diarization_settings
except Exception as e:
logger.error(f"Error loading diarization settings from {config_path}: {e}")

# Fall back to defaults
_diarization_settings = DEFAULT_DIARIZATION_SETTINGS.copy()
logger.info("Using default diarization settings")
return _diarization_settings


def save_diarization_settings_to_file(settings):
"""Save diarization settings to file."""
global _diarization_settings

config_path = get_diarization_config_path()

try:
# Ensure parent directory exists
config_path.parent.mkdir(parents=True, exist_ok=True)

# Write settings to file
with open(config_path, 'w') as f:
json.dump(settings, f, indent=2)

# Update cache
_diarization_settings = settings

logger.info(f"Saved diarization settings to {config_path}")
return True
except Exception as e:
logger.error(f"Error saving diarization settings to {config_path}: {e}")
return False


# Initialize settings on module load
_diarization_settings = load_diarization_settings_from_file()
# Configuration functions moved to config.py to avoid circular imports


async def get_diarization_settings():
Expand Down Expand Up @@ -884,34 +789,36 @@ async def save_diarization_settings(settings: dict):
return JSONResponse(
status_code=400, content={"error": f"Invalid value for {key}: must be integer 1-20"}
)
elif key == "diarization_source":
if not isinstance(value, str) or value not in ["pyannote", "deepgram"]:
return JSONResponse(
status_code=400, content={"error": f"Invalid value for {key}: must be 'pyannote' or 'deepgram'"}
)
else:
if not isinstance(value, (int, float)) or value < 0:
return JSONResponse(
status_code=400, content={"error": f"Invalid value for {key}: must be positive number"}
)

# Update global settings with new values
global _diarization_settings
if _diarization_settings is None:
_diarization_settings = DEFAULT_DIARIZATION_SETTINGS.copy()

_diarization_settings.update(settings)
# Get current settings and merge with new values
current_settings = load_diarization_settings_from_file()
current_settings.update(settings)

Comment on lines +803 to 806
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Validate min_speakers ≤ max_speakers after merge.

Prevents impossible configs reaching runtime.

         current_settings = load_diarization_settings_from_file()
         current_settings.update(settings)
+        # Cross-field validation
+        min_s = current_settings.get("min_speakers")
+        max_s = current_settings.get("max_speakers")
+        if isinstance(min_s, int) and isinstance(max_s, int) and min_s > max_s:
+            return JSONResponse(
+                status_code=400,
+                content={"error": "min_speakers cannot exceed max_speakers"}
+            )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Get current settings and merge with new values
current_settings = load_diarization_settings_from_file()
current_settings.update(settings)
# Get current settings and merge with new values
current_settings = load_diarization_settings_from_file()
current_settings.update(settings)
# Cross-field validation
min_s = current_settings.get("min_speakers")
max_s = current_settings.get("max_speakers")
if isinstance(min_s, int) and isinstance(max_s, int) and min_s > max_s:
return JSONResponse(
status_code=400,
content={"error": "min_speakers cannot exceed max_speakers"}
)
🤖 Prompt for AI Agents
In backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
around lines 803 to 806, after merging current_settings.update(settings)
validate that the resulting min_speakers and max_speakers are consistent: coerce
or parse them to integers if necessary, check that both keys exist (or use
defaults) and that min_speakers <= max_speakers; if the check fails,
raise/return a clear client error (e.g., HTTP 400 / HTTPException) with a
message indicating the constraint violation and do not proceed to save or apply
the settings. Ensure validation runs immediately after the merge and before any
persistence or runtime usage.

# Save to file
if save_diarization_settings_to_file(_diarization_settings):
if save_diarization_settings_to_file(current_settings):
logger.info(f"Updated and saved diarization settings: {settings}")

return {
"message": "Diarization settings saved successfully",
"settings": _diarization_settings,
"settings": current_settings,
"status": "success"
}
else:
# Even if file save fails, we've updated the in-memory settings
logger.warning("Settings updated in memory but file save failed")
return {
"message": "Settings updated (file save failed)",
"settings": _diarization_settings,
"settings": current_settings,
"status": "partial"
}
Comment on lines 817 to 823
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

“Partial” branch message is misleading; no in-memory state exists anymore.

Since settings are file-backed (no process-global cache), claiming “updated in memory” is inaccurate. Return an error status.

-            return {
-                "message": "Settings updated (file save failed)",
-                "settings": current_settings,
-                "status": "partial"
-            }
+            return {
+                "message": "Failed to persist diarization settings to file",
+                "settings": current_settings,  # Echo attempted values for UX
+                "status": "error"
+            }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Even if file save fails, we've updated the in-memory settings
logger.warning("Settings updated in memory but file save failed")
return {
"message": "Settings updated (file save failed)",
"settings": _diarization_settings,
"settings": current_settings,
"status": "partial"
}
# Even if file save fails, we've updated the in-memory settings
logger.warning("Settings updated in memory but file save failed")
return {
"message": "Failed to persist diarization settings to file",
"settings": current_settings, # Echo attempted values for UX
"status": "error"
}
🤖 Prompt for AI Agents
In backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
around lines 817 to 823, the response incorrectly claims settings were "updated
in memory" despite there being no process-global cache; change the returned
message and status to indicate a failure instead of a partial success, e.g. log
the file save error, return an error HTTP status payload (message like "Settings
update failed (file save failed)" and status "error"), and avoid claiming or
returning an in-memory settings object — instead return the previous persisted
settings or omit "settings" from the error response as appropriate.


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async def diarize_identify_match(
logger.info(f"Diarizing, identifying, and matching words for {audio_path}")

# Read diarization source from existing config system
from advanced_omi_backend.controllers.system_controller import load_diarization_settings_from_file
from advanced_omi_backend.config import load_diarization_settings_from_file
config = load_diarization_settings_from_file()
diarization_source = config.get("diarization_source", "pyannote")

Expand All @@ -78,54 +78,42 @@ async def diarize_identify_match(
)

if diarization_source == "deepgram":
# DEEPGRAM PATH: Use /v1/listen with structured config
logger.info("Using Deepgram diarization path")
# DEEPGRAM DIARIZATION PATH: We EXPECT transcript has speaker info from Deepgram
# Only need speaker identification of existing segments
logger.info("Using Deepgram diarization path - transcript should have speaker segments, identifying speakers")

# Structure config for Deepgram diarization
diarization_config = {"diarization": {"provider": "deepgram"}}
# TODO: Implement proper speaker identification for Deepgram segments
# For now, use diarize-identify-match as fallback until we implement segment identification
logger.warning("Deepgram segment identification not yet implemented, using diarize-identify-match as fallback")

# Log warning if pyannote params provided
pyannote_params = ["min_speakers", "max_speakers", "collar", "min_duration_off"]
provided_params = [p for p in pyannote_params if config.get(p) is not None]
if provided_params:
logger.warning("Ignoring pyannote parameters for Deepgram diarization: %s", provided_params)

# Add structured diarization config
form_data.add_field("diarization_config", json.dumps(diarization_config))

# Add speaker identification params
form_data.add_field("enhance_speakers", "true")
form_data.add_field("transcript_data", json.dumps(transcript_data))
form_data.add_field("user_id", "1") # TODO: Implement proper user mapping
form_data.add_field("speaker_confidence_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))

# Use /v1/listen endpoint
endpoint = "/v1/listen"
# Use /v1/diarize-identify-match endpoint as fallback
endpoint = "/v1/diarize-identify-match"

Comment on lines +89 to 96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Honor the user_id parameter; avoid hard-coding "1".

Currently ignores the method argument.

-                        form_data.add_field("transcript_data", json.dumps(transcript_data))
-                        form_data.add_field("user_id", "1")  # TODO: Implement proper user mapping
+                        form_data.add_field("transcript_data", json.dumps(transcript_data))
+                        form_data.add_field("user_id", str(user_id) if user_id is not None else "1")  # TODO: map real users
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
form_data.add_field("transcript_data", json.dumps(transcript_data))
form_data.add_field("user_id", "1") # TODO: Implement proper user mapping
form_data.add_field("speaker_confidence_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
# Use /v1/listen endpoint
endpoint = "/v1/listen"
# Use /v1/diarize-identify-match endpoint as fallback
endpoint = "/v1/diarize-identify-match"
form_data.add_field("transcript_data", json.dumps(transcript_data))
form_data.add_field("user_id", str(user_id) if user_id is not None else "1") # TODO: map real users
form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
# Use /v1/diarize-identify-match endpoint as fallback
endpoint = "/v1/diarize-identify-match"
🤖 Prompt for AI Agents
In backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py
around lines 89 to 96, the code hard-codes form_data.add_field("user_id", "1")
and ignores the function/method user_id argument; replace the hard-coded value
with the passed-in user_id (coerce to string), fallback to a sensible default
only if the argument is None/empty, and remove the TODO comment; ensure any
necessary validation (int or non-empty) is performed before adding the field so
the endpoint receives the correct user identity.

else: # pyannote (default)
# PYANNOTE PATH: Use /v1/listen with structured config including pyannote parameters
logger.info("Using Pyannote diarization path")

# Structure config for Pyannote diarization
diarization_config = {
"diarization": {
"provider": "pyannote",
"min_speakers": config.get("min_speakers"),
"max_speakers": config.get("max_speakers"),
"collar": config.get("collar", 2.0),
"min_duration_off": config.get("min_duration_off", 1.5)
}
}
# PYANNOTE PATH: Backend has transcript, need diarization + speaker identification
logger.info("Using Pyannote path - diarizing backend transcript and identifying speakers")

# Add structured diarization config
form_data.add_field("diarization_config", json.dumps(diarization_config))

# Add speaker identification params
form_data.add_field("enhance_speakers", "true")
# Send existing transcript for diarization and speaker matching
form_data.add_field("transcript_data", json.dumps(transcript_data))
form_data.add_field("user_id", "1") # TODO: Implement proper user mapping
form_data.add_field("speaker_confidence_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))

# Add pyannote diarization parameters
form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
form_data.add_field("collar", str(config.get("collar", 2.0)))
form_data.add_field("min_duration_off", str(config.get("min_duration_off", 1.5)))
if config.get("min_speakers"):
form_data.add_field("min_speakers", str(config.get("min_speakers")))
if config.get("max_speakers"):
form_data.add_field("max_speakers", str(config.get("max_speakers")))

# Use /v1/listen endpoint (now supports both providers)
endpoint = "/v1/listen"
# Use /v1/diarize-identify-match endpoint for backend integration
endpoint = "/v1/diarize-identify-match"

Comment on lines +101 to 117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Same bug in pyannote branch; also good param pass-through.

-                        form_data.add_field("transcript_data", json.dumps(transcript_data))
-                        form_data.add_field("user_id", "1")  # TODO: Implement proper user mapping
+                        form_data.add_field("transcript_data", json.dumps(transcript_data))
+                        form_data.add_field("user_id", str(user_id) if user_id is not None else "1")  # TODO: map real users
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Send existing transcript for diarization and speaker matching
form_data.add_field("transcript_data", json.dumps(transcript_data))
form_data.add_field("user_id", "1") # TODO: Implement proper user mapping
form_data.add_field("speaker_confidence_threshold", str(config.get("similarity_threshold", 0.15)))
form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))
# Add pyannote diarization parameters
form_data.add_field("min_duration", str(config.get("min_duration", 0.5)))
form_data.add_field("collar", str(config.get("collar", 2.0)))
form_data.add_field("min_duration_off", str(config.get("min_duration_off", 1.5)))
if config.get("min_speakers"):
form_data.add_field("min_speakers", str(config.get("min_speakers")))
if config.get("max_speakers"):
form_data.add_field("max_speakers", str(config.get("max_speakers")))
# Use /v1/listen endpoint (now supports both providers)
endpoint = "/v1/listen"
# Use /v1/diarize-identify-match endpoint for backend integration
endpoint = "/v1/diarize-identify-match"
# Send existing transcript for diarization and speaker matching
form_data.add_field("transcript_data", json.dumps(transcript_data))
form_data.add_field("user_id", str(user_id) if user_id is not None else "1") # TODO: map real users
form_data.add_field("similarity_threshold", str(config.get("similarity_threshold", 0.15)))

# Make the request to the consolidated endpoint
async with session.post(
Expand Down
14 changes: 13 additions & 1 deletion backends/advanced/src/advanced_omi_backend/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
BaseTranscriptionProvider,
get_transcription_provider,
)
from advanced_omi_backend.config import load_diarization_settings_from_file

# ASR Configuration
TRANSCRIPTION_PROVIDER = os.getenv("TRANSCRIPTION_PROVIDER") # Optional: 'deepgram' or 'parakeet'
Expand Down Expand Up @@ -117,7 +118,18 @@ async def _get_transcript(self, audio_duration_seconds: Optional[float] = None):
logger.warning("No audio data or sample rate available for transcription")
return None

return await self.provider.transcribe(combined_audio, sample_rate)
# Check if we should request diarization based on configuration
config = load_diarization_settings_from_file()
diarization_source = config.get("diarization_source", "pyannote")

# Request diarization if using Deepgram as diarization source
should_diarize = (diarization_source == "deepgram" and
self.provider.name in ["Deepgram", "Deepgram-Streaming"])

if should_diarize:
logger.info(f"Requesting diarization from {self.provider.name} (diarization_source=deepgram)")

return await self.provider.transcribe(combined_audio, sample_rate, diarize=should_diarize)

except Exception as e:
logger.error(f"Error getting transcript from {self.provider.name}: {e}")
Expand Down
Loading