SimpleOpenSoftware · thestumonkey · Jul 7, 2025 · Jul 15, 2025
diff --git a/backends/advanced-backend/.env.template b/backends/advanced-backend/.env.template
@@ -1,6 +1,33 @@
+
+# Transcription
 OFFLINE_ASR_TCP_URI=
-OLLAMA_BASE_URL=
+DEEPGRAM_API_KEY=
+
+# LLM config
+LLM_PROVIDER=openai
+LLM_API_KEY=sk-proj-
+LLM_CHOICE=gpt-4o-mini
+
+OLLAMA_BASE_URL=http://ollama:11434
+
+
+
+NGROK_URL=
 NGROK_AUTHTOKEN=
 HF_TOKEN=
+
+METRICS_COLLECTION_ENABLE=false
+
+
+MEM0_TELEMETRY=
+NEXT_PUBLIC_API_URL=http://127.0.0.1:8050
+NEXT_PUBLIC_USER_ID=NEXT_PUBLIC_USER_ID
+
+
 SPEAKER_SERVICE_URL=
-MONGODB_URI=
+
+MONGODB_URI=mongo
+QDRANT_BASE_URL=qdrant
+NEO4J_HOST=neo4j-mem0
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=
diff --git a/backends/advanced-backend/docker-compose.yml b/backends/advanced-backend/docker-compose.yml
@@ -8,6 +8,8 @@ services:
     volumes:
       - ./audio_chunks:/app/audio_chunks
       - ./debug_dir:/app/debug_dir
+    env_file:
+      - .env
     environment:
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - OFFLINE_ASR_TCP_URI=${OFFLINE_ASR_TCP_URI}
@@ -108,7 +110,7 @@ services:
       - "4040:4040" # Ngrok web interface
     environment:
       - NGROK_AUTHTOKEN=${NGROK_AUTHTOKEN}
-    command: "http friend-backend:8000 --url=intelligent-hypervisor.ngrok.app"
+    command: "http friend-backend:8000 --url=${NGROK_URL}"
     depends_on:
       - friend-backend
 volumes:

diff --git a/backends/advanced-backend/src/action_items_service.py b/backends/advanced-backend/src/action_items_service.py
@@ -5,6 +5,8 @@
 from motor.motor_asyncio import AsyncIOMotorCollection
 import logging
 import ollama
+import openai
+import os
 
 # Set up logging
 action_items_logger = logging.getLogger("action_items")
@@ -15,9 +17,9 @@ class ActionItemsService:
     Replaces the Mem0-based implementation for better update capabilities.
     """
 
-    def __init__(self, collection: AsyncIOMotorCollection, ollama_client: ollama.Client):
+    def __init__(self, collection: AsyncIOMotorCollection, llm_client: Any):
         self.collection = collection
-        self.ollama_client = ollama_client
+        self.llm_client = llm_client
         self._initialized = False
 
     async def initialize(self):
@@ -64,40 +66,34 @@ async def extract_and_store_action_items(self, transcript: str, client_id: str,
         except Exception as e:
             action_items_logger.error(f"Error extracting action items for {audio_uuid}: {e}")
             return 0
-
+        
     async def _extract_action_items_from_transcript(self, transcript: str, client_id: str, audio_uuid: str) -> List[Dict[str, Any]]:
-        """Extract action items from transcript using Ollama."""
+        """Extract action items from the transcript using the configured LLM client."""
         try:
             extraction_prompt = f"""
-<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-You are an intelligent assistant that reads transcripts and extracts all potential action items, even informal or implied ones.
-
-Your output must be a **JSON array**, where action item includes:
-- description: A short summary of the task
-- assignee: Who should do it ("unassigned" if unclear)
-- due_date: When it should be done ("not_specified" if not mentioned)
-- priority: high / medium / low / not_specified
-- context: Why or how the task came up
-- tool: The name of the tool required, if any ("check_email", "check_calendar", "set_alarm"), or "none" if no tool is needed
-
-Rules:
-- Identify both explicit tasks and implied ones.
-- Suggest a tool only when the task obviously requires it or could be automated.
-- If it's a general human task with no clear automation, use `"none"` for tool.
-
-Return **only** a JSON array. No explanation or extra text.
-
-<|eot_id|>
-<|start_header_id|>user<|end_header_id|>
-Transcript:
-<start_transcript>
-{transcript}
-<end_transcript>
-<|eot_id|>
-<|start_header_id|>assistant<|end_header_id|>
-"""
-            response = self.ollama_client.generate(
-                model="llama3.1:latest",
+            Analyze the following conversation transcript and extract action items.
+
+            Look for:
+            - Tasks that someone commits to do ("I'll send the report", "I will call them")
+            - Requests made to others ("Can you review this", "Please schedule a meeting")
+            - Things that need to be done ("We need to fix the bug", "The document needs updating")
+            - Follow-up actions mentioned ("Let's schedule a follow-up", "We should contact them")
+
+            For each action item found, provide:
+            - description: Clear description of what needs to be done
+            - assignee: Who should do it (use names from transcript, or "unassigned" if not clear)
+            - due_date: When it should be done (extract from transcript, or "not_specified")
+            - priority: Assess urgency from context (high/medium/low/not_specified)
+            - context: Brief context about when/why it was mentioned
+
+            Return ONLY a JSON array of action items. If no action items found, return an empty array [].
+
+            Transcript:
+            {transcript}
+            """
+
+            response = self.llm_client.generate(
+                model=os.getenv('LLM_CHOICE', 'llama-3.1:latest'),
                 prompt=extraction_prompt,
                 options={"temperature": 0.1}
             )
@@ -125,14 +121,13 @@ async def _extract_action_items_from_transcript(self, transcript: str, client_id
                     "updated_at": int(time.time()),
                     "source": "transcript_extraction"
                 })
-
                 # TODO: Handle all tools here, these can be imported from other files
                 # Handle set_alarm tool, this can be another llm call to mcp with description as input 
                 # Also handle sending notification via app or TTS
                 if item.get("tool") == "set_alarm":
                     description = item.get("description", "")
                     action_items_logger.info(f"Calling set alarm service with description: {description}")
-            
+
             action_items_logger.info(f"Extracted {len(action_items)} action items from {audio_uuid}")
             return action_items
 
@@ -456,4 +451,4 @@ async def get_action_item_stats(self, user_id: str) -> Dict[str, Any]:
 # <|eot_id|>
 # <|start_header_id|>assistant<|end_header_id|>
 # """
-# pyperclip.copy(extraction_prompt)
+# pyperclip.copy(extraction_prompt)