Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions backends/advanced-backend/.env.template
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@

# Transcription
OFFLINE_ASR_TCP_URI=
OLLAMA_BASE_URL=
DEEPGRAM_API_KEY=

# LLM config
LLM_PROVIDER=openai
LLM_API_KEY=sk-proj-
LLM_CHOICE=gpt-4o-mini

OLLAMA_BASE_URL=http://ollama:11434



NGROK_URL=
NGROK_AUTHTOKEN=
HF_TOKEN=

METRICS_COLLECTION_ENABLE=false


MEM0_TELEMETRY=
NEXT_PUBLIC_API_URL=http://127.0.0.1:8050
NEXT_PUBLIC_USER_ID=NEXT_PUBLIC_USER_ID


SPEAKER_SERVICE_URL=
MONGODB_URI=

MONGODB_URI=mongo
QDRANT_BASE_URL=qdrant
NEO4J_HOST=neo4j-mem0
NEO4J_USER=neo4j
NEO4J_PASSWORD=
4 changes: 3 additions & 1 deletion backends/advanced-backend/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ services:
volumes:
- ./audio_chunks:/app/audio_chunks
- ./debug_dir:/app/debug_dir
env_file:
- .env
environment:
- DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
- OFFLINE_ASR_TCP_URI=${OFFLINE_ASR_TCP_URI}
Expand Down Expand Up @@ -108,7 +110,7 @@ services:
- "4040:4040" # Ngrok web interface
environment:
- NGROK_AUTHTOKEN=${NGROK_AUTHTOKEN}
command: "http friend-backend:8000 --url=intelligent-hypervisor.ngrok.app"
command: "http friend-backend:8000 --url=${NGROK_URL}"
depends_on:
- friend-backend
volumes:
Expand Down
67 changes: 31 additions & 36 deletions backends/advanced-backend/src/action_items_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from motor.motor_asyncio import AsyncIOMotorCollection
import logging
import ollama
import openai
import os

# Set up logging
action_items_logger = logging.getLogger("action_items")
Expand All @@ -15,9 +17,9 @@ class ActionItemsService:
Replaces the Mem0-based implementation for better update capabilities.
"""

def __init__(self, collection: AsyncIOMotorCollection, ollama_client: ollama.Client):
def __init__(self, collection: AsyncIOMotorCollection, llm_client: Any):
self.collection = collection
self.ollama_client = ollama_client
self.llm_client = llm_client
self._initialized = False

async def initialize(self):
Expand Down Expand Up @@ -64,40 +66,34 @@ async def extract_and_store_action_items(self, transcript: str, client_id: str,
except Exception as e:
action_items_logger.error(f"Error extracting action items for {audio_uuid}: {e}")
return 0

async def _extract_action_items_from_transcript(self, transcript: str, client_id: str, audio_uuid: str) -> List[Dict[str, Any]]:
"""Extract action items from transcript using Ollama."""
"""Extract action items from the transcript using the configured LLM."""
try:
extraction_prompt = f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an intelligent assistant that reads transcripts and extracts all potential action items, even informal or implied ones.

Your output must be a **JSON array**, where action item includes:
- description: A short summary of the task
- assignee: Who should do it ("unassigned" if unclear)
- due_date: When it should be done ("not_specified" if not mentioned)
- priority: high / medium / low / not_specified
- context: Why or how the task came up
- tool: The name of the tool required, if any ("check_email", "check_calendar", "set_alarm"), or "none" if no tool is needed

Rules:
- Identify both explicit tasks and implied ones.
- Suggest a tool only when the task obviously requires it or could be automated.
- If it's a general human task with no clear automation, use `"none"` for tool.

Return **only** a JSON array. No explanation or extra text.

<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Transcript:
<start_transcript>
{transcript}
<end_transcript>
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
response = self.ollama_client.generate(
model="llama3.1:latest",
Analyze the following conversation transcript and extract action items.

Look for:
- Tasks that someone commits to do ("I'll send the report", "I will call them")
- Requests made to others ("Can you review this", "Please schedule a meeting")
- Things that need to be done ("We need to fix the bug", "The document needs updating")
- Follow-up actions mentioned ("Let's schedule a follow-up", "We should contact them")

For each action item found, provide:
- description: Clear description of what needs to be done
- assignee: Who should do it (use names from transcript, or "unassigned" if not clear)
- due_date: When it should be done (extract from transcript, or "not_specified")
- priority: Assess urgency from context (high/medium/low/not_specified)
- context: Brief context about when/why it was mentioned

Return ONLY a JSON array of action items. If no action items found, return an empty array [].

Transcript:
{transcript}
"""

response = self.llm_client.generate(
model=os.getenv('LLM_CHOICE', 'llama-3.1:latest'),
prompt=extraction_prompt,
options={"temperature": 0.1}
)
Expand Down Expand Up @@ -125,14 +121,13 @@ async def _extract_action_items_from_transcript(self, transcript: str, client_id
"updated_at": int(time.time()),
"source": "transcript_extraction"
})

# TODO: Handle all tools here; these can be imported from other files.
# Handle the set_alarm tool; this can be another LLM call to MCP with the description as input.
# Also handle sending notifications via the app or TTS.
if item.get("tool") == "set_alarm":
description = item.get("description", "")
action_items_logger.info(f"Calling set alarm service with description: {description}")

action_items_logger.info(f"Extracted {len(action_items)} action items from {audio_uuid}")
return action_items

Expand Down Expand Up @@ -456,4 +451,4 @@ async def get_action_item_stats(self, user_id: str) -> Dict[str, Any]:
# <|eot_id|>
# <|start_header_id|>assistant<|end_header_id|>
# """
# pyperclip.copy(extraction_prompt)
# pyperclip.copy(extraction_prompt)
Loading