diff --git a/.storybook/mocks/orpc.ts b/.storybook/mocks/orpc.ts index 5dbe61e459..e4dae7450e 100644 --- a/.storybook/mocks/orpc.ts +++ b/.storybook/mocks/orpc.ts @@ -6,7 +6,11 @@ import type { APIClient } from "@/browser/contexts/API"; import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; import type { ProjectConfig } from "@/node/config"; -import type { WorkspaceChatMessage, ProvidersConfigMap } from "@/common/orpc/types"; +import type { + WorkspaceChatMessage, + ProvidersConfigMap, + WorkspaceStatsSnapshot, +} from "@/common/orpc/types"; import type { ChatStats } from "@/common/types/chatStats"; import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace"; import { createAsyncMessageQueue } from "@/common/utils/asyncMessageQueue"; @@ -69,6 +73,8 @@ export interface MockORPCClientOptions { }> >; /** Session usage data per workspace (for Costs tab) */ + workspaceStatsSnapshots?: Map; + statsTabVariant?: "control" | "stats"; sessionUsage?: Map; /** MCP server configuration per project */ mcpServers?: Map< @@ -112,11 +118,27 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl onProjectRemove, backgroundProcesses = new Map(), sessionUsage = new Map(), + workspaceStatsSnapshots = new Map(), + statsTabVariant = "control", mcpServers = new Map(), mcpOverrides = new Map(), mcpTestResults = new Map(), } = options; + // Feature flags + let statsTabOverride: "default" | "on" | "off" = "default"; + + const getStatsTabState = () => { + const enabled = + statsTabOverride === "on" + ? true + : statsTabOverride === "off" + ? 
false + : statsTabVariant === "stats"; + + return { enabled, variant: statsTabVariant, override: statsTabOverride } as const; + }; + const workspaceMap = new Map(workspaces.map((w) => [w.id, w])); const mockStats: ChatStats = { @@ -135,6 +157,16 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl _input.texts.map(() => 0), calculateStats: async () => mockStats, }, + features: { + getStatsTabState: async () => getStatsTabState(), + setStatsTabOverride: async (input: { override: "default" | "on" | "off" }) => { + statsTabOverride = input.override; + return getStatsTabState(); + }, + }, + telemetry: { + track: async () => undefined, + }, server: { getLaunchProject: async () => null, getSshHost: async () => null, @@ -250,12 +282,12 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl }, onMetadata: async function* () { // Empty generator - no metadata updates in mock - await new Promise(() => {}); // Never resolves, keeps stream open + return; }, activity: { list: async () => ({}), subscribe: async function* () { - await new Promise(() => {}); // Never resolves + return; }, }, backgroundBashes: { @@ -271,6 +303,18 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl terminate: async () => ({ success: true, data: undefined }), sendToBackground: async () => ({ success: true, data: undefined }), }, + stats: { + subscribe: async function* (input: { workspaceId: string }) { + const snapshot = workspaceStatsSnapshots.get(input.workspaceId); + if (snapshot) { + yield snapshot; + } + }, + clear: async (input: { workspaceId: string }) => { + workspaceStatsSnapshots.delete(input.workspaceId); + return { success: true, data: undefined }; + }, + }, getSessionUsage: async (input: { workspaceId: string }) => sessionUsage.get(input.workspaceId), mcp: { get: async (input: { workspaceId: string }) => mcpOverrides.get(input.workspaceId) ?? 
{}, diff --git a/src/browser/App.tsx b/src/browser/App.tsx index 8ab4af27d9..448dd5bcc2 100644 --- a/src/browser/App.tsx +++ b/src/browser/App.tsx @@ -55,6 +55,8 @@ import { SplashScreenProvider } from "./components/splashScreens/SplashScreenPro import { TutorialProvider } from "./contexts/TutorialContext"; import { ConnectionStatusIndicator } from "./components/ConnectionStatusIndicator"; import { TooltipProvider } from "./components/ui/tooltip"; +import { useFeatureFlags } from "./contexts/FeatureFlagsContext"; +import { FeatureFlagsProvider } from "./contexts/FeatureFlagsContext"; import { ExperimentsProvider } from "./contexts/ExperimentsContext"; import { getWorkspaceSidebarKey } from "./utils/workspace"; @@ -131,6 +133,11 @@ function AppInner() { // Get workspace store for command palette const workspaceStore = useWorkspaceStoreRaw(); + const { statsTabState } = useFeatureFlags(); + useEffect(() => { + workspaceStore.setStatsEnabled(Boolean(statsTabState?.enabled)); + }, [workspaceStore, statsTabState?.enabled]); + // Track telemetry when workspace selection changes const prevWorkspaceRef = useRef(null); useEffect(() => { @@ -449,6 +456,7 @@ function AppInner() { onToggleTheme: toggleTheme, onSetTheme: setThemePreference, onOpenSettings: openSettings, + onClearTimingStats: (workspaceId: string) => workspaceStore.clearTimingStats(workspaceId), api, }; @@ -761,17 +769,19 @@ function App() { return ( - - - - - - - - - - - + + + + + + + + + + + + + ); diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx index 0247c317a0..ad2974d7e0 100644 --- a/src/browser/components/AIView.tsx +++ b/src/browser/components/AIView.tsx @@ -41,11 +41,13 @@ import { useAutoScroll } from "@/browser/hooks/useAutoScroll"; import { useOpenTerminal } from "@/browser/hooks/useOpenTerminal"; import { useOpenInEditor } from "@/browser/hooks/useOpenInEditor"; import { usePersistedState } from "@/browser/hooks/usePersistedState"; +import { useFeatureFlags } from 
"@/browser/contexts/FeatureFlagsContext"; import { useThinking } from "@/browser/contexts/ThinkingContext"; import { useWorkspaceState, useWorkspaceAggregator, useWorkspaceUsage, + useWorkspaceStatsSnapshot, } from "@/browser/stores/WorkspaceStore"; import { WorkspaceHeader } from "./WorkspaceHeader"; import { getModelName } from "@/common/utils/ai/models"; @@ -106,7 +108,8 @@ const AIViewInner: React.FC = ({ // Resizable RightSidebar width - separate hooks per tab for independent persistence const costsSidebar = useResizableSidebar({ - enabled: selectedRightTab === "costs", + // Costs + Stats share the same resizable width persistence + enabled: selectedRightTab === "costs" || selectedRightTab === "stats", defaultWidth: 300, minWidth: 300, maxWidth: 1200, @@ -127,6 +130,9 @@ const AIViewInner: React.FC = ({ const startResize = selectedRightTab === "review" ? reviewSidebar.startResize : costsSidebar.startResize; + const statsSnapshot = useWorkspaceStatsSnapshot(workspaceId); + const { statsTabState } = useFeatureFlags(); + const statsEnabled = Boolean(statsTabState?.enabled); const workspaceState = useWorkspaceState(workspaceId); const aggregator = useWorkspaceAggregator(workspaceId); const workspaceUsage = useWorkspaceUsage(workspaceId); @@ -705,14 +711,18 @@ const AIViewInner: React.FC = ({ awaitingUserQuestion ? undefined : activeStreamMessageId - ? aggregator?.getStreamingTokenCount(activeStreamMessageId) + ? statsEnabled && statsSnapshot?.active?.messageId === activeStreamMessageId + ? statsSnapshot.active.liveTokenCount + : aggregator?.getStreamingTokenCount(activeStreamMessageId) : undefined } tps={ awaitingUserQuestion ? undefined : activeStreamMessageId - ? aggregator?.getStreamingTPS(activeStreamMessageId) + ? statsEnabled && statsSnapshot?.active?.messageId === activeStreamMessageId + ? 
statsSnapshot.active.liveTPS + : aggregator?.getStreamingTPS(activeStreamMessageId) : undefined } /> diff --git a/src/browser/components/RightSidebar.tsx b/src/browser/components/RightSidebar.tsx index fe64714566..1387d5b1bc 100644 --- a/src/browser/components/RightSidebar.tsx +++ b/src/browser/components/RightSidebar.tsx @@ -1,10 +1,17 @@ import React from "react"; import { RIGHT_SIDEBAR_TAB_KEY, RIGHT_SIDEBAR_COLLAPSED_KEY } from "@/common/constants/storage"; import { usePersistedState } from "@/browser/hooks/usePersistedState"; -import { useWorkspaceUsage } from "@/browser/stores/WorkspaceStore"; +import { useWorkspaceUsage, useWorkspaceStatsSnapshot } from "@/browser/stores/WorkspaceStore"; +import { useProviderOptions } from "@/browser/hooks/useProviderOptions"; import { useResizeObserver } from "@/browser/hooks/useResizeObserver"; +import { useFeatureFlags } from "@/browser/contexts/FeatureFlagsContext"; +import { useAutoCompactionSettings } from "@/browser/hooks/useAutoCompactionSettings"; +import { ErrorBoundary } from "./ErrorBoundary"; import { CostsTab } from "./RightSidebar/CostsTab"; +import { StatsTab } from "./RightSidebar/StatsTab"; +import { VerticalTokenMeter } from "./RightSidebar/VerticalTokenMeter"; import { ReviewPanel } from "./RightSidebar/CodeReview/ReviewPanel"; +import { calculateTokenMeterData } from "@/common/utils/tokens/tokenMeterUtils"; import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator"; import { matchesKeybind, KEYBINDS, formatKeybind } from "@/browser/utils/ui/keybinds"; import { Tooltip, TooltipTrigger, TooltipContent } from "./ui/tooltip"; @@ -17,7 +24,17 @@ export interface ReviewStats { read: number; } +/** Format duration for tab display (compact format) */ +function formatTabDuration(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + if (ms < 60000) return `${Math.round(ms / 1000)}s`; + const mins = Math.floor(ms / 60000); + const secs = Math.round((ms % 60000) / 
1000); + return secs > 0 ? `${mins}m${secs}s` : `${mins}m`; +} + interface SidebarContainerProps { + collapsed: boolean; wide?: boolean; /** Custom width from drag-resize (persisted per-tab by AIView) */ customWidth?: number; @@ -32,11 +49,13 @@ interface SidebarContainerProps { * SidebarContainer - Main sidebar wrapper with dynamic width * * Width priority (first match wins): - * 1. customWidth - From drag-resize (persisted per-tab) - * 2. wide - Auto-calculated max width for Review tab (when not drag-resizing) - * 3. default (300px) - Costs tab when no customWidth saved + * 1. collapsed (20px) - Shows vertical token meter only + * 2. customWidth - From drag-resize (persisted per-tab) + * 3. wide - Auto-calculated max width for Review tab (when not drag-resizing) + * 4. default (300px) - Costs tab when no customWidth saved */ const SidebarContainer: React.FC = ({ + collapsed, wide, customWidth, isResizing, @@ -44,19 +63,23 @@ const SidebarContainer: React.FC = ({ role, "aria-label": ariaLabel, }) => { - const width = customWidth - ? `${customWidth}px` - : wide - ? "min(1200px, calc(100vw - 400px))" - : "300px"; + const width = collapsed + ? "20px" + : customWidth + ? `${customWidth}px` + : wide + ? "min(1200px, calc(100vw - 400px))" + : "300px"; return (
= ({ ); }; -type TabType = "costs" | "review"; +type TabType = "costs" | "stats" | "review"; export type { TabType }; @@ -100,6 +123,15 @@ const RightSidebarComponent: React.FC = ({ // Global tab preference (not per-workspace) const [selectedTab, setSelectedTab] = usePersistedState(RIGHT_SIDEBAR_TAB_KEY, "costs"); + const { statsTabState } = useFeatureFlags(); + const statsTabEnabled = Boolean(statsTabState?.enabled); + + React.useEffect(() => { + if (!statsTabEnabled && selectedTab === "stats") { + setSelectedTab("costs"); + } + }, [statsTabEnabled, selectedTab, setSelectedTab]); + // Trigger for focusing Review panel (preserves hunk selection) const [focusTrigger, setFocusTrigger] = React.useState(0); @@ -116,22 +148,34 @@ const RightSidebarComponent: React.FC = ({ e.preventDefault(); setSelectedTab("review"); setFocusTrigger((prev) => prev + 1); + } else if (statsTabEnabled && matchesKeybind(e, KEYBINDS.STATS_TAB)) { + e.preventDefault(); + setSelectedTab("stats"); } }; window.addEventListener("keydown", handleKeyDown); return () => window.removeEventListener("keydown", handleKeyDown); - }, [setSelectedTab]); + }, [setSelectedTab, statsTabEnabled]); const usage = useWorkspaceUsage(workspaceId); + + const { options } = useProviderOptions(); + const use1M = options.anthropic?.use1MContext ?? false; const chatAreaSize = useResizeObserver(chatAreaRef); const baseId = `right-sidebar-${workspaceId}`; const costsTabId = `${baseId}-tab-costs`; + const statsTabId = `${baseId}-tab-stats`; const reviewTabId = `${baseId}-tab-review`; const costsPanelId = `${baseId}-panel-costs`; + const statsPanelId = `${baseId}-panel-stats`; const reviewPanelId = `${baseId}-panel-review`; + // Use lastContextUsage for context window display (last step = actual context size) + const lastUsage = usage?.liveUsage ?? usage?.lastContextUsage; + const model = lastUsage?.model ?? 
null; + // Calculate session cost for tab display const sessionCost = React.useMemo(() => { const parts: ChatUsageDisplay[] = []; @@ -152,60 +196,100 @@ const RightSidebarComponent: React.FC = ({ return total > 0 ? total : null; }, [usage.sessionTotal, usage.liveCostUsage]); - // Auto-hide sidebar on small screens using hysteresis to prevent oscillation - // - Observe ChatArea width directly (independent of sidebar width) - // - ChatArea has min-width and flex: 1 - // - Collapse when chatAreaWidth <= 800px (tight space) - // - Expand when chatAreaWidth >= 1100px (lots of space) - // - Between 800-1100: maintain current state (dead zone) - const COLLAPSE_THRESHOLD = 800; - const EXPAND_THRESHOLD = 1100; + const statsSnapshot = useWorkspaceStatsSnapshot(workspaceId); + + const sessionDuration = (() => { + if (!statsTabEnabled) return null; + const baseDuration = statsSnapshot?.session?.totalDurationMs ?? 0; + const activeDuration = statsSnapshot?.active?.elapsedMs ?? 0; + const total = baseDuration + activeDuration; + return total > 0 ? total : null; + })(); + + // Auto-compaction settings: threshold per-model + const { threshold: autoCompactThreshold, setThreshold: setAutoCompactThreshold } = + useAutoCompactionSettings(workspaceId, model); + + // Memoize vertical meter data calculation to prevent unnecessary re-renders + const verticalMeterData = React.useMemo(() => { + return lastUsage + ? calculateTokenMeterData(lastUsage, model ?? 
"unknown", use1M, true) + : { segments: [], totalTokens: 0, totalPercentage: 0 }; + }, [lastUsage, model, use1M]); + + // Calculate if we should show collapsed view with hysteresis + // Strategy: Observe ChatArea width directly (independent of sidebar width) + // - ChatArea has min-width: 750px and flex: 1 + // - Use hysteresis to prevent oscillation: + // * Collapse when chatAreaWidth <= 800px (tight space) + // * Expand when chatAreaWidth >= 1100px (lots of space) + // * Between 800-1100: maintain current state (dead zone) + const COLLAPSE_THRESHOLD = 800; // Collapse below this + const EXPAND_THRESHOLD = 1100; // Expand above this const chatAreaWidth = chatAreaSize?.width ?? 1000; // Default to large to avoid flash // Persist collapsed state globally (not per-workspace) since chat area width is shared - const [isHidden, setIsHidden] = usePersistedState(RIGHT_SIDEBAR_COLLAPSED_KEY, false); + // This prevents animation flash when switching workspaces - sidebar maintains its state + const [showCollapsed, setShowCollapsed] = usePersistedState( + RIGHT_SIDEBAR_COLLAPSED_KEY, + false + ); React.useEffect(() => { - // Never hide when Review tab is active - code review needs space + // Never collapse when Review tab is active - code review needs space if (selectedTab === "review") { - if (isHidden) { - setIsHidden(false); + if (showCollapsed) { + setShowCollapsed(false); } return; } - // If sidebar is custom-resized wider than default, don't auto-hide - // (would cause oscillation between hidden and wide states) + // If the sidebar is custom-resized (wider than the default Costs width), + // auto-collapse based on chatAreaWidth can oscillate between expanded and + // collapsed states (because collapsed is 20px but expanded can be much wider), + // which looks like a constant flash. In that case, keep it expanded and let + // the user resize manually. 
if (width !== undefined && width > 300) { - if (isHidden) { - setIsHidden(false); + if (showCollapsed) { + setShowCollapsed(false); } return; } - // Normal hysteresis for Costs tab + // Normal hysteresis for Costs/Tools tabs if (chatAreaWidth <= COLLAPSE_THRESHOLD) { - setIsHidden(true); + setShowCollapsed(true); } else if (chatAreaWidth >= EXPAND_THRESHOLD) { - setIsHidden(false); + setShowCollapsed(false); } // Between thresholds: maintain current state (no change) - }, [chatAreaWidth, selectedTab, isHidden, setIsHidden, width]); + }, [chatAreaWidth, selectedTab, showCollapsed, setShowCollapsed, width]); - // Fully hide sidebar on small screens (context usage now shown in ChatInput) - if (isHidden) { - return null; - } + // Single render point for VerticalTokenMeter + // Shows when: (1) collapsed, OR (2) Review tab is active + const showMeter = showCollapsed || selectedTab === "review"; + const autoCompactionProps = React.useMemo( + () => ({ + threshold: autoCompactThreshold, + setThreshold: setAutoCompactThreshold, + }), + [autoCompactThreshold, setAutoCompactThreshold] + ); + const verticalMeter = showMeter ? ( + + ) : null; return ( -
+ {/* Full view when not collapsed */} +
{/* Resize handle (left edge) */} {onStartResize && (
= ({ /> )} + {/* Render meter when Review tab is active */} + {selectedTab === "review" && ( +
{verticalMeter}
+ )} +
= ({ {formatKeybind(KEYBINDS.REVIEW_TAB)} + {statsTabEnabled && ( + + + + + + {formatKeybind(KEYBINDS.STATS_TAB)} + + + )}
= ({ />
)} + {statsTabEnabled && selectedTab === "stats" && ( +
+ + + +
+ )}
+ {/* Render meter in collapsed view when sidebar is collapsed */} +
{verticalMeter}
); }; diff --git a/src/browser/components/RightSidebar/StatsTab.tsx b/src/browser/components/RightSidebar/StatsTab.tsx new file mode 100644 index 0000000000..30d0f9e651 --- /dev/null +++ b/src/browser/components/RightSidebar/StatsTab.tsx @@ -0,0 +1,397 @@ +import React from "react"; + +import { usePersistedState } from "@/browser/hooks/usePersistedState"; +import { useWorkspaceStatsSnapshot } from "@/browser/stores/WorkspaceStore"; +import { ToggleGroup, type ToggleOption } from "../ToggleGroup"; +import { useTelemetry } from "@/browser/hooks/useTelemetry"; +import { calculateAverageTPS } from "@/browser/utils/messages/StreamingTPSCalculator"; + +// Colors for timing components (matching TOKEN_COMPONENT_COLORS style) +const TIMING_COLORS = { + ttft: "#f59e0b", // amber - waiting for first token + model: "#3b82f6", // blue - model inference + tools: "#10b981", // green - tool execution +} as const; + +function formatDuration(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + if (ms < 10000) return `${(ms / 1000).toFixed(1)}s`; + if (ms < 60000) return `${Math.round(ms / 1000)}s`; + const mins = Math.floor(ms / 60000); + const secs = Math.round((ms % 60000) / 1000); + return `${mins}m ${secs}s`; +} + +function formatTokens(tokens: number): string { + if (tokens < 1000) return String(tokens); + return `${(tokens / 1000).toFixed(1)}k`; +} + +type ViewMode = "session" | "last-request"; + +const VIEW_MODE_OPTIONS: Array> = [ + { value: "session", label: "Session" }, + { value: "last-request", label: "Last Request" }, +]; + +interface ModelBreakdownEntry { + key: string; + model: string; + mode?: "plan" | "exec"; + totalDurationMs: number; + totalToolExecutionMs: number; + totalStreamingMs: number; + totalTtftMs: number; + ttftCount: number; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; +} + +function computeAverageTtft(totalTtftMs: number, ttftCount: number): number | null { + if (ttftCount <= 0) return null; + 
return totalTtftMs / ttftCount; +} + +export function StatsTab(props: { workspaceId: string }) { + const snapshot = useWorkspaceStatsSnapshot(props.workspaceId); + const telemetry = useTelemetry(); + const [viewMode, setViewMode] = usePersistedState("statsTab:viewMode", "session"); + const [showModeBreakdown, setShowModeBreakdown] = usePersistedState( + "statsTab:showModeBreakdown", + false + ); + + React.useEffect(() => { + telemetry.statsTabOpened(viewMode, showModeBreakdown); + }, [telemetry, viewMode, showModeBreakdown]); + + const active = snapshot?.active; + const session = snapshot?.session; + const lastRequest = snapshot?.lastRequest; + + const hasAnyData = + active !== undefined || lastRequest !== undefined || (session?.responseCount ?? 0) > 0; + + const onClearStats = async (): Promise => { + const client = window.__ORPC_CLIENT__; + if (!client) throw new Error("ORPC client not initialized"); + await client.workspace.stats.clear({ workspaceId: props.workspaceId }); + }; + + if (!hasAnyData) { + return ( +
+
+

No timing data yet.

+

Send a message to see timing statistics.

+
+
+ ); + } + + // --- Timing data selection --- + + const sessionTotalDuration = (session?.totalDurationMs ?? 0) + (active?.elapsedMs ?? 0); + const sessionToolExecutionMs = + (session?.totalToolExecutionMs ?? 0) + (active?.toolExecutionMs ?? 0); + // Includes TTFT (used as a fallback for TPS when streaming time is unavailable/corrupted). + const sessionModelTimeMs = Math.max(0, sessionTotalDuration - sessionToolExecutionMs); + const sessionStreamingMs = (session?.totalStreamingMs ?? 0) + (active?.streamingMs ?? 0); + const sessionAvgTtftMs = computeAverageTtft(session?.totalTtftMs ?? 0, session?.ttftCount ?? 0); + const sessionTotalTtftMs = (session?.totalTtftMs ?? 0) + (active?.ttftMs ?? 0); + + const lastData = active ?? lastRequest; + const isActive = Boolean(active); + + const lastTotalDuration = active ? active.elapsedMs : (lastRequest?.totalDurationMs ?? 0); + const lastToolExecutionMs = active ? active.toolExecutionMs : (lastRequest?.toolExecutionMs ?? 0); + // Includes TTFT (used as a fallback for TPS when streaming time is unavailable/corrupted). + const lastModelTimeMs = active + ? active.modelTimeMs + : (lastRequest?.modelTimeMs ?? Math.max(0, lastTotalDuration - lastToolExecutionMs)); + const lastStreamingMs = active ? active.streamingMs : (lastRequest?.streamingMs ?? 0); + const lastTtftMs = active ? active.ttftMs : (lastRequest?.ttftMs ?? null); + + const totalDuration = viewMode === "session" ? sessionTotalDuration : lastTotalDuration; + const toolExecutionMs = viewMode === "session" ? sessionToolExecutionMs : lastToolExecutionMs; + const modelTimeMs = viewMode === "session" ? sessionModelTimeMs : lastModelTimeMs; + const streamingMs = viewMode === "session" ? sessionStreamingMs : lastStreamingMs; + const ttftMs = viewMode === "session" ? sessionAvgTtftMs : lastTtftMs; + const ttftMsForBar = viewMode === "session" ? sessionTotalTtftMs : (lastTtftMs ?? 0); + + // Stats snapshot provides both modelTime (includes TTFT) and streaming time. 
+ // For display breakdowns, prefer streaming time so TTFT isn't double-counted. + const modelDisplayMs = streamingMs; + + const waitingForTtft = viewMode === "last-request" && isActive && active?.ttftMs === null; + + const toolPercentage = totalDuration > 0 ? (toolExecutionMs / totalDuration) * 100 : 0; + const modelPercentage = totalDuration > 0 ? (modelDisplayMs / totalDuration) * 100 : 0; + const ttftPercentage = totalDuration > 0 ? (ttftMsForBar / totalDuration) * 100 : 0; + + const totalTokensForView = (() => { + if (viewMode === "session") { + const output = session?.totalOutputTokens ?? 0; + const reasoning = session?.totalReasoningTokens ?? 0; + return output + reasoning; + } + + const output = lastData?.outputTokens ?? 0; + const reasoning = lastData?.reasoningTokens ?? 0; + return output + reasoning; + })(); + + const avgTPS = calculateAverageTPS( + streamingMs, + modelTimeMs, + totalTokensForView, + viewMode === "last-request" ? (active?.liveTPS ?? null) : null + ); + + const components = [ + { + name: viewMode === "session" ? "Avg. 
Time to First Token" : "Time to First Token", + duration: ttftMs, + color: TIMING_COLORS.ttft, + show: ttftMs !== null || waitingForTtft, + waiting: waitingForTtft, + }, + { + name: "Model Time", + duration: modelDisplayMs, + color: TIMING_COLORS.model, + show: true, + percentage: modelPercentage, + }, + { + name: "Tool Execution", + duration: toolExecutionMs, + color: TIMING_COLORS.tools, + show: toolExecutionMs > 0, + percentage: toolPercentage, + }, + ].filter((c) => c.show); + + // --- Per-model breakdown (session view only) --- + + const modelEntries: ModelBreakdownEntry[] = (() => { + if (!session) return []; + return Object.entries(session.byModel).map(([key, entry]) => ({ key, ...entry })); + })(); + + const hasModeData = modelEntries.some((e) => e.mode !== undefined); + + const consolidatedByModel: ModelBreakdownEntry[] = (() => { + const byModel = new Map(); + + for (const entry of modelEntries) { + const existing = byModel.get(entry.model); + if (!existing) { + byModel.set(entry.model, { + ...entry, + key: entry.model, + mode: undefined, + }); + continue; + } + + existing.totalDurationMs += entry.totalDurationMs; + existing.totalToolExecutionMs += entry.totalToolExecutionMs; + existing.totalStreamingMs += entry.totalStreamingMs; + existing.totalTtftMs += entry.totalTtftMs; + existing.ttftCount += entry.ttftCount; + existing.responseCount += entry.responseCount; + existing.totalOutputTokens += entry.totalOutputTokens; + existing.totalReasoningTokens += entry.totalReasoningTokens; + } + + return Array.from(byModel.values()); + })(); + + const breakdownToShow = + viewMode === "session" && hasModeData && showModeBreakdown ? modelEntries : consolidatedByModel; + + breakdownToShow.sort((a, b) => b.totalDurationMs - a.totalDurationMs); + + // --- Render --- + + return ( +
+
+
+
+
+ + Timing + {isActive && } + + +
+
+ {viewMode === "session" && ( + + )} + {formatDuration(totalDuration)} +
+
+ + {viewMode === "session" && session && session.responseCount > 0 && ( +
+ + {session.responseCount} response{session.responseCount !== 1 ? "s" : ""} + + {(session.totalOutputTokens > 0 || session.totalReasoningTokens > 0) && ( + <> + · + {formatTokens(session.totalOutputTokens)} output tokens + {session.totalReasoningTokens > 0 && ( + <> + · + {formatTokens(session.totalReasoningTokens)} thinking + + )} + + )} +
+ )} + + {lastData?.invalid && viewMode === "last-request" && ( +
+ Invalid timing data: {lastData.anomalies.join(", ")} +
+ )} + + {avgTPS !== null && avgTPS > 0 && ( +
Avg. TPS: {avgTPS.toFixed(0)} tok/s
+ )} + + {/* Progress bar */} +
+
+ {ttftPercentage > 0 && ( +
+ )} +
+
+
+
+ + {/* Components table */} +
+ {components.map((component) => ( +
+
+
+ {component.name} +
+
+ {component.waiting ? ( + waiting… + ) : component.duration !== null ? ( + {formatDuration(component.duration)} + ) : ( + + )} + {component.percentage !== undefined && component.percentage > 0 && ( + + {component.percentage.toFixed(0)}% + + )} +
+
+ ))} +
+
+
+ + {viewMode === "session" && breakdownToShow.length > 0 && ( +
+
+ By model + {hasModeData && ( + + )} +
+ +
+ {breakdownToShow.map((entry) => { + const avgTtft = computeAverageTtft(entry.totalTtftMs, entry.ttftCount); + const tokens = entry.totalOutputTokens + entry.totalReasoningTokens; + const entryAvgTPS = calculateAverageTPS( + entry.totalStreamingMs, + Math.max(0, entry.totalDurationMs - entry.totalToolExecutionMs), + tokens, + null + ); + + const label = + entry.mode === "plan" + ? `${entry.model} (plan)` + : entry.mode === "exec" + ? `${entry.model} (exec)` + : entry.model; + + return ( +
+
+ + {label} + + + {formatDuration(entry.totalDurationMs)} + +
+
+ {entry.responseCount} req + {avgTtft !== null && ( + <> + · + TTFT {formatDuration(avgTtft)} + + )} + {entryAvgTPS !== null && entryAvgTPS > 0 && ( + <> + · + {entryAvgTPS.toFixed(0)} tok/s + + )} +
+
+ ); + })} +
+
+ )} +
+ ); +} diff --git a/src/browser/components/RightSidebar/statsTabCalculations.ts b/src/browser/components/RightSidebar/statsTabCalculations.ts new file mode 100644 index 0000000000..f3e31aea2b --- /dev/null +++ b/src/browser/components/RightSidebar/statsTabCalculations.ts @@ -0,0 +1,370 @@ +import type { SessionTimingStats, StreamTimingStats } from "@/browser/stores/WorkspaceStore"; +import { calculateAverageTPS } from "@/browser/utils/messages/StreamingTPSCalculator"; +import { formatModelDisplayName } from "@/common/utils/ai/modelDisplay"; + +export type StatsViewMode = "session" | "last-request"; + +export interface StatsTabDisplayData { + totalDuration: number; + ttft: number | null; + toolExecutionMs: number; + modelTime: number; + isActive: boolean; + responseCount?: number; + /** True if waiting for current request's TTFT */ + waitingForTtft?: boolean; +} + +export interface ModelBreakdownEntry { + model: string; + displayName: string; + totalDuration: number; + toolExecutionMs: number; + modelTime: number; + avgTtft: number | null; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; + tokensPerSec: number | null; + avgTokensPerMsg: number | null; + avgReasoningPerMsg: number | null; + mode?: "plan" | "exec"; +} + +export interface ModelBreakdownData { + /** Per-model+mode entries (no consolidation; keys may be model:mode) */ + byKey: ModelBreakdownEntry[]; + /** Consolidated per-model entries (mode ignored) */ + byModel: ModelBreakdownEntry[]; + /** Whether any entries have explicit mode (plan/exec) */ + hasModeData: boolean; +} + +export function computeStatsTabDisplayData(params: { + viewMode: StatsViewMode; + timingStats: StreamTimingStats | null; + sessionStats: SessionTimingStats | null; + now: number; +}): StatsTabDisplayData { + if (params.viewMode === "session") { + // Session view: aggregate completed stats + active stream (if present) + const baseTotal = params.sessionStats?.totalDurationMs ?? 
0; + const baseToolMs = params.sessionStats?.totalToolExecutionMs ?? 0; + const baseResponseCount = params.sessionStats?.responseCount ?? 0; + + let baseTtftSum = 0; + let baseTtftCount = 0; + if (params.sessionStats?.averageTtftMs !== null && params.sessionStats?.responseCount) { + baseTtftSum = params.sessionStats.averageTtftMs * params.sessionStats.responseCount; + baseTtftCount = params.sessionStats.responseCount; + } + + // Add live stats from active stream + let liveElapsed = 0; + let liveToolMs = 0; + let liveTtft: number | null = null; + let isActive = false; + + if (params.timingStats?.isActive) { + liveElapsed = params.now - params.timingStats.startTime; + liveToolMs = params.timingStats.toolExecutionMs; + isActive = true; + + if (params.timingStats.firstTokenTime !== null) { + liveTtft = params.timingStats.firstTokenTime - params.timingStats.startTime; + } + } + + const totalDuration = baseTotal + liveElapsed; + const totalToolMs = baseToolMs + liveToolMs; + + // Recalculate average TTFT including the active stream once it has a first token. + let avgTtft: number | null = null; + if (liveTtft !== null) { + avgTtft = (baseTtftSum + liveTtft) / (baseTtftCount + 1); + } else if (baseTtftCount > 0) { + avgTtft = baseTtftSum / baseTtftCount; + } + + return { + totalDuration, + ttft: avgTtft, + toolExecutionMs: totalToolMs, + modelTime: Math.max(0, totalDuration - totalToolMs), + isActive, + responseCount: baseResponseCount + (isActive ? 1 : 0), + waitingForTtft: isActive && liveTtft === null, + }; + } + + // Last Request view + if (!params.timingStats) { + return { + totalDuration: 0, + ttft: null, + toolExecutionMs: 0, + modelTime: 0, + isActive: false, + }; + } + + const elapsed = params.timingStats.isActive + ? params.now - params.timingStats.startTime + : params.timingStats.endTime! - params.timingStats.startTime; + + return { + totalDuration: elapsed, + ttft: + params.timingStats.firstTokenTime !== null + ? 
params.timingStats.firstTokenTime - params.timingStats.startTime + : null, + toolExecutionMs: params.timingStats.toolExecutionMs, + modelTime: Math.max(0, elapsed - params.timingStats.toolExecutionMs), + isActive: params.timingStats.isActive, + }; +} + +function getModelDisplayName(model: string): string { + // Extract model name from "provider:model-name" or "mux-gateway:provider/model-name" format + const colonIndex = model.indexOf(":"); + const afterProvider = colonIndex >= 0 ? model.slice(colonIndex + 1) : model; + + // For mux-gateway format, extract the actual model name after the slash + const slashIndex = afterProvider.indexOf("/"); + const modelName = slashIndex >= 0 ? afterProvider.slice(slashIndex + 1) : afterProvider; + + return formatModelDisplayName(modelName); +} + +const MODE_SUFFIX_PLAN = ":plan" as const; +const MODE_SUFFIX_EXEC = ":exec" as const; + +function parseStatsKey(key: string): { model: string; mode?: "plan" | "exec" } { + if (key.endsWith(MODE_SUFFIX_PLAN)) { + return { model: key.slice(0, -MODE_SUFFIX_PLAN.length), mode: "plan" }; + } + if (key.endsWith(MODE_SUFFIX_EXEC)) { + return { model: key.slice(0, -MODE_SUFFIX_EXEC.length), mode: "exec" }; + } + return { model: key }; +} + +export function computeModelBreakdownData(params: { + viewMode: StatsViewMode; + timingStats: StreamTimingStats | null; + sessionStats: SessionTimingStats | null; + now: number; +}): ModelBreakdownData { + if (params.viewMode !== "session") { + if (!params.timingStats) { + return { byKey: [], byModel: [], hasModeData: false }; + } + + const elapsed = params.timingStats.isActive + ? params.now - params.timingStats.startTime + : params.timingStats.endTime! - params.timingStats.startTime; + const modelTime = Math.max(0, elapsed - params.timingStats.toolExecutionMs); + const ttft = + params.timingStats.firstTokenTime !== null + ? params.timingStats.firstTokenTime - params.timingStats.startTime + : null; + + const outputTokens = params.timingStats.isActive + ? 
(params.timingStats.liveTokenCount ?? 0) + : (params.timingStats.outputTokens ?? 0); + const reasoningTokens = params.timingStats.reasoningTokens ?? 0; + + const rawStreamingMs = params.timingStats.isActive + ? params.timingStats.firstTokenTime !== null + ? params.now - params.timingStats.firstTokenTime + : 0 + : (params.timingStats.streamingMs ?? 0); + const streamingMs = params.timingStats.isActive + ? Math.max(0, rawStreamingMs - params.timingStats.toolExecutionMs) + : rawStreamingMs; + + const tokensPerSec = calculateAverageTPS( + streamingMs, + modelTime, + outputTokens, + params.timingStats.isActive ? (params.timingStats.liveTPS ?? null) : null + ); + + const entry: ModelBreakdownEntry = { + model: params.timingStats.model, + displayName: getModelDisplayName(params.timingStats.model), + totalDuration: elapsed, + toolExecutionMs: params.timingStats.toolExecutionMs, + modelTime, + avgTtft: ttft, + responseCount: 1, + totalOutputTokens: outputTokens, + totalReasoningTokens: reasoningTokens, + tokensPerSec, + avgTokensPerMsg: outputTokens > 0 ? outputTokens : null, + avgReasoningPerMsg: reasoningTokens > 0 ? 
reasoningTokens : null, + mode: params.timingStats.mode, + }; + + return { byKey: [entry], byModel: [entry], hasModeData: false }; + } + + interface BreakdownEntry { + totalDuration: number; + toolExecutionMs: number; + streamingMs: number; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; + ttftSum: number; + ttftCount: number; + liveTPS: number | null; + liveTokenCount: number; + mode?: "plan" | "exec"; + } + + const breakdown: Record = {}; + + if (params.sessionStats?.byModel) { + for (const [key, stats] of Object.entries(params.sessionStats.byModel)) { + breakdown[key] = { + totalDuration: stats.totalDurationMs, + toolExecutionMs: stats.totalToolExecutionMs, + streamingMs: stats.totalStreamingMs, + responseCount: stats.responseCount, + totalOutputTokens: stats.totalOutputTokens, + totalReasoningTokens: stats.totalReasoningTokens, + ttftSum: stats.averageTtftMs !== null ? stats.averageTtftMs * stats.responseCount : 0, + ttftCount: stats.averageTtftMs !== null ? stats.responseCount : 0, + liveTPS: null, + liveTokenCount: 0, + mode: stats.mode, + }; + } + } + + if (params.timingStats?.isActive) { + const activeMode = params.timingStats.mode; + const activeKey = activeMode + ? `${params.timingStats.model}:${activeMode}` + : params.timingStats.model; + const liveElapsed = params.now - params.timingStats.startTime; + const rawStreamingMs = + params.timingStats.firstTokenTime !== null + ? params.now - params.timingStats.firstTokenTime + : 0; + const liveStreamingMs = Math.max(0, rawStreamingMs - params.timingStats.toolExecutionMs); + + const existing = breakdown[activeKey] ?? 
{ + totalDuration: 0, + toolExecutionMs: 0, + streamingMs: 0, + responseCount: 0, + totalOutputTokens: 0, + totalReasoningTokens: 0, + ttftSum: 0, + ttftCount: 0, + liveTPS: null, + liveTokenCount: 0, + mode: activeMode, + }; + + existing.totalDuration += liveElapsed; + existing.toolExecutionMs += params.timingStats.toolExecutionMs; + existing.streamingMs += liveStreamingMs; + existing.responseCount += 1; + existing.liveTokenCount = params.timingStats.liveTokenCount ?? 0; + existing.totalOutputTokens += existing.liveTokenCount; + existing.liveTPS = params.timingStats.liveTPS ?? null; + if (params.timingStats.firstTokenTime !== null) { + existing.ttftSum += params.timingStats.firstTokenTime - params.timingStats.startTime; + existing.ttftCount += 1; + } + + breakdown[activeKey] = existing; + } + + const toModelBreakdownEntry = ( + model: string, + stats: BreakdownEntry, + mode?: "plan" | "exec" + ): ModelBreakdownEntry => { + const modelTime = Math.max(0, stats.totalDuration - stats.toolExecutionMs); + const avgTtft = stats.ttftCount > 0 ? stats.ttftSum / stats.ttftCount : null; + const tokensPerSec = calculateAverageTPS( + stats.streamingMs, + modelTime, + stats.totalOutputTokens, + stats.liveTPS + ); + const avgTokensPerMsg = + stats.responseCount > 0 && stats.totalOutputTokens > 0 + ? Math.round(stats.totalOutputTokens / stats.responseCount) + : null; + const avgReasoningPerMsg = + stats.responseCount > 0 && stats.totalReasoningTokens > 0 + ? 
Math.round(stats.totalReasoningTokens / stats.responseCount) + : null; + + return { + model, + displayName: getModelDisplayName(model), + totalDuration: stats.totalDuration, + toolExecutionMs: stats.toolExecutionMs, + modelTime, + avgTtft, + responseCount: stats.responseCount, + totalOutputTokens: stats.totalOutputTokens, + totalReasoningTokens: stats.totalReasoningTokens, + tokensPerSec, + avgTokensPerMsg, + avgReasoningPerMsg, + mode, + }; + }; + + const byKey = Object.entries(breakdown).map(([key, stats]) => { + const { model, mode } = parseStatsKey(key); + return toModelBreakdownEntry(model, stats, mode ?? stats.mode); + }); + + const consolidated: Record = {}; + for (const [key, stats] of Object.entries(breakdown)) { + const { model } = parseStatsKey(key); + const existing = consolidated[model] ?? { + totalDuration: 0, + toolExecutionMs: 0, + streamingMs: 0, + responseCount: 0, + totalOutputTokens: 0, + totalReasoningTokens: 0, + ttftSum: 0, + ttftCount: 0, + liveTPS: null, + liveTokenCount: 0, + }; + + existing.totalDuration += stats.totalDuration; + existing.toolExecutionMs += stats.toolExecutionMs; + existing.streamingMs += stats.streamingMs; + existing.responseCount += stats.responseCount; + existing.totalOutputTokens += stats.totalOutputTokens; + existing.totalReasoningTokens += stats.totalReasoningTokens; + existing.ttftSum += stats.ttftSum; + existing.ttftCount += stats.ttftCount; + + // Preserve live data if present (only expected for the active stream) + existing.liveTPS = stats.liveTPS ?? 
existing.liveTPS; + existing.liveTokenCount += stats.liveTokenCount; + + consolidated[model] = existing; + } + + const byModel = Object.entries(consolidated).map(([model, stats]) => { + return toModelBreakdownEntry(model, stats); + }); + + const hasModeData = byKey.some((m) => m.mode); + + return { byKey, byModel, hasModeData }; +} diff --git a/src/browser/components/Settings/sections/ExperimentsSection.tsx b/src/browser/components/Settings/sections/ExperimentsSection.tsx index 1285f30b47..54905783bd 100644 --- a/src/browser/components/Settings/sections/ExperimentsSection.tsx +++ b/src/browser/components/Settings/sections/ExperimentsSection.tsx @@ -6,6 +6,7 @@ import { type ExperimentId, } from "@/common/constants/experiments"; import { Switch } from "@/browser/components/ui/switch"; +import { useFeatureFlags, type StatsTabOverride } from "@/browser/contexts/FeatureFlagsContext"; import { useWorkspaceContext } from "@/browser/contexts/WorkspaceContext"; import { useTelemetry } from "@/browser/hooks/useTelemetry"; @@ -47,6 +48,39 @@ function ExperimentRow(props: ExperimentRowProps) { ); } +function StatsTabOverrideRow() { + const { statsTabState, setStatsTabOverride } = useFeatureFlags(); + + const onChange = (e: React.ChangeEvent) => { + const value = e.target.value as StatsTabOverride; + setStatsTabOverride(value).catch(() => { + // ignore + }); + }; + + return ( +
+
+
Stats tab
+
+ PostHog experiment-gated timing stats sidebar. Experiment variant:{" "} + {statsTabState?.variant ?? "—"}. +
+
+ +
+ ); +} + export function ExperimentsSection() { const allExperiments = getExperimentList(); const { refreshWorkspaceMetadata } = useWorkspaceContext(); @@ -60,7 +94,9 @@ export function ExperimentsSection() { // When post-compaction experiment is toggled, refresh metadata to fetch/clear bundled state const handlePostCompactionToggle = useCallback(() => { - void refreshWorkspaceMetadata(); + refreshWorkspaceMetadata().catch(() => { + // ignore + }); }, [refreshWorkspaceMetadata]); return ( @@ -69,6 +105,7 @@ export function ExperimentsSection() { Experimental features that are still in development. Enable at your own risk.

+ {experiments.map((exp) => ( Promise; + setStatsTabOverride: (override: StatsTabOverride) => Promise; +} + +const FeatureFlagsContext = createContext(null); + +export function useFeatureFlags(): FeatureFlagsContextValue { + const ctx = useContext(FeatureFlagsContext); + if (!ctx) throw new Error("useFeatureFlags must be used within FeatureFlagsProvider"); + return ctx; +} + +export function FeatureFlagsProvider(props: { children: ReactNode }) { + const { api } = useAPI(); + const [statsTabState, setStatsTabState] = useState(() => { + if (isStorybook()) { + return { enabled: true, variant: "stats", override: "default" }; + } + + return null; + }); + + const refreshStatsTabState = async (): Promise => { + if (!api) { + setStatsTabState({ enabled: false, variant: "control", override: "default" }); + return; + } + + const state = await api.features.getStatsTabState(); + setStatsTabState(state); + }; + + const setStatsTabOverride = async (override: StatsTabOverride): Promise => { + if (!api) { + throw new Error("ORPC client not initialized"); + } + + const state = await api.features.setStatsTabOverride({ override }); + setStatsTabState(state); + }; + + useEffect(() => { + if (isStorybook()) { + return; + } + + (async () => { + try { + if (!api) { + setStatsTabState({ enabled: false, variant: "control", override: "default" }); + return; + } + + const state = await api.features.getStatsTabState(); + setStatsTabState(state); + } catch { + // Treat as disabled if we can't fetch. + setStatsTabState({ enabled: false, variant: "control", override: "default" }); + } + })(); + }, [api]); + + return ( + + {props.children} + + ); +} diff --git a/src/browser/hooks/useLiveTimer.ts b/src/browser/hooks/useLiveTimer.ts new file mode 100644 index 0000000000..15b5bc05ec --- /dev/null +++ b/src/browser/hooks/useLiveTimer.ts @@ -0,0 +1,27 @@ +import { useEffect, useState } from "react"; + +/** + * A small helper for live-updating time displays. 
+ * + * When enabled, updates once per second (by default). When disabled, stops the interval. + */ +export function useLiveTimer(enabled: boolean, intervalMs = 1000): number { + const [now, setNow] = useState(() => Date.now()); + + useEffect(() => { + if (!enabled) { + return; + } + + // Snap immediately when enabling so the UI doesn't wait up to intervalMs. + setNow(Date.now()); + + const interval = setInterval(() => { + setNow(Date.now()); + }, intervalMs); + + return () => clearInterval(interval); + }, [enabled, intervalMs]); + + return now; +} diff --git a/src/browser/hooks/useTelemetry.ts b/src/browser/hooks/useTelemetry.ts index b6d270ba46..139f66f234 100644 --- a/src/browser/hooks/useTelemetry.ts +++ b/src/browser/hooks/useTelemetry.ts @@ -3,6 +3,7 @@ import { trackWorkspaceCreated, trackWorkspaceSwitched, trackMessageSent, + trackStatsTabOpened, trackStreamCompleted, trackProviderConfigured, trackCommandUsed, @@ -62,6 +63,12 @@ export function useTelemetry() { [] ); + const statsTabOpened = useCallback( + (viewMode: "session" | "last-request", showModeBreakdown: boolean) => { + trackStatsTabOpened(viewMode, showModeBreakdown); + }, + [] + ); const streamCompleted = useCallback( (model: string, wasInterrupted: boolean, durationSecs: number, outputTokens: number) => { trackStreamCompleted(model, wasInterrupted, durationSecs, outputTokens); @@ -96,6 +103,7 @@ export function useTelemetry() { workspaceSwitched, workspaceCreated, messageSent, + statsTabOpened, streamCompleted, providerConfigured, commandUsed, diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts index d38891fc5d..691d2f955f 100644 --- a/src/browser/stores/WorkspaceStore.test.ts +++ b/src/browser/stores/WorkspaceStore.test.ts @@ -328,6 +328,7 @@ describe("WorkspaceStore", () => { messageId: "msg1", model: "claude-opus-4", workspaceId: "test-workspace", + startTime: Date.now(), }; await new Promise((resolve) => { setTimeout(resolve, 10); @@ -433,6 
+434,7 @@ describe("WorkspaceStore", () => { messageId: "msg1", model: "claude-sonnet-4", workspaceId: "test-workspace", + startTime: Date.now(), }; await new Promise((resolve) => { setTimeout(resolve, 10); @@ -476,6 +478,7 @@ describe("WorkspaceStore", () => { messageId: "msg1", model: "claude-sonnet-4", workspaceId: "test-workspace", + startTime: Date.now(), }; await new Promise((resolve) => { setTimeout(resolve, 10); diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index c7fc16fb54..2b1a5706dd 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1,7 +1,7 @@ import assert from "@/common/utils/assert"; import type { MuxMessage, DisplayedMessage, QueuedMessage } from "@/common/types/message"; import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; -import type { WorkspaceChatMessage } from "@/common/orpc/types"; +import type { WorkspaceChatMessage, WorkspaceStatsSnapshot } from "@/common/orpc/types"; import type { RouterClient } from "@orpc/server"; import type { AppRouter } from "@/node/orpc/router"; import type { TodoItem } from "@/common/types/tools"; @@ -48,16 +48,124 @@ export interface WorkspaceState { pendingStreamStartTime: number | null; } +/** + * Timing statistics for streaming sessions (active or completed). + * When isActive=true, endTime is null and elapsed time should be computed live. + * When isActive=false, endTime contains the completion timestamp. 
+ */ +export interface StreamTimingStats { + /** When the stream started (Date.now()) */ + startTime: number; + /** When the stream ended, null if still active */ + endTime: number | null; + /** When first content token arrived, null if still waiting */ + firstTokenTime: number | null; + /** Accumulated tool execution time in ms */ + toolExecutionMs: number; + /** Whether this is an active stream (true) or completed (false) */ + isActive: boolean; + /** Model used for this stream */ + model: string; + /** Output tokens (excludes reasoning/thinking tokens) - only available for completed streams */ + outputTokens?: number; + /** Reasoning/thinking tokens - only available for completed streams */ + reasoningTokens?: number; + /** Streaming duration in ms (first token to end) - only available for completed streams */ + streamingMs?: number; + /** Live token count during streaming - only available for active streams */ + liveTokenCount?: number; + /** Live tokens-per-second during streaming - only available for active streams */ + liveTPS?: number; + /** Mode (plan/exec) in which this stream occurred */ + mode?: "plan" | "exec"; +} + +/** Per-model timing statistics */ +export interface ModelTimingStats { + /** Total time spent in responses for this model */ + totalDurationMs: number; + /** Total time spent executing tools for this model */ + totalToolExecutionMs: number; + /** Total time spent streaming tokens (excludes TTFT) - for accurate tokens/sec */ + totalStreamingMs: number; + /** Average time to first token for this model */ + averageTtftMs: number | null; + /** Number of completed responses for this model */ + responseCount: number; + /** Total output tokens generated by this model (excludes reasoning/thinking tokens) */ + totalOutputTokens: number; + /** Total reasoning/thinking tokens generated by this model */ + totalReasoningTokens: number; + /** Mode extracted from composite key (undefined for old data without mode) */ + mode?: "plan" | "exec"; +} + +/** 
+ * Aggregate timing statistics across all completed streams in a session. + */ +export interface SessionTimingStats { + /** Total time spent in all responses */ + totalDurationMs: number; + /** Total time spent executing tools */ + totalToolExecutionMs: number; + /** Total time spent streaming tokens (excludes TTFT) - for accurate tokens/sec */ + totalStreamingMs: number; + /** Average time to first token (null if no responses had TTFT) */ + averageTtftMs: number | null; + /** Number of completed responses */ + responseCount: number; + /** Total output tokens generated across all models (excludes reasoning/thinking tokens) */ + totalOutputTokens: number; + /** Total reasoning/thinking tokens generated across all models */ + totalReasoningTokens: number; + /** Per-model timing breakdown */ + + byModel: Record; +} + /** * Subset of WorkspaceState needed for sidebar display. * Subscribing to only these fields prevents re-renders when messages update. */ +function shallowEqual(a: object | null | undefined, b: object | null | undefined): boolean { + if (a === b) { + return true; + } + if (!a || !b) { + return false; + } + + const aRecord = a as Record; + const bRecord = b as Record; + + const aKeys = Object.keys(aRecord); + const bKeys = Object.keys(bRecord); + if (aKeys.length !== bKeys.length) { + return false; + } + + for (const key of aKeys) { + if (!Object.prototype.hasOwnProperty.call(bRecord, key)) { + return false; + } + if (!Object.is(aRecord[key], bRecord[key])) { + return false; + } + } + + return true; +} + export interface WorkspaceSidebarState { canInterrupt: boolean; awaitingUserQuestion: boolean; currentModel: string | null; recencyTimestamp: number | null; agentStatus: { emoji: string; message: string; url?: string } | undefined; + /** Timing stats for current/last stream */ + timingStats: StreamTimingStats | null; + /** Aggregate timing stats across all responses in session */ + sessionStats: SessionTimingStats | null; } /** @@ -137,6 +245,12 @@ 
export class WorkspaceStore { private pendingStreamEvents = new Map(); private workspaceMetadata = new Map(); // Store metadata for name lookup private queuedMessages = new Map(); // Cached queued messages + + // Workspace timing stats snapshots (from workspace.stats.subscribe) + private statsEnabled = false; + private workspaceStats = new Map(); + private statsStore = new MapStore(); + private statsUnsubscribers = new Map void>(); // Cumulative session usage (from session-usage.json) private sessionUsage = new Map>(); @@ -343,6 +457,31 @@ export class WorkspaceStore { // message completion events (not on deltas) to prevent App.tsx re-renders. } + setStatsEnabled(enabled: boolean): void { + if (this.statsEnabled === enabled) { + return; + } + + this.statsEnabled = enabled; + + if (!enabled) { + for (const unsubscribe of this.statsUnsubscribers.values()) { + unsubscribe(); + } + this.statsUnsubscribers.clear(); + this.workspaceStats.clear(); + + for (const workspaceId of this.ipcUnsubscribers.keys()) { + this.statsStore.bump(workspaceId); + } + return; + } + + // Enable subscriptions for already-added workspaces. + for (const workspaceId of this.ipcUnsubscribers.keys()) { + this.subscribeToStats(workspaceId); + } + } setClient(client: RouterClient) { this.client = client; } @@ -380,6 +519,43 @@ export class WorkspaceStore { this.deltaIdleHandles.set(workspaceId, handle); } + /** + * Subscribe to backend timing stats snapshots for a workspace. 
+ */ + + private subscribeToStats(workspaceId: string): void { + if (!this.client || !this.statsEnabled) { + return; + } + + // Skip if already subscribed + if (this.statsUnsubscribers.has(workspaceId)) { + return; + } + + const controller = new AbortController(); + const { signal } = controller; + + (async () => { + try { + const iterator = await this.client!.workspace.stats.subscribe({ workspaceId }, { signal }); + + for await (const snapshot of iterator) { + if (signal.aborted) break; + queueMicrotask(() => { + this.workspaceStats.set(workspaceId, snapshot); + this.statsStore.bump(workspaceId); + }); + } + } catch (error) { + if (signal.aborted) return; + console.warn(`[WorkspaceStore] Error in stats subscription for ${workspaceId}:`, error); + } + })(); + + this.statsUnsubscribers.set(workspaceId, () => controller.abort()); + } + /** * Cancel any pending idle state bump for a workspace. * Used when immediate state visibility is needed (e.g., stream-end). @@ -511,16 +687,46 @@ export class WorkspaceStore { */ getWorkspaceSidebarState(workspaceId: string): WorkspaceSidebarState { const fullState = this.getWorkspaceState(workspaceId); + const aggregator = this.aggregators.get(workspaceId); + + // Get timing stats: prefer active stream, fall back to last completed + let timingStats: StreamTimingStats | null = null; + const activeStats = aggregator?.getActiveStreamTimingStats(); + if (activeStats) { + timingStats = { + ...activeStats, + endTime: null, + isActive: true, + }; + } else { + const completedStats = aggregator?.getLastCompletedStreamStats(); + if (completedStats) { + timingStats = { + ...completedStats, + isActive: false, + }; + } + } + + // Get session-level aggregate stats + const sessionStats = aggregator?.getSessionTimingStats() ?? 
null; + const cached = this.sidebarStateCache.get(workspaceId); - // Return cached if values match + // Return cached if values match (timing stats checked by reference since they change frequently) if ( cached && cached.canInterrupt === fullState.canInterrupt && cached.awaitingUserQuestion === fullState.awaitingUserQuestion && cached.currentModel === fullState.currentModel && cached.recencyTimestamp === fullState.recencyTimestamp && - cached.agentStatus === fullState.agentStatus + cached.agentStatus === fullState.agentStatus && + // Timing stats: compare all fields for equality + shallowEqual(cached.timingStats, timingStats) && + // Session stats: compare key fields + (cached.sessionStats === sessionStats || + (cached.sessionStats?.totalDurationMs === sessionStats?.totalDurationMs && + cached.sessionStats?.responseCount === sessionStats?.responseCount)) ) { return cached; } @@ -532,11 +738,44 @@ export class WorkspaceStore { currentModel: fullState.currentModel, recencyTimestamp: fullState.recencyTimestamp, agentStatus: fullState.agentStatus, + timingStats, + sessionStats, }; this.sidebarStateCache.set(workspaceId, newState); return newState; } + /** + * Clear timing stats for a workspace. + * + * - Clears backend-persisted timing file (session-timing.json) when available. + * - Clears in-memory timing derived from StreamingMessageAggregator. 
+ */ + clearTimingStats(workspaceId: string): void { + if (this.client && this.statsEnabled) { + this.client.workspace.stats + .clear({ workspaceId }) + .then((result) => { + if (!result.success) { + console.warn(`Failed to clear timing stats for ${workspaceId}:`, result.error); + return; + } + + this.workspaceStats.delete(workspaceId); + this.statsStore.bump(workspaceId); + }) + .catch((error) => { + console.warn(`Failed to clear timing stats for ${workspaceId}:`, error); + }); + } + + const aggregator = this.aggregators.get(workspaceId); + if (aggregator) { + aggregator.clearSessionTimingStats(); + this.states.bump(workspaceId); + } + } + /** * Get all workspace states as a Map. * Returns a new Map on each call - not cached/reactive. @@ -575,6 +814,12 @@ export class WorkspaceStore { return this.aggregators.get(workspaceId); } + getWorkspaceStatsSnapshot(workspaceId: string): WorkspaceStatsSnapshot | null { + return this.statsStore.get(workspaceId, () => { + return this.workspaceStats.get(workspaceId) ?? null; + }); + } + /** * Bump state for a workspace to trigger React re-renders. * Used by addEphemeralMessage for frontend-only messages. @@ -710,6 +955,13 @@ export class WorkspaceStore { return this.usageStore.subscribeKey(workspaceId, listener); } + /** + * Subscribe to backend timing stats snapshots for a specific workspace. + */ + subscribeStats(workspaceId: string, listener: () => void): () => void { + return this.statsStore.subscribeKey(workspaceId, listener); + } + /** * Subscribe to consumer store changes for a specific workspace. 
*/ @@ -845,6 +1097,10 @@ export class WorkspaceStore { .catch((error) => { console.warn(`Failed to fetch session usage for ${workspaceId}:`, error); }); + + if (this.statsEnabled) { + this.subscribeToStats(workspaceId); + } } else { console.warn(`[WorkspaceStore] No ORPC client available for workspace ${workspaceId}`); } @@ -864,6 +1120,11 @@ export class WorkspaceStore { this.deltaIdleHandles.delete(workspaceId); } + const statsUnsubscribe = this.statsUnsubscribers.get(workspaceId); + if (statsUnsubscribe) { + statsUnsubscribe(); + this.statsUnsubscribers.delete(workspaceId); + } // Unsubscribe from IPC const unsubscribe = this.ipcUnsubscribers.get(workspaceId); if (unsubscribe) { @@ -883,6 +1144,8 @@ export class WorkspaceStore { this.previousSidebarValues.delete(workspaceId); this.sidebarStateCache.delete(workspaceId); this.workspaceCreatedAt.delete(workspaceId); + this.workspaceStats.delete(workspaceId); + this.statsStore.delete(workspaceId); this.sessionUsage.delete(workspaceId); } @@ -915,6 +1178,10 @@ export class WorkspaceStore { // Clean up consumer manager this.consumerManager.dispose(); + for (const unsubscribe of this.statsUnsubscribers.values()) { + unsubscribe(); + } + this.statsUnsubscribers.clear(); for (const unsubscribe of this.ipcUnsubscribers.values()) { unsubscribe(); } @@ -927,6 +1194,9 @@ export class WorkspaceStore { this.caughtUp.clear(); this.historicalMessages.clear(); this.pendingStreamEvents.clear(); + this.workspaceStats.clear(); + this.statsStore.clear(); + this.sessionUsage.clear(); this.workspaceCreatedAt.clear(); } @@ -1264,6 +1534,17 @@ export function useWorkspaceUsage(workspaceId: string): WorkspaceUsageState { ); } +/** + * Hook for backend timing stats snapshots. 
+ */ +export function useWorkspaceStatsSnapshot(workspaceId: string): WorkspaceStatsSnapshot | null { + const store = getStoreInstance(); + return useSyncExternalStore( + (listener) => store.subscribeStats(workspaceId, listener), + () => store.getWorkspaceStatsSnapshot(workspaceId) + ); +} + /** * Hook for consumer breakdown (lazy, with tokenization). * Updates after async Web Worker calculation completes. diff --git a/src/browser/stories/App.rightsidebar.stories.tsx b/src/browser/stories/App.rightsidebar.stories.tsx index 1cb11ff19e..e1955fad67 100644 --- a/src/browser/stories/App.rightsidebar.stories.tsx +++ b/src/browser/stories/App.rightsidebar.stories.tsx @@ -5,7 +5,7 @@ */ import { appMeta, AppWithMocks, type AppStory } from "./meta.js"; -import { setupSimpleChatStory } from "./storyHelpers"; +import { setupSimpleChatStory, setupStreamingChatStory } from "./storyHelpers"; import { createUserMessage, createAssistantMessage } from "./mockFactory"; import { within, userEvent, waitFor } from "@storybook/test"; import { @@ -200,3 +200,88 @@ export const ReviewTab: AppStory = { }); }, }; + +/** + * Stats tab when idle (no timing data) - shows placeholder message + */ +export const StatsTabIdle: AppStory = { + render: () => ( + { + localStorage.setItem(RIGHT_SIDEBAR_TAB_KEY, JSON.stringify("stats")); + + return setupSimpleChatStory({ + workspaceId: "ws-stats-idle", + workspaceName: "feature/stats", + projectName: "my-app", + messages: [ + createUserMessage("msg-1", "Help me with something", { historySequence: 1 }), + createAssistantMessage("msg-2", "Sure, I can help with that.", { historySequence: 2 }), + ], + sessionUsage: createSessionUsage(0.25), + }); + }} + /> + ), + play: async ({ canvasElement }) => { + const canvas = within(canvasElement); + + // Feature flags are async, so allow more time. 
+ const statsTab = await canvas.findByRole("tab", { name: /^stats/i }, { timeout: 3000 }); + await userEvent.click(statsTab); + + await waitFor(() => { + canvas.getByText(/no timing data yet/i); + }); + }, +}; + +/** + * Stats tab during active streaming - shows timing statistics + */ +export const StatsTabStreaming: AppStory = { + render: () => ( + { + localStorage.setItem(RIGHT_SIDEBAR_TAB_KEY, JSON.stringify("stats")); + + return setupStreamingChatStory({ + workspaceId: "ws-stats-streaming", + workspaceName: "feature/streaming", + projectName: "my-app", + statsTabEnabled: true, + messages: [ + createUserMessage("msg-1", "Write a comprehensive test suite", { historySequence: 1 }), + ], + streamingMessageId: "msg-2", + historySequence: 2, + streamText: "I'll create a test suite for you. Let me start by analyzing...", + }); + }} + /> + ), + play: async ({ canvasElement }) => { + const canvas = within(canvasElement); + + // Feature flags are async; wait for Stats tab to appear, then select it. 
+ const statsTab = await canvas.findByRole("tab", { name: /^stats/i }, { timeout: 5000 }); + await userEvent.click(statsTab); + + await waitFor( + () => { + canvas.getByRole("tab", { name: /^stats/i, selected: true }); + }, + { timeout: 5000 } + ); + + // Verify timing header is shown (with pulsing active indicator) + await waitFor(() => { + canvas.getByText(/timing/i); + }); + + // Verify timing table components are displayed + await waitFor(() => { + canvas.getByText(/model time/i); + }); + }, +}; diff --git a/src/browser/stories/meta.tsx b/src/browser/stories/meta.tsx index 311f3bbfde..afe0333d79 100644 --- a/src/browser/stories/meta.tsx +++ b/src/browser/stories/meta.tsx @@ -38,8 +38,26 @@ interface AppWithMocksProps { } /** Wrapper that runs setup once and passes the client to AppLoader */ +function getStorybookStoryId(): string | null { + if (typeof window === "undefined") { + return null; + } + + const params = new URLSearchParams(window.location.search); + return params.get("id") ?? 
params.get("path"); +} + export const AppWithMocks: FC = ({ setup }) => { + const lastStoryIdRef = useRef(null); const clientRef = useRef(null); + + const storyId = getStorybookStoryId(); + if (lastStoryIdRef.current !== storyId) { + lastStoryIdRef.current = storyId; + clientRef.current = setup(); + } + clientRef.current ??= setup(); + return ; }; diff --git a/src/browser/stories/mockFactory.ts b/src/browser/stories/mockFactory.ts index 19709cd7e0..a3f2de5d22 100644 --- a/src/browser/stories/mockFactory.ts +++ b/src/browser/stories/mockFactory.ts @@ -605,6 +605,7 @@ export function createStreamingChatHandler(opts: { messageId: opts.streamingMessageId, model: opts.model, historySequence: opts.historySequence, + startTime: Date.now(), }); // Send text delta if provided diff --git a/src/browser/stories/storyHelpers.ts b/src/browser/stories/storyHelpers.ts index f5eb72e688..2c4d60e90e 100644 --- a/src/browser/stories/storyHelpers.ts +++ b/src/browser/stories/storyHelpers.ts @@ -6,7 +6,12 @@ */ import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; -import type { WorkspaceChatMessage, ChatMuxMessage, ProvidersConfigMap } from "@/common/orpc/types"; +import type { + WorkspaceChatMessage, + ChatMuxMessage, + ProvidersConfigMap, + WorkspaceStatsSnapshot, +} from "@/common/orpc/types"; import type { APIClient } from "@/browser/contexts/API"; import { SELECTED_WORKSPACE_KEY, @@ -161,6 +166,7 @@ export interface SimpleChatSetupOptions { providersConfig?: ProvidersConfigMap; backgroundProcesses?: BackgroundProcessFixture[]; /** Session usage data for Costs tab */ + statsTabEnabled?: boolean; sessionUsage?: MockSessionUsage; /** Optional custom chat handler for emitting additional events (e.g., queued-message-changed) */ onChat?: (workspaceId: string, emit: (msg: WorkspaceChatMessage) => void) => void; @@ -216,6 +222,7 @@ export function setupSimpleChatStory(opts: SimpleChatSetupOptions): APIClient { executeBash: createGitStatusExecutor(gitStatus), 
providersConfig: opts.providersConfig, backgroundProcesses: bgProcesses, + statsTabVariant: opts.statsTabEnabled ? "stats" : "control", sessionUsage: sessionUsageMap, }); } @@ -235,6 +242,7 @@ export interface StreamingChatSetupOptions { streamText?: string; pendingTool?: { toolCallId: string; toolName: string; args: object }; gitStatus?: GitStatusFixture; + statsTabEnabled?: boolean; } /** @@ -272,12 +280,48 @@ export function setupStreamingChatStory(opts: StreamingChatSetupOptions): APICli // Set localStorage for workspace selection selectWorkspace(workspaces[0]); + const workspaceStatsSnapshots = new Map(); + if (opts.statsTabEnabled) { + workspaceStatsSnapshots.set(workspaceId, { + workspaceId, + generatedAt: Date.now(), + active: { + messageId: opts.streamingMessageId, + model: "openai:gpt-4o", + elapsedMs: 2000, + ttftMs: 200, + toolExecutionMs: 0, + modelTimeMs: 2000, + streamingMs: 1800, + outputTokens: 100, + reasoningTokens: 0, + liveTokenCount: 100, + liveTPS: 50, + invalid: false, + anomalies: [], + }, + session: { + totalDurationMs: 0, + totalToolExecutionMs: 0, + totalStreamingMs: 0, + totalTtftMs: 0, + ttftCount: 0, + responseCount: 0, + totalOutputTokens: 0, + totalReasoningTokens: 0, + byModel: {}, + }, + }); + } + // Return ORPC client return createMockORPCClient({ projects: groupWorkspacesByProject(workspaces), workspaces, onChat: createOnChatAdapter(chatHandlers), executeBash: createGitStatusExecutor(gitStatus), + workspaceStatsSnapshots, + statsTabVariant: opts.statsTabEnabled ? 
"stats" : "control", }); } diff --git a/src/browser/utils/commandIds.ts b/src/browser/utils/commandIds.ts index a9183704c7..8161601ebf 100644 --- a/src/browser/utils/commandIds.ts +++ b/src/browser/utils/commandIds.ts @@ -40,6 +40,7 @@ export const CommandIds = { chatInterrupt: () => "chat:interrupt" as const, chatJumpBottom: () => "chat:jumpBottom" as const, chatVoiceInput: () => "chat:voiceInput" as const, + chatClearTimingStats: () => "chat:clearTimingStats" as const, // Mode commands modeToggle: () => "mode:toggle" as const, diff --git a/src/browser/utils/commands/sources.ts b/src/browser/utils/commands/sources.ts index 3c78aa176e..da91614224 100644 --- a/src/browser/utils/commands/sources.ts +++ b/src/browser/utils/commands/sources.ts @@ -51,6 +51,7 @@ export interface BuildSourcesParams { onToggleTheme: () => void; onSetTheme: (theme: ThemeMode) => void; onOpenSettings?: (section?: string) => void; + onClearTimingStats?: (workspaceId: string) => void; } const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high", "xhigh"]; @@ -423,6 +424,15 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi window.dispatchEvent(createCustomEvent(CUSTOM_EVENTS.TOGGLE_VOICE_INPUT)); }, }); + list.push({ + id: CommandIds.chatClearTimingStats(), + title: "Clear Timing Stats", + subtitle: "Reset session timing data for this workspace", + section: section.chat, + run: () => { + p.onClearTimingStats?.(id); + }, + }); } return list; }); diff --git a/src/browser/utils/messages/ChatEventProcessor.test.ts b/src/browser/utils/messages/ChatEventProcessor.test.ts index b5d5ce8498..f64aa6713c 100644 --- a/src/browser/utils/messages/ChatEventProcessor.test.ts +++ b/src/browser/utils/messages/ChatEventProcessor.test.ts @@ -13,6 +13,7 @@ describe("ChatEventProcessor - Reasoning Delta", () => { messageId, model: "gpt-4", historySequence: 1, + startTime: Date.now(), }); // Send reasoning deltas @@ -69,6 +70,7 @@ describe("ChatEventProcessor - 
Reasoning Delta", () => { messageId, model: "gpt-4", historySequence: 1, + startTime: Date.now(), }); // Reasoning 1 diff --git a/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts b/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts index df54f35bfd..f99d1f3b7d 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts @@ -113,6 +113,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Add a status_set tool call @@ -155,6 +156,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // First status_set @@ -218,6 +220,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Set status @@ -269,6 +272,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Add a status_set tool call @@ -308,6 +312,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId: "msg1", model: "test-model", historySequence: 1, + startTime: Date.now(), }); aggregator.handleToolCallStart({ @@ -370,6 +375,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Add a status_set tool call with invalid emoji @@ -429,6 +435,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Add a successful status_set tool call @@ -672,6 +679,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Status_set 
with long message (would be truncated by backend) @@ -717,6 +725,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Add a status_set tool call with URL @@ -761,6 +770,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // First status with URL @@ -855,6 +865,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId: messageId1, model: "test-model", historySequence: 1, + startTime: Date.now(), }); // Set status with URL in first stream @@ -902,6 +913,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { messageId: messageId2, model: "test-model", historySequence: 2, + startTime: Date.now(), }); // Set new status WITHOUT URL - should use the last URL ever seen diff --git a/src/browser/utils/messages/StreamingMessageAggregator.test.ts b/src/browser/utils/messages/StreamingMessageAggregator.test.ts index 6f4e6e090a..65a2a2295e 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.test.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.test.ts @@ -149,6 +149,7 @@ describe("StreamingMessageAggregator", () => { messageId: "msg1", historySequence: 1, model: "claude-3-5-sonnet-20241022", + startTime: Date.now(), }); // Simulate todo_write tool call @@ -208,6 +209,7 @@ describe("StreamingMessageAggregator", () => { messageId: "msg1", historySequence: 1, model: "claude-3-5-sonnet-20241022", + startTime: Date.now(), }); // Simulate todo_write @@ -343,6 +345,7 @@ describe("StreamingMessageAggregator", () => { messageId: "msg1", historySequence: 1, model: "claude-3-5-sonnet-20241022", + startTime: Date.now(), }); aggregator.handleToolCallStart({ diff --git a/src/browser/utils/messages/StreamingMessageAggregator.ts b/src/browser/utils/messages/StreamingMessageAggregator.ts index 7db0d835fb..c2de4deb82 100644 --- 
a/src/browser/utils/messages/StreamingMessageAggregator.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.ts @@ -32,6 +32,7 @@ import { isDynamicToolPart } from "@/common/types/toolParts"; import { z } from "zod"; import { createDeltaStorage, type DeltaRecordStorage } from "./StreamingTPSCalculator"; import { computeRecencyTimestamp } from "./recency"; +import { assert } from "@/common/utils/assert"; import { getStatusStateKey } from "@/common/constants/storage"; // Maximum number of messages to display in the DOM for performance @@ -46,10 +47,30 @@ type AgentStatus = z.infer; const MAX_DISPLAYED_MESSAGES = 128; interface StreamingContext { - startTime: number; + /** Backend timestamp when stream started (Date.now()) */ + serverStartTime: number; + /** + * Offset to translate backend timestamps into the renderer clock. + * Computed as: `Date.now() - lastServerTimestamp`. + */ + clockOffsetMs: number; + /** Most recent backend timestamp observed for this stream */ + lastServerTimestamp: number; + isComplete: boolean; isCompacting: boolean; model: string; + + /** Timestamp of first content token (text or reasoning delta) - backend Date.now() */ + serverFirstTokenTime: number | null; + + /** Accumulated tool execution time in ms */ + toolExecutionMs: number; + /** Map of tool call start times for in-progress tool calls (backend timestamps) */ + pendingToolStarts: Map; + + /** Mode (plan/exec) */ + mode?: "plan" | "exec"; } /** @@ -189,6 +210,35 @@ export class StreamingMessageAggregator { // (or the user retries) so retry UI/backoff logic doesn't misfire on send failures. 
private pendingStreamStartTime: number | null = null; + // Last completed stream timing stats (preserved after stream ends for display) + // Unlike activeStreams, this persists until the next stream starts + private lastCompletedStreamStats: { + startTime: number; + endTime: number; + firstTokenTime: number | null; + toolExecutionMs: number; + model: string; + outputTokens: number; + reasoningTokens: number; + streamingMs: number; // Time from first token to end (for accurate tok/s) + mode?: "plan" | "exec"; // Mode in which this response occurred + } | null = null; + + // Session-level timing stats: model -> stats (totals computed on-the-fly) + private sessionTimingStats: Record< + string, + { + totalDurationMs: number; + totalToolExecutionMs: number; + totalTtftMs: number; + ttftCount: number; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; + totalStreamingMs: number; // Cumulative streaming time (for accurate tok/s) + } + > = {}; + // Workspace creation timestamp (used for recency calculation) // REQUIRED: Backend guarantees every workspace has createdAt via config.ts private readonly createdAt: string; @@ -196,7 +246,7 @@ export class StreamingMessageAggregator { constructor(createdAt: string, workspaceId?: string) { this.createdAt = createdAt; this.workspaceId = workspaceId; - // Load persisted agent status from localStorage + // Load persisted state from localStorage if (workspaceId) { const persistedStatus = this.loadPersistedAgentStatus(); if (persistedStatus) { @@ -242,6 +292,38 @@ export class StreamingMessageAggregator { // Ignore localStorage errors } } + + /** Clear all session timing stats (in-memory only). 
*/ + clearSessionTimingStats(): void { + this.sessionTimingStats = {}; + this.lastCompletedStreamStats = null; + } + + private updateStreamClock(context: StreamingContext, serverTimestamp: number): void { + assert(context, "updateStreamClock requires context"); + assert(typeof serverTimestamp === "number", "updateStreamClock requires serverTimestamp"); + + // Only update if this timestamp is >= the most recent one we've seen. + // During stream replay, older historical parts may be re-emitted out of order. + // + // NOTE: This is a display-oriented clock translation (not true synchronization). + // We refresh the offset whenever we see a newer backend timestamp. If the renderer clock + // drifts significantly during a very long stream, the translated times may be off by a + // small amount, which is acceptable for UI stats. + if (serverTimestamp < context.lastServerTimestamp) { + return; + } + + context.lastServerTimestamp = serverTimestamp; + context.clockOffsetMs = Date.now() - serverTimestamp; + } + + private translateServerTime(context: StreamingContext, serverTimestamp: number): number { + assert(context, "translateServerTime requires context"); + assert(typeof serverTimestamp === "number", "translateServerTime requires serverTimestamp"); + + return serverTimestamp + context.clockOffsetMs; + } private invalidateCache(): void { this.cachedAllMessages = null; this.cachedDisplayedMessages = null; @@ -339,8 +421,97 @@ export class StreamingMessageAggregator { * - Active stream tracking (this.activeStreams) * - Current TODOs (this.currentTodos) - reconstructed from history on reload * - Transient agentStatus (from displayStatus) - restored to persisted value + * + * Preserves: + * - lastCompletedStreamStats - timing stats from this stream for display after completion */ private cleanupStreamState(messageId: string): void { + // Capture timing stats before removing the stream context + const context = this.activeStreams.get(messageId); + if (context) { + const 
endTime = Date.now(); + const message = this.messages.get(messageId); + + // Prefer backend-provided duration (computed in the same clock domain as tool/delta timestamps). + // Fall back to renderer-based timing translated into the renderer clock. + const durationMsFromMetadata = message?.metadata?.duration; + const fallbackStartTime = this.translateServerTime(context, context.serverStartTime); + const fallbackDurationMs = Math.max(0, endTime - fallbackStartTime); + const durationMs = + typeof durationMsFromMetadata === "number" && Number.isFinite(durationMsFromMetadata) + ? durationMsFromMetadata + : fallbackDurationMs; + + const ttftMs = + context.serverFirstTokenTime !== null + ? Math.max(0, context.serverFirstTokenTime - context.serverStartTime) + : null; + + // Get output tokens from cumulative usage (if available) + const cumulativeUsage = this.activeStreamUsage.get(messageId)?.cumulative.usage; + const outputTokens = cumulativeUsage?.outputTokens ?? 0; + const reasoningTokens = cumulativeUsage?.reasoningTokens ?? 0; + + // Account for in-progress tool calls (can happen on abort/error) + let totalToolExecutionMs = context.toolExecutionMs; + if (context.pendingToolStarts.size > 0) { + const serverEndTime = context.serverStartTime + durationMs; + for (const toolStartTime of context.pendingToolStarts.values()) { + const toolMs = serverEndTime - toolStartTime; + if (toolMs > 0) { + totalToolExecutionMs += toolMs; + } + } + } + + // Streaming duration excludes TTFT and tool execution - used for avg tok/s + const streamingMs = Math.max(0, durationMs - (ttftMs ?? 0) - totalToolExecutionMs); + + const mode = (message?.metadata?.mode ?? context.mode) as "plan" | "exec" | undefined; + + // Store last completed stream stats (include durations anchored in the renderer clock) + const startTime = endTime - durationMs; + const firstTokenTime = ttftMs !== null ? 
startTime + ttftMs : null; + this.lastCompletedStreamStats = { + startTime, + endTime, + firstTokenTime, + toolExecutionMs: totalToolExecutionMs, + model: context.model, + outputTokens, + reasoningTokens, + streamingMs, + mode, + }; + + // Use composite key model:mode for per-model+mode stats + // Old data (no mode) will just use model as key, maintaining backward compat + const statsKey = mode ? `${context.model}:${mode}` : context.model; + + // Accumulate into per-model stats (totals computed on-the-fly in getSessionTimingStats) + const modelStats = this.sessionTimingStats[statsKey] ?? { + totalDurationMs: 0, + totalToolExecutionMs: 0, + totalTtftMs: 0, + ttftCount: 0, + responseCount: 0, + totalOutputTokens: 0, + totalReasoningTokens: 0, + totalStreamingMs: 0, + }; + modelStats.totalDurationMs += durationMs; + modelStats.totalToolExecutionMs += totalToolExecutionMs; + modelStats.responseCount += 1; + modelStats.totalOutputTokens += outputTokens; + modelStats.totalReasoningTokens += reasoningTokens; + modelStats.totalStreamingMs += streamingMs; + if (ttftMs !== null) { + modelStats.totalTtftMs += ttftMs; + modelStats.ttftCount += 1; + } + this.sessionTimingStats[statsKey] = modelStats; + } + this.activeStreams.delete(messageId); // Clear todos when stream ends - they're stream-scoped state // On reload, todos will be reconstructed from completed tool_write calls in history @@ -461,6 +632,179 @@ export class StreamingMessageAggregator { this.pendingStreamStartTime = time; } + /** + * Get timing statistics for the active stream (if any). + * Returns null if no active stream exists. + * Includes live token count and TPS for real-time display. 
+ */ + getActiveStreamTimingStats(): { + startTime: number; + firstTokenTime: number | null; + toolExecutionMs: number; + model: string; + /** Live token count from streaming deltas */ + liveTokenCount: number; + /** Live tokens-per-second (trailing window) */ + liveTPS: number; + /** Mode (plan/exec) for this stream */ + mode?: "plan" | "exec"; + } | null { + // Get the first (and typically only) active stream + const entries = Array.from(this.activeStreams.entries()); + if (entries.length === 0) return null; + const [messageId, context] = entries[0]; + + const now = Date.now(); + + const startTime = this.translateServerTime(context, context.serverStartTime); + const firstTokenTime = + context.serverFirstTokenTime !== null + ? this.translateServerTime(context, context.serverFirstTokenTime) + : null; + + // Include time from currently-executing tools (not just completed ones) + let totalToolMs = context.toolExecutionMs; + for (const toolStartServerTime of context.pendingToolStarts.values()) { + const toolStartTime = this.translateServerTime(context, toolStartServerTime); + totalToolMs += Math.max(0, now - toolStartTime); + } + + return { + startTime, + firstTokenTime, + toolExecutionMs: totalToolMs, + model: context.model, + liveTokenCount: this.getStreamingTokenCount(messageId), + liveTPS: this.getStreamingTPS(messageId), + mode: context.mode, + }; + } + + /** + * Get timing statistics from the last completed stream. + * Returns null if no stream has completed yet in this session. + * Unlike getActiveStreamTimingStats, this includes endTime and token counts. + */ + getLastCompletedStreamStats(): { + startTime: number; + endTime: number; + firstTokenTime: number | null; + toolExecutionMs: number; + model: string; + outputTokens: number; + reasoningTokens: number; + streamingMs: number; + mode?: "plan" | "exec"; + } | null { + return this.lastCompletedStreamStats; + } + + /** + * Get aggregate timing statistics across all completed streams in this session. 
+ * Totals are computed on-the-fly from per-model data. + * Returns null if no streams have completed yet. + * + * Session timing keys use format "model" or "model:mode" (e.g., "claude-opus-4:plan"). + * The byModelAndMode map preserves this structure for mode breakdown display. + */ + getSessionTimingStats(): { + totalDurationMs: number; + totalToolExecutionMs: number; + totalStreamingMs: number; + averageTtftMs: number | null; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; + /** Per-model timing breakdown (keys are composite: "model" or "model:mode") */ + byModel: Record< + string, + { + totalDurationMs: number; + totalToolExecutionMs: number; + totalStreamingMs: number; + averageTtftMs: number | null; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; + /** Mode extracted from composite key, undefined for old data */ + mode?: "plan" | "exec"; + } + >; + } | null { + const modelEntries = Object.entries(this.sessionTimingStats); + if (modelEntries.length === 0) return null; + + // Aggregate totals from per-model stats + let totalDurationMs = 0; + let totalToolExecutionMs = 0; + let totalStreamingMs = 0; + let totalTtftMs = 0; + let ttftCount = 0; + let responseCount = 0; + let totalOutputTokens = 0; + let totalReasoningTokens = 0; + + const byModel: Record< + string, + { + totalDurationMs: number; + totalToolExecutionMs: number; + totalStreamingMs: number; + averageTtftMs: number | null; + responseCount: number; + totalOutputTokens: number; + totalReasoningTokens: number; + mode?: "plan" | "exec"; + } + > = {}; + + for (const [key, stats] of modelEntries) { + // Parse composite key: "model" or "model:mode" + // Model names can contain colons (e.g., "mux-gateway:provider/model") + // so we look for ":plan" or ":exec" suffix specifically + let mode: "plan" | "exec" | undefined; + if (key.endsWith(":plan")) { + mode = "plan"; + } else if (key.endsWith(":exec")) { + mode = "exec"; + } + + // 
Accumulate totals + totalDurationMs += stats.totalDurationMs; + totalToolExecutionMs += stats.totalToolExecutionMs; + totalStreamingMs += stats.totalStreamingMs ?? 0; + totalTtftMs += stats.totalTtftMs; + ttftCount += stats.ttftCount; + responseCount += stats.responseCount; + totalOutputTokens += stats.totalOutputTokens; + totalReasoningTokens += stats.totalReasoningTokens; + + // Convert to display format (with computed average) + // Keep composite key as-is - StatsTab will parse/aggregate as needed + byModel[key] = { + totalDurationMs: stats.totalDurationMs, + totalToolExecutionMs: stats.totalToolExecutionMs, + totalStreamingMs: stats.totalStreamingMs ?? 0, + averageTtftMs: stats.ttftCount > 0 ? stats.totalTtftMs / stats.ttftCount : null, + responseCount: stats.responseCount, + totalOutputTokens: stats.totalOutputTokens, + totalReasoningTokens: stats.totalReasoningTokens, + mode, + }; + } + + return { + totalDurationMs, + totalToolExecutionMs, + totalStreamingMs, + averageTtftMs: ttftCount > 0 ? 
totalTtftMs / ttftCount : null, + responseCount, + totalOutputTokens, + totalReasoningTokens, + byModel, + }; + } + getActiveStreams(): StreamingContext[] { return Array.from(this.activeStreams.values()); } @@ -542,11 +886,18 @@ export class StreamingMessageAggregator { const lastUserMsg = [...messages].reverse().find((m) => m.role === "user"); const isCompacting = lastUserMsg?.metadata?.muxMetadata?.type === "compaction-request"; + const now = Date.now(); const context: StreamingContext = { - startTime: Date.now(), + serverStartTime: data.startTime, + clockOffsetMs: now - data.startTime, + lastServerTimestamp: data.startTime, isComplete: false, isCompacting, model: data.model, + serverFirstTokenTime: null, + toolExecutionMs: 0, + pendingToolStarts: new Map(), + mode: data.mode, }; // Use messageId as key - ensures only ONE stream per message @@ -568,6 +919,16 @@ export class StreamingMessageAggregator { const message = this.messages.get(data.messageId); if (!message) return; + const context = this.activeStreams.get(data.messageId); + if (context) { + this.updateStreamClock(context, data.timestamp); + + // Track first token time (only for non-empty deltas) + if (data.delta.length > 0 && context.serverFirstTokenTime === null) { + context.serverFirstTokenTime = data.timestamp; + } + } + // Append each delta as a new part (merging happens at display time) message.parts.push({ type: "text", @@ -593,8 +954,12 @@ export class StreamingMessageAggregator { const updatedMetadata: MuxMetadata = { ...message.metadata, ...data.metadata, - duration: Date.now() - activeStream.startTime, }; + + const durationMs = data.metadata.duration; + if (typeof durationMs === "number" && Number.isFinite(durationMs)) { + this.updateStreamClock(activeStream, activeStream.serverStartTime + durationMs); + } message.metadata = updatedMetadata; // Update tool parts with their results if provided @@ -731,6 +1096,13 @@ export class StreamingMessageAggregator { return; } + // Track tool start time 
for execution duration calculation + const context = this.activeStreams.get(data.messageId); + if (context) { + this.updateStreamClock(context, data.timestamp); + context.pendingToolStarts.set(data.toolCallId, data.timestamp); + } + // Add tool part to maintain temporal order const toolPart: DynamicToolPartPending = { type: "dynamic-tool", @@ -804,6 +1176,18 @@ export class StreamingMessageAggregator { } handleToolCallEnd(data: ToolCallEndEvent): void { + // Track tool execution duration + const context = this.activeStreams.get(data.messageId); + if (context) { + this.updateStreamClock(context, data.timestamp); + + const startTime = context.pendingToolStarts.get(data.toolCallId); + if (startTime !== undefined) { + context.toolExecutionMs += data.timestamp - startTime; + context.pendingToolStarts.delete(data.toolCallId); + } + } + const message = this.messages.get(data.messageId); if (message) { // Find the specific tool part by its ID and update it with the result @@ -829,6 +1213,16 @@ export class StreamingMessageAggregator { const message = this.messages.get(data.messageId); if (!message) return; + const context = this.activeStreams.get(data.messageId); + if (context) { + this.updateStreamClock(context, data.timestamp); + + // Track first token time (reasoning also counts as first token) + if (data.delta.length > 0 && context.serverFirstTokenTime === null) { + context.serverFirstTokenTime = data.timestamp; + } + } + // Append each delta as a new part (merging happens at display time) message.parts.push({ type: "reasoning", diff --git a/src/browser/utils/messages/StreamingTPSCalculator.ts b/src/browser/utils/messages/StreamingTPSCalculator.ts index 1e0136ffb0..88387fa34b 100644 --- a/src/browser/utils/messages/StreamingTPSCalculator.ts +++ b/src/browser/utils/messages/StreamingTPSCalculator.ts @@ -5,90 +5,57 @@ * Designed to be independently testable from the main aggregator. 
*/ -export interface DeltaRecord { - tokens: number; - timestamp: number; - type: "text" | "reasoning" | "tool-args"; -} +import { + calculateTPS, + calculateTokenCount, + createDeltaStorage, + type DeltaRecord, + type DeltaRecordStorage, +} from "@/common/utils/tokens/tps"; -const TPS_WINDOW_MS = 60000; // 60 second trailing window +export { calculateTPS, calculateTokenCount, createDeltaStorage }; +export type { DeltaRecord, DeltaRecordStorage }; /** - * Calculate tokens-per-second from a history of delta records - * Uses a 60-second trailing window + * Maximum reasonable TPS for sanity checking. + * No current model exceeds ~200 tok/s sustained; 500 provides margin. + * Values above this indicate corrupted data (e.g., from timestamp bugs). */ -export function calculateTPS(deltas: DeltaRecord[], now: number = Date.now()): number { - if (deltas.length === 0) return 0; - - // Filter to deltas within the trailing window - const windowStart = now - TPS_WINDOW_MS; - const recentDeltas = deltas.filter((d) => d.timestamp >= windowStart); - - if (recentDeltas.length === 0) return 0; - - // Calculate total tokens in window - const totalTokens = recentDeltas.reduce((sum, d) => sum + (d.tokens || 0), 0); - - // Calculate time span from first delta in window to now - const timeSpanMs = now - recentDeltas[0].timestamp; - const timeSpanSec = timeSpanMs / 1000; - - // Avoid division by zero - if (timeSpanSec <= 0) return 0; - - return Math.round(totalTokens / timeSpanSec); -} +const MAX_REASONABLE_TPS = 500; /** - * Calculate cumulative token count from delta records + * Calculate average tokens-per-second from aggregate timing data. + * Used for session/historical stats (not live streaming). 
+ * + * @param streamingMs - Time spent streaming tokens (excludes TTFT and tool execution) + * @param modelTimeMs - Total model time (fallback when streamingMs unavailable/corrupted) + * @param totalTokens - Total output tokens + * @param liveTPS - Live TPS from trailing window (preferred for active streams) + * @returns TPS value or null if insufficient data */ -export function calculateTokenCount(deltas: DeltaRecord[]): number { - if ((deltas?.length ?? 0) === 0) return 0; - return deltas.reduce((sum, d) => sum + (d.tokens || 0), 0); -} - -export interface DeltaRecordStorage { - addDelta(record: DeltaRecord): void; - getTokenCount(): number; - calculateTPS(now?: number): number; - getRecentDeltas(): DeltaRecord[]; -} - -export function createDeltaStorage(windowMs: number = TPS_WINDOW_MS): DeltaRecordStorage { - let recentDeltas: DeltaRecord[] = []; - let olderTokenCount = 0; - - const prune = (now: number): void => { - if (recentDeltas.length === 0) return; - const threshold = now - windowMs; - let pruneCount = 0; - for (const delta of recentDeltas) { - if (delta.timestamp < threshold) { - olderTokenCount += delta.tokens || 0; - pruneCount += 1; - } else { - break; - } +export function calculateAverageTPS( + streamingMs: number, + modelTimeMs: number, + totalTokens: number, + liveTPS: number | null +): number | null { + // Use live TPS if available (active stream) - real-time trailing window + if (liveTPS !== null) return liveTPS; + + // Calculate from streaming time (most accurate for completed streams) + if (streamingMs > 0 && totalTokens > 0) { + const tps = totalTokens / (streamingMs / 1000); + // Sanity check: reject unreasonable values (corrupted persisted data) + if (tps <= MAX_REASONABLE_TPS) { + return tps; } - if (pruneCount > 0) { - recentDeltas = recentDeltas.slice(pruneCount); - } - }; + // Fall through to modelTime calculation + } + + // Fallback: use modelTime for old data without streamingMs or corrupted data + if (modelTimeMs > 0 && totalTokens 
> 0) { + return totalTokens / (modelTimeMs / 1000); + } - return { - addDelta(record: DeltaRecord) { - recentDeltas.push(record); - prune(record.timestamp); - }, - getTokenCount() { - return olderTokenCount + calculateTokenCount(recentDeltas); - }, - calculateTPS(now: number = Date.now()) { - prune(now); - return calculateTPS(recentDeltas, now); - }, - getRecentDeltas() { - return recentDeltas; - }, - }; + return null; } diff --git a/src/browser/utils/ui/keybinds.ts b/src/browser/utils/ui/keybinds.ts index de1b3f7bba..47ff5c8159 100644 --- a/src/browser/utils/ui/keybinds.ts +++ b/src/browser/utils/ui/keybinds.ts @@ -269,8 +269,14 @@ export const KEYBINDS = { /** Switch to Review tab in right sidebar */ // macOS: Cmd+2, Win/Linux: Ctrl+2 + // NOTE: Both Ctrl and Cmd work for switching tabs on Mac (macOS has no standard Cmd+number behavior) + // This differs from other keybinds where we distinguish Ctrl (literal) from Cmd (meta) REVIEW_TAB: { key: "2", ctrl: true, description: "Review tab" }, + /** Switch to Stats tab in right sidebar */ + // macOS: Cmd+3, Win/Linux: Ctrl+3 + STATS_TAB: { key: "3", ctrl: true, description: "Stats tab" }, + /** Refresh diff in Code Review panel */ // macOS: Cmd+R, Win/Linux: Ctrl+R REFRESH_REVIEW: { key: "r", ctrl: true }, diff --git a/src/cli/cli.test.ts b/src/cli/cli.test.ts index fd4ed9740c..5eac460d0a 100644 --- a/src/cli/cli.test.ts +++ b/src/cli/cli.test.ts @@ -69,6 +69,8 @@ async function createTestServer(authToken?: string): Promise { tokenizerService: services.tokenizerService, serverService: services.serverService, mcpConfigService: services.mcpConfigService, + featureFlagService: services.featureFlagService, + sessionTimingService: services.sessionTimingService, mcpServerManager: services.mcpServerManager, experimentsService: services.experimentsService, menuEventService: services.menuEventService, diff --git a/src/cli/server.test.ts b/src/cli/server.test.ts index 8b4b0ba6d7..2b0fa4d36e 100644 --- a/src/cli/server.test.ts 
+++ b/src/cli/server.test.ts @@ -72,6 +72,8 @@ async function createTestServer(): Promise { tokenizerService: services.tokenizerService, serverService: services.serverService, mcpConfigService: services.mcpConfigService, + featureFlagService: services.featureFlagService, + sessionTimingService: services.sessionTimingService, mcpServerManager: services.mcpServerManager, menuEventService: services.menuEventService, experimentsService: services.experimentsService, diff --git a/src/cli/server.ts b/src/cli/server.ts index c5f8acc03a..f8f11051da 100644 --- a/src/cli/server.ts +++ b/src/cli/server.ts @@ -89,6 +89,8 @@ const mockWindow: BrowserWindow = { serverService: serviceContainer.serverService, menuEventService: serviceContainer.menuEventService, mcpConfigService: serviceContainer.mcpConfigService, + featureFlagService: serviceContainer.featureFlagService, + sessionTimingService: serviceContainer.sessionTimingService, mcpServerManager: serviceContainer.mcpServerManager, voiceService: serviceContainer.voiceService, telemetryService: serviceContainer.telemetryService, diff --git a/src/common/constants/featureFlags.ts b/src/common/constants/featureFlags.ts new file mode 100644 index 0000000000..df7c4fedc7 --- /dev/null +++ b/src/common/constants/featureFlags.ts @@ -0,0 +1,5 @@ +export const FEATURE_FLAG_KEYS = { + statsTabV1: "stats_tab_v1", +} as const; + +export type FeatureFlagKey = (typeof FEATURE_FLAG_KEYS)[keyof typeof FEATURE_FLAG_KEYS]; diff --git a/src/common/constants/storage.ts b/src/common/constants/storage.ts index c5103b1f42..c4de82c6ce 100644 --- a/src/common/constants/storage.ts +++ b/src/common/constants/storage.ts @@ -175,6 +175,7 @@ export const VIM_ENABLED_KEY = "vimEnabled"; * Git status indicator display mode (global) * Stores: "line-delta" | "divergence" */ + export const GIT_STATUS_INDICATOR_MODE_KEY = "gitStatusIndicatorMode"; /** @@ -248,6 +249,15 @@ export function getStatusStateKey(workspaceId: string): string { return 
`statusState:${workspaceId}`; } +/** + * Get the localStorage key for session timing stats for a workspace + * Stores aggregate timing data: totalDurationMs, totalToolExecutionMs, totalTtftMs, ttftCount, responseCount + * Format: "sessionTiming:{workspaceId}" + */ +export function getSessionTimingKey(workspaceId: string): string { + return `sessionTiming:${workspaceId}`; +} + /** * Right sidebar tab selection (global) * Format: "right-sidebar-tab" diff --git a/src/common/orpc/schemas.ts b/src/common/orpc/schemas.ts index ff02984998..6ddc254855 100644 --- a/src/common/orpc/schemas.ts +++ b/src/common/orpc/schemas.ts @@ -19,6 +19,17 @@ export { WorkspaceMetadataSchema, } from "./schemas/workspace"; +// Workspace stats schemas +export { + ActiveStreamStatsSchema, + CompletedStreamStatsSchema, + ModelTimingStatsSchema, + SessionTimingFileSchema, + SessionTimingStatsSchema, + TimingAnomalySchema, + WorkspaceStatsSnapshotSchema, +} from "./schemas/workspaceStats"; + // Chat stats schemas export { ChatStatsSchema, @@ -103,6 +114,7 @@ export { export { AWSCredentialStatusSchema, debug, + features, general, menu, nameGeneration, diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index a6e0721712..6e25b954ef 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -14,6 +14,7 @@ import { TerminalSessionSchema, } from "./terminal"; import { BashToolResultSchema, FileTreeNodeSchema } from "./tools"; +import { WorkspaceStatsSnapshotSchema } from "./workspaceStats"; import { FrontendWorkspaceMetadataSchema, WorkspaceActivitySnapshotSchema } from "./workspace"; import { WorkspaceAISettingsSchema } from "./workspaceAiSettings"; import { @@ -447,6 +448,16 @@ export const workspace = { }), output: ResultSchema(z.void(), z.string()), }, + stats: { + subscribe: { + input: z.object({ workspaceId: z.string() }), + output: eventIterator(WorkspaceStatsSnapshotSchema), + }, + clear: { + input: z.object({ workspaceId: z.string() }), + 
output: ResultSchema(z.void(), z.string()), + }, + }, getSessionUsage: { input: z.object({ workspaceId: z.string() }), output: SessionUsageFileSchema.optional(), @@ -600,6 +611,26 @@ const EditorConfigSchema = z.object({ customCommand: z.string().optional(), }); +const StatsTabVariantSchema = z.enum(["control", "stats"]); +const StatsTabOverrideSchema = z.enum(["default", "on", "off"]); +const StatsTabStateSchema = z.object({ + enabled: z.boolean(), + variant: StatsTabVariantSchema, + override: StatsTabOverrideSchema, +}); + +// Feature gates (PostHog-backed) +export const features = { + getStatsTabState: { + input: z.void(), + output: StatsTabStateSchema, + }, + setStatsTabOverride: { + input: z.object({ override: StatsTabOverrideSchema }), + output: StatsTabStateSchema, + }, +}; + // General export const general = { listDirectory: { diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index 6c928c1b9c..88de7617fd 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -40,6 +40,12 @@ export const StreamStartEventSchema = z.object({ historySequence: z.number().meta({ description: "Backend assigns global message ordering", }), + startTime: z.number().meta({ + description: "Backend timestamp when stream started (Date.now())", + }), + mode: z.enum(["plan", "exec"]).optional().meta({ + description: "Agent mode (plan/exec) for this stream", + }), }); export const StreamDeltaEventSchema = z.object({ diff --git a/src/common/orpc/schemas/telemetry.ts b/src/common/orpc/schemas/telemetry.ts index 1522ec0e0e..903d03b5ea 100644 --- a/src/common/orpc/schemas/telemetry.ts +++ b/src/common/orpc/schemas/telemetry.ts @@ -127,6 +127,26 @@ const TelemetryMCPServerConfigActionSchema = z.enum([ "set_headers", ]); +const StatsTabOpenedPropertiesSchema = z.object({ + viewMode: z.enum(["session", "last-request"]), + showModeBreakdown: z.boolean(), +}); + +const StreamTimingComputedPropertiesSchema = z.object({ + model: 
z.string(), + mode: z.string(), + duration_b2: z.number(), + ttft_ms_b2: z.number(), + tool_ms_b2: z.number(), + streaming_ms_b2: z.number(), + tool_percent_bucket: z.number(), + invalid: z.boolean(), +}); + +const StreamTimingInvalidPropertiesSchema = z.object({ + reason: z.string(), +}); + const MCPServerConfigChangedPropertiesSchema = z.object({ action: TelemetryMCPServerConfigActionSchema, transport: TelemetryMCPServerTransportSchema, @@ -197,6 +217,18 @@ export const TelemetryEventSchema = z.discriminatedUnion("event", [ event: z.literal("mcp_server_tested"), properties: MCPServerTestedPropertiesSchema, }), + z.object({ + event: z.literal("stats_tab_opened"), + properties: StatsTabOpenedPropertiesSchema, + }), + z.object({ + event: z.literal("stream_timing_computed"), + properties: StreamTimingComputedPropertiesSchema, + }), + z.object({ + event: z.literal("stream_timing_invalid"), + properties: StreamTimingInvalidPropertiesSchema, + }), z.object({ event: z.literal("mcp_server_config_changed"), properties: MCPServerConfigChangedPropertiesSchema, diff --git a/src/common/orpc/schemas/workspaceStats.ts b/src/common/orpc/schemas/workspaceStats.ts new file mode 100644 index 0000000000..56039da63f --- /dev/null +++ b/src/common/orpc/schemas/workspaceStats.ts @@ -0,0 +1,108 @@ +import { z } from "zod"; + +const ModeSchema = z.enum(["plan", "exec"]); + +export const TimingAnomalySchema = z.enum([ + "negative_duration", + "tool_gt_total", + "ttft_gt_total", + "percent_out_of_range", + "nan", +]); + +export const ActiveStreamStatsSchema = z.object({ + messageId: z.string(), + model: z.string(), + mode: ModeSchema.optional(), + + elapsedMs: z.number(), + ttftMs: z.number().nullable(), + toolExecutionMs: z.number(), + modelTimeMs: z.number(), + streamingMs: z.number(), + + outputTokens: z.number(), + reasoningTokens: z.number(), + + /** Total tokens streamed so far (text + reasoning + tool args). */ + liveTokenCount: z.number(), + /** Tokens/sec, trailing window. 
*/ + liveTPS: z.number(), + + invalid: z.boolean(), + anomalies: z.array(TimingAnomalySchema), +}); + +export const CompletedStreamStatsSchema = z.object({ + messageId: z.string(), + model: z.string(), + mode: ModeSchema.optional(), + + totalDurationMs: z.number(), + ttftMs: z.number().nullable(), + toolExecutionMs: z.number(), + modelTimeMs: z.number(), + streamingMs: z.number(), + + outputTokens: z.number(), + reasoningTokens: z.number(), + + invalid: z.boolean(), + anomalies: z.array(TimingAnomalySchema), +}); + +export const ModelTimingStatsSchema = z.object({ + model: z.string(), + mode: ModeSchema.optional(), + + totalDurationMs: z.number(), + totalToolExecutionMs: z.number(), + totalStreamingMs: z.number(), + + totalTtftMs: z.number(), + ttftCount: z.number(), + responseCount: z.number(), + + totalOutputTokens: z.number(), + totalReasoningTokens: z.number(), +}); + +export const SessionTimingStatsSchema = z.object({ + totalDurationMs: z.number(), + totalToolExecutionMs: z.number(), + totalStreamingMs: z.number(), + + totalTtftMs: z.number(), + ttftCount: z.number(), + responseCount: z.number(), + + totalOutputTokens: z.number(), + totalReasoningTokens: z.number(), + + /** Per-model breakdown (key is stable identifier like normalizeGatewayModel(model) or model:mode). 
*/ + byModel: z.record(z.string(), ModelTimingStatsSchema), +}); + +export const WorkspaceStatsSnapshotSchema = z.object({ + workspaceId: z.string(), + generatedAt: z.number(), + + active: ActiveStreamStatsSchema.optional(), + lastRequest: CompletedStreamStatsSchema.optional(), + session: SessionTimingStatsSchema.optional(), +}); + +export const SessionTimingFileSchema = z.object({ + version: z.literal(1), + lastRequest: CompletedStreamStatsSchema.optional(), + session: SessionTimingStatsSchema, +}); + +// Convenient TypeScript type exports +export type TimingAnomaly = z.infer; +export type ActiveStreamStats = z.infer; +export type CompletedStreamStats = z.infer; +export type ModelTimingStats = z.infer; +export type SessionTimingStats = z.infer; +export type WorkspaceStatsSnapshot = z.infer; +export type SessionTimingFile = z.infer; diff --git a/src/common/orpc/types.ts b/src/common/orpc/types.ts index 5201757219..6eed26e3ff 100644 --- a/src/common/orpc/types.ts +++ b/src/common/orpc/types.ts @@ -29,6 +29,7 @@ export type DeleteMessage = z.infer; export type WorkspaceInitEvent = z.infer; export type UpdateStatus = z.infer; export type ChatMuxMessage = z.infer; +export type WorkspaceStatsSnapshot = z.infer; export type WorkspaceActivitySnapshot = z.infer; export type FrontendWorkspaceMetadataSchemaType = z.infer< typeof schemas.FrontendWorkspaceMetadataSchema diff --git a/src/common/telemetry/client.ts b/src/common/telemetry/client.ts index 391501b2c4..e3adf22974 100644 --- a/src/common/telemetry/client.ts +++ b/src/common/telemetry/client.ts @@ -89,7 +89,7 @@ export function trackEvent(payload: TelemetryEventPayload): void { } const client = window.__ORPC_CLIENT__; - if (!client) { + if (!client?.telemetry?.track) { return; } diff --git a/src/common/telemetry/index.ts b/src/common/telemetry/index.ts index 4cdfa3a5c2..6afa63e674 100644 --- a/src/common/telemetry/index.ts +++ b/src/common/telemetry/index.ts @@ -19,6 +19,7 @@ export { trackWorkspaceCreated, 
trackWorkspaceSwitched, trackMessageSent, + trackStatsTabOpened, trackStreamCompleted, trackProviderConfigured, trackCommandUsed, diff --git a/src/common/telemetry/payload.ts b/src/common/telemetry/payload.ts index ba44dc73a5..ae188504e5 100644 --- a/src/common/telemetry/payload.ts +++ b/src/common/telemetry/payload.ts @@ -181,6 +181,38 @@ export interface MCPServerConfigChangedPayload { /** Only set when action=set_tool_allowlist */ tool_allowlist_size_b2?: number; } +/** + * Stats tab event - tracks when users view timing stats. + */ +export interface StatsTabOpenedPayload { + viewMode: "session" | "last-request"; + showModeBreakdown: boolean; +} + +/** + * Stream timing computed - emitted by backend timing pipeline. + * + * All numeric metrics are base-2 rounded or bucketed to preserve privacy. + */ +export interface StreamTimingComputedPayload { + model: string; + mode: string; + duration_b2: number; + ttft_ms_b2: number; + tool_ms_b2: number; + streaming_ms_b2: number; + tool_percent_bucket: number; + invalid: boolean; +} + +/** + * Stream timing invalid - emitted when any computed % would exceed 100%, + * durations are negative, or values are NaN. 
+ */ +export interface StreamTimingInvalidPayload { + reason: string; +} + /** * Stream completion event - tracks when AI responses finish */ @@ -302,6 +334,9 @@ export type TelemetryEventPayload = | { event: "mcp_context_injected"; properties: MCPContextInjectedPayload } | { event: "mcp_server_tested"; properties: MCPServerTestedPayload } | { event: "mcp_server_config_changed"; properties: MCPServerConfigChangedPayload } + | { event: "stats_tab_opened"; properties: StatsTabOpenedPayload } + | { event: "stream_timing_computed"; properties: StreamTimingComputedPayload } + | { event: "stream_timing_invalid"; properties: StreamTimingInvalidPayload } | { event: "stream_completed"; properties: StreamCompletedPayload } | { event: "compaction_completed"; properties: CompactionCompletedPayload } | { event: "provider_configured"; properties: ProviderConfiguredPayload } diff --git a/src/common/telemetry/tracking.ts b/src/common/telemetry/tracking.ts index 1de807c3f6..fa0c44e446 100644 --- a/src/common/telemetry/tracking.ts +++ b/src/common/telemetry/tracking.ts @@ -86,6 +86,19 @@ export function trackMessageSent( }); } +/** + * Track stats tab opening. 
+ */ +export function trackStatsTabOpened( + viewMode: "session" | "last-request", + showModeBreakdown: boolean +): void { + trackEvent({ + event: "stats_tab_opened", + properties: { viewMode, showModeBreakdown }, + }); +} + /** * Track stream completion * @param durationSecs - Raw duration in seconds (will be rounded to base-2) diff --git a/src/common/types/project.ts b/src/common/types/project.ts index 6ee5393bc1..800966b257 100644 --- a/src/common/types/project.ts +++ b/src/common/types/project.ts @@ -10,10 +10,14 @@ export type Workspace = z.infer; export type ProjectConfig = z.infer; +export type FeatureFlagOverride = "default" | "on" | "off"; + export interface ProjectsConfig { projects: Map; /** SSH hostname/alias for this machine (used for editor deep links in browser mode) */ serverSshHost?: string; /** IDs of splash screens that have been viewed */ viewedSplashScreens?: string[]; + /** Cross-client feature flag overrides (shared via ~/.mux/config.json). */ + featureFlagOverrides?: Record; } diff --git a/src/common/utils/git/diffParser.test.ts b/src/common/utils/git/diffParser.test.ts index bd6b251953..41a51a30c4 100644 --- a/src/common/utils/git/diffParser.test.ts +++ b/src/common/utils/git/diffParser.test.ts @@ -21,6 +21,8 @@ describe("git diff parser (real repository)", () => { // Initialize git repo execSync("git init", { cwd: testRepoPath }); execSync('git config user.email "test@example.com"', { cwd: testRepoPath }); + // Disable commit signing (some developer machines enforce signing via global config) + execSync("git config commit.gpgsign false", { cwd: testRepoPath }); execSync('git config user.name "Test User"', { cwd: testRepoPath }); // Create initial commit with a file diff --git a/src/common/utils/tokens/tps.ts b/src/common/utils/tokens/tps.ts new file mode 100644 index 0000000000..1702915580 --- /dev/null +++ b/src/common/utils/tokens/tps.ts @@ -0,0 +1,84 @@ +/** + * Shared TPS + token counting utilities. 
+ * + * Used by both backend (stats subscription) and frontend (streaming UI). + */ + +export interface DeltaRecord { + tokens: number; + timestamp: number; + type: "text" | "reasoning" | "tool-args"; +} + +const DEFAULT_TPS_WINDOW_MS = 60000; // 60 second trailing window + +/** + * Calculate tokens-per-second from a history of delta records. + */ +export function calculateTPS(deltas: DeltaRecord[], now: number = Date.now()): number { + if (deltas.length === 0) return 0; + + const windowStart = now - DEFAULT_TPS_WINDOW_MS; + const recentDeltas = deltas.filter((d) => d.timestamp >= windowStart); + if (recentDeltas.length === 0) return 0; + + const totalTokens = recentDeltas.reduce((sum, d) => sum + (d.tokens || 0), 0); + const timeSpanMs = now - recentDeltas[0].timestamp; + const timeSpanSec = timeSpanMs / 1000; + if (timeSpanSec <= 0) return 0; + + return Math.round(totalTokens / timeSpanSec); +} + +export function calculateTokenCount(deltas: DeltaRecord[]): number { + if ((deltas?.length ?? 
0) === 0) return 0; + return deltas.reduce((sum, d) => sum + (d.tokens || 0), 0); +} + +export interface DeltaRecordStorage { + addDelta(record: DeltaRecord): void; + getTokenCount(): number; + calculateTPS(now?: number): number; + getRecentDeltas(): DeltaRecord[]; +} + +export function createDeltaStorage(windowMs: number = DEFAULT_TPS_WINDOW_MS): DeltaRecordStorage { + let recentDeltas: DeltaRecord[] = []; + let olderTokenCount = 0; + + const prune = (now: number): void => { + if (recentDeltas.length === 0) return; + const threshold = now - windowMs; + + let pruneCount = 0; + for (const delta of recentDeltas) { + if (delta.timestamp < threshold) { + olderTokenCount += delta.tokens || 0; + pruneCount += 1; + } else { + break; + } + } + + if (pruneCount > 0) { + recentDeltas = recentDeltas.slice(pruneCount); + } + }; + + return { + addDelta(record: DeltaRecord) { + recentDeltas.push(record); + prune(record.timestamp); + }, + getTokenCount() { + return olderTokenCount + calculateTokenCount(recentDeltas); + }, + calculateTPS(now: number = Date.now()) { + prune(now); + return calculateTPS(recentDeltas, now); + }, + getRecentDeltas() { + return recentDeltas; + }, + }; +} diff --git a/src/desktop/main.ts b/src/desktop/main.ts index 34505e0d0e..548310fa00 100644 --- a/src/desktop/main.ts +++ b/src/desktop/main.ts @@ -333,6 +333,8 @@ async function loadServices(): Promise { updateService: services.updateService, tokenizerService: services.tokenizerService, serverService: services.serverService, + featureFlagService: services.featureFlagService, + sessionTimingService: services.sessionTimingService, mcpConfigService: services.mcpConfigService, mcpServerManager: services.mcpServerManager, menuEventService: services.menuEventService, diff --git a/src/node/config.ts b/src/node/config.ts index 5185dd1105..5243bdb71a 100644 --- a/src/node/config.ts +++ b/src/node/config.ts @@ -6,7 +6,12 @@ import writeFileAtomic from "write-file-atomic"; import { log } from 
"@/node/services/log"; import type { WorkspaceMetadata, FrontendWorkspaceMetadata } from "@/common/types/workspace"; import type { Secret, SecretsConfig } from "@/common/types/secrets"; -import type { Workspace, ProjectConfig, ProjectsConfig } from "@/common/types/project"; +import type { + Workspace, + ProjectConfig, + ProjectsConfig, + FeatureFlagOverride, +} from "@/common/types/project"; import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace"; import { isIncompatibleRuntimeConfig } from "@/common/utils/runtimeCompatibility"; import { getMuxHome } from "@/common/constants/paths"; @@ -57,6 +62,7 @@ export class Config { projects?: unknown; serverSshHost?: string; viewedSplashScreens?: string[]; + featureFlagOverrides?: Record; }; // Config is stored as array of [path, config] pairs @@ -72,6 +78,7 @@ export class Config { projects: projectsMap, serverSshHost: parsed.serverSshHost, viewedSplashScreens: parsed.viewedSplashScreens, + featureFlagOverrides: parsed.featureFlagOverrides, }; } } @@ -95,12 +102,16 @@ export class Config { projects: Array<[string, ProjectConfig]>; serverSshHost?: string; viewedSplashScreens?: string[]; + featureFlagOverrides?: ProjectsConfig["featureFlagOverrides"]; } = { projects: Array.from(config.projects.entries()), }; if (config.serverSshHost) { data.serverSshHost = config.serverSshHost; } + if (config.featureFlagOverrides) { + data.featureFlagOverrides = config.featureFlagOverrides; + } if (config.viewedSplashScreens) { data.viewedSplashScreens = config.viewedSplashScreens; } @@ -121,6 +132,32 @@ export class Config { await this.saveConfig(newConfig); } + /** + * Cross-client feature flag overrides (shared via ~/.mux/config.json). 
+ */ + getFeatureFlagOverride(flagKey: string): FeatureFlagOverride { + const config = this.loadConfigOrDefault(); + const override = config.featureFlagOverrides?.[flagKey]; + if (override === "on" || override === "off" || override === "default") { + return override; + } + return "default"; + } + + async setFeatureFlagOverride(flagKey: string, override: FeatureFlagOverride): Promise { + await this.editConfig((config) => { + const next = { ...(config.featureFlagOverrides ?? {}) }; + if (override === "default") { + delete next[flagKey]; + } else { + next[flagKey] = override; + } + + config.featureFlagOverrides = Object.keys(next).length > 0 ? next : undefined; + return config; + }); + } + /** * Get the configured SSH hostname for this server (used for editor deep links in browser mode). */ diff --git a/src/node/orpc/context.ts b/src/node/orpc/context.ts index 79e38731f7..6a7ee4935a 100644 --- a/src/node/orpc/context.ts +++ b/src/node/orpc/context.ts @@ -16,6 +16,8 @@ import type { MCPConfigService } from "@/node/services/mcpConfigService"; import type { ExperimentsService } from "@/node/services/experimentsService"; import type { MCPServerManager } from "@/node/services/mcpServerManager"; import type { TelemetryService } from "@/node/services/telemetryService"; +import type { FeatureFlagService } from "@/node/services/featureFlagService"; +import type { SessionTimingService } from "@/node/services/sessionTimingService"; import type { SessionUsageService } from "@/node/services/sessionUsageService"; export interface ORPCContext { @@ -34,6 +36,8 @@ export interface ORPCContext { voiceService: VoiceService; mcpConfigService: MCPConfigService; mcpServerManager: MCPServerManager; + featureFlagService: FeatureFlagService; + sessionTimingService: SessionTimingService; telemetryService: TelemetryService; experimentsService: ExperimentsService; sessionUsageService: SessionUsageService; diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index f11c2fcdc8..d2ceabf328 
100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -9,6 +9,7 @@ import type { UpdateStatus, WorkspaceActivitySnapshot, WorkspaceChatMessage, + WorkspaceStatsSnapshot, FrontendWorkspaceMetadataSchemaType, } from "@/common/orpc/types"; import { createAuthMiddleware } from "./authMiddleware"; @@ -94,6 +95,24 @@ export const router = (authToken?: string) => { })); }), }, + features: { + getStatsTabState: t + .input(schemas.features.getStatsTabState.input) + .output(schemas.features.getStatsTabState.output) + .handler(async ({ context }) => { + const state = await context.featureFlagService.getStatsTabState(); + context.sessionTimingService.setStatsTabState(state); + return state; + }), + setStatsTabOverride: t + .input(schemas.features.setStatsTabOverride.input) + .output(schemas.features.setStatsTabOverride.output) + .handler(async ({ context, input }) => { + const state = await context.featureFlagService.setStatsTabOverride(input.override); + context.sessionTimingService.setStatsTabState(state); + return state; + }), + }, providers: { list: t .input(schemas.providers.list.input) @@ -976,6 +995,55 @@ export const router = (authToken?: string) => { .handler(async ({ context, input }) => { return context.sessionUsageService.getSessionUsage(input.workspaceId); }), + stats: { + subscribe: t + .input(schemas.workspace.stats.subscribe.input) + .output(schemas.workspace.stats.subscribe.output) + .handler(async function* ({ context, input }) { + const workspaceId = input.workspaceId; + + context.sessionTimingService.addSubscriber(workspaceId); + + const queue = createAsyncEventQueue(); + let pending = Promise.resolve(); + + const enqueueSnapshot = () => { + pending = pending.then(async () => { + queue.push(await context.sessionTimingService.getSnapshot(workspaceId)); + }); + }; + + const onChange = (changedWorkspaceId: string) => { + if (changedWorkspaceId !== workspaceId) { + return; + } + enqueueSnapshot(); + }; + + 
context.sessionTimingService.onStatsChange(onChange); + + try { + queue.push(await context.sessionTimingService.getSnapshot(workspaceId)); + yield* queue.iterate(); + } finally { + queue.end(); + context.sessionTimingService.offStatsChange(onChange); + context.sessionTimingService.removeSubscriber(workspaceId); + } + }), + clear: t + .input(schemas.workspace.stats.clear.input) + .output(schemas.workspace.stats.clear.output) + .handler(async ({ context, input }) => { + try { + await context.sessionTimingService.clearTimingFile(input.workspaceId); + return { success: true, data: undefined }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + }), + }, mcp: { get: t .input(schemas.workspace.mcp.get.input) diff --git a/src/node/services/agentSession.disposeRace.test.ts b/src/node/services/agentSession.disposeRace.test.ts index 161485a338..2dd56c4e62 100644 --- a/src/node/services/agentSession.disposeRace.test.ts +++ b/src/node/services/agentSession.disposeRace.test.ts @@ -105,7 +105,8 @@ describe("AgentSession disposal race conditions", () => { workspaceId: "ws", messageId: "m1", model: "anthropic:claude-sonnet-4-5", - timestamp: Date.now(), + historySequence: 1, + startTime: Date.now(), }) ).not.toThrow(); }); diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 9c9560ab43..96558c8f74 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1343,6 +1343,8 @@ export class AIService extends EventEmitter { messageId: assistantMessageId, model: modelString, historySequence, + startTime: Date.now(), + ...(uiMode && { mode: uiMode }), }; this.emit("stream-start", streamStartEvent); @@ -1378,6 +1380,8 @@ export class AIService extends EventEmitter { messageId: assistantMessageId, model: modelString, historySequence, + startTime: Date.now(), + ...(uiMode && { mode: uiMode }), }; this.emit("stream-start", streamStartEvent); diff 
--git a/src/node/services/featureFlagService.ts b/src/node/services/featureFlagService.ts new file mode 100644 index 0000000000..d5ff7ff04a --- /dev/null +++ b/src/node/services/featureFlagService.ts @@ -0,0 +1,50 @@ +import type { Config } from "@/node/config"; +import type { TelemetryService } from "@/node/services/telemetryService"; +import { FEATURE_FLAG_KEYS } from "@/common/constants/featureFlags"; +import type { StatsTabOverride, StatsTabState, StatsTabVariant } from "./sessionTimingService"; + +const FLAG_CACHE_TTL_MS = 10 * 60 * 1000; + +export class FeatureFlagService { + private readonly config: Config; + private readonly telemetryService: TelemetryService; + + private cachedVariant: { value: StatsTabVariant; fetchedAt: number } | null = null; + + constructor(config: Config, telemetryService: TelemetryService) { + this.config = config; + this.telemetryService = telemetryService; + } + + private getOverride(): StatsTabOverride { + return this.config.getFeatureFlagOverride(FEATURE_FLAG_KEYS.statsTabV1); + } + + private async getVariant(): Promise { + const now = Date.now(); + if (this.cachedVariant && now - this.cachedVariant.fetchedAt < FLAG_CACHE_TTL_MS) { + return this.cachedVariant.value; + } + + const value = await this.telemetryService.getFeatureFlag(FEATURE_FLAG_KEYS.statsTabV1); + + const variant: StatsTabVariant = value === true || value === "stats" ? "stats" : "control"; + + this.cachedVariant = { value: variant, fetchedAt: now }; + return variant; + } + + async getStatsTabState(): Promise { + const override = this.getOverride(); + const variant = await this.getVariant(); + + const enabled = override === "on" ? true : override === "off" ? 
false : variant === "stats"; + + return { enabled, variant, override }; + } + + async setStatsTabOverride(override: StatsTabOverride): Promise { + await this.config.setFeatureFlagOverride(FEATURE_FLAG_KEYS.statsTabV1, override); + return this.getStatsTabState(); + } +} diff --git a/src/node/services/mock/mockScenarioPlayer.ts b/src/node/services/mock/mockScenarioPlayer.ts index 606e38b278..fc89079685 100644 --- a/src/node/services/mock/mockScenarioPlayer.ts +++ b/src/node/services/mock/mockScenarioPlayer.ts @@ -272,6 +272,8 @@ export class MockScenarioPlayer { messageId, model: event.model, historySequence, + startTime: Date.now(), + ...(event.mode && { mode: event.mode }), }; this.deps.aiService.emit("stream-start", payload); break; diff --git a/src/node/services/mock/scenarioTypes.ts b/src/node/services/mock/scenarioTypes.ts index 112fa79e91..787960beda 100644 --- a/src/node/services/mock/scenarioTypes.ts +++ b/src/node/services/mock/scenarioTypes.ts @@ -20,6 +20,7 @@ export interface MockStreamStartEvent extends MockAssistantEventBase { kind: "stream-start"; messageId: string; model: string; + mode?: "plan" | "exec"; } export interface MockStreamDeltaEvent extends MockAssistantEventBase { diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts index 7ae56b9d7b..cc8832ab7a 100644 --- a/src/node/services/serviceContainer.ts +++ b/src/node/services/serviceContainer.ts @@ -20,6 +20,18 @@ import { ServerService } from "@/node/services/serverService"; import { MenuEventService } from "@/node/services/menuEventService"; import { VoiceService } from "@/node/services/voiceService"; import { TelemetryService } from "@/node/services/telemetryService"; +import type { + ReasoningDeltaEvent, + StreamAbortEvent, + StreamDeltaEvent, + StreamEndEvent, + StreamStartEvent, + ToolCallDeltaEvent, + ToolCallEndEvent, + ToolCallStartEvent, +} from "@/common/types/stream"; +import { FeatureFlagService } from "@/node/services/featureFlagService"; 
+import { SessionTimingService } from "@/node/services/sessionTimingService"; import { ExperimentsService } from "@/node/services/experimentsService"; import { BackgroundProcessManager } from "@/node/services/backgroundProcessManager"; import { MCPConfigService } from "@/node/services/mcpConfigService"; @@ -52,6 +64,8 @@ export class ServiceContainer { public readonly mcpConfigService: MCPConfigService; public readonly mcpServerManager: MCPServerManager; public readonly telemetryService: TelemetryService; + public readonly featureFlagService: FeatureFlagService; + public readonly sessionTimingService: SessionTimingService; public readonly experimentsService: ExperimentsService; public readonly sessionUsageService: SessionUsageService; private readonly initStateManager: InitStateManager; @@ -122,6 +136,34 @@ export class ServiceContainer { telemetryService: this.telemetryService, muxHome: config.rootDir, }); + this.featureFlagService = new FeatureFlagService(config, this.telemetryService); + this.sessionTimingService = new SessionTimingService(config, this.telemetryService); + + // Backend timing stats (behind feature flag). 
+ this.aiService.on("stream-start", (data: StreamStartEvent) => + this.sessionTimingService.handleStreamStart(data) + ); + this.aiService.on("stream-delta", (data: StreamDeltaEvent) => + this.sessionTimingService.handleStreamDelta(data) + ); + this.aiService.on("reasoning-delta", (data: ReasoningDeltaEvent) => + this.sessionTimingService.handleReasoningDelta(data) + ); + this.aiService.on("tool-call-start", (data: ToolCallStartEvent) => + this.sessionTimingService.handleToolCallStart(data) + ); + this.aiService.on("tool-call-delta", (data: ToolCallDeltaEvent) => + this.sessionTimingService.handleToolCallDelta(data) + ); + this.aiService.on("tool-call-end", (data: ToolCallEndEvent) => + this.sessionTimingService.handleToolCallEnd(data) + ); + this.aiService.on("stream-end", (data: StreamEndEvent) => + this.sessionTimingService.handleStreamEnd(data) + ); + this.aiService.on("stream-abort", (data: StreamAbortEvent) => + this.sessionTimingService.handleStreamAbort(data) + ); this.workspaceService.setExperimentsService(this.experimentsService); } @@ -129,6 +171,14 @@ export class ServiceContainer { await this.extensionMetadata.initialize(); // Initialize telemetry service await this.telemetryService.initialize(); + + // Initialize feature flag state (don't block startup on network). + this.featureFlagService + .getStatsTabState() + .then((state) => this.sessionTimingService.setStatsTabState(state)) + .catch(() => { + // Ignore feature flag failures. 
+ }); await this.experimentsService.initialize(); // Start idle compaction checker this.idleCompactionService.start(); diff --git a/src/node/services/sessionTimingService.test.ts b/src/node/services/sessionTimingService.test.ts new file mode 100644 index 0000000000..bf86722e2c --- /dev/null +++ b/src/node/services/sessionTimingService.test.ts @@ -0,0 +1,211 @@ +import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test"; +import * as fs from "fs/promises"; +import * as os from "os"; +import * as path from "path"; + +import { Config } from "@/node/config"; +import { SessionTimingService } from "./sessionTimingService"; +import type { TelemetryService } from "./telemetryService"; +import { normalizeGatewayModel } from "@/common/utils/ai/models"; + +function createMockTelemetryService(): Pick { + return { + capture: mock(() => undefined), + getFeatureFlag: mock(() => Promise.resolve(undefined)), + }; +} + +describe("SessionTimingService", () => { + let tempDir: string; + let config: Config; + + beforeEach(async () => { + tempDir = path.join(os.tmpdir(), `mux-session-timing-test-${Date.now()}-${Math.random()}`); + await fs.mkdir(tempDir, { recursive: true }); + config = new Config(tempDir); + }); + + afterEach(async () => { + try { + await fs.rm(tempDir, { recursive: true, force: true }); + } catch { + // ignore + } + }); + + it("persists completed stream stats to session-timing.json", async () => { + const telemetry = createMockTelemetryService(); + const service = new SessionTimingService(config, telemetry as unknown as TelemetryService); + service.setStatsTabState({ enabled: true, variant: "stats", override: "default" }); + + const workspaceId = "test-workspace"; + const messageId = "m1"; + const model = "openai:gpt-4o"; + const startTime = 1_000_000; + + service.handleStreamStart({ + type: "stream-start", + workspaceId, + messageId, + model, + historySequence: 1, + startTime, + mode: "exec", + }); + + service.handleStreamDelta({ + type: 
"stream-delta", + workspaceId, + messageId, + delta: "hi", + tokens: 5, + timestamp: startTime + 1000, + }); + + service.handleToolCallStart({ + type: "tool-call-start", + workspaceId, + messageId, + toolCallId: "t1", + toolName: "bash", + args: { cmd: "echo hi" }, + tokens: 3, + timestamp: startTime + 2000, + }); + + service.handleToolCallEnd({ + type: "tool-call-end", + workspaceId, + messageId, + toolCallId: "t1", + toolName: "bash", + result: { ok: true }, + timestamp: startTime + 3000, + }); + + service.handleStreamEnd({ + type: "stream-end", + workspaceId, + messageId, + metadata: { + model, + duration: 5000, + usage: { + inputTokens: 1, + outputTokens: 10, + totalTokens: 11, + reasoningTokens: 2, + }, + }, + parts: [], + }); + + await service.waitForIdle(workspaceId); + + const filePath = path.join(config.getSessionDir(workspaceId), "session-timing.json"); + const raw = await fs.readFile(filePath, "utf-8"); + const parsed = JSON.parse(raw) as unknown; + expect(typeof parsed).toBe("object"); + expect(parsed).not.toBeNull(); + + const file = await service.getSnapshot(workspaceId); + expect(file.lastRequest?.messageId).toBe(messageId); + expect(file.lastRequest?.totalDurationMs).toBe(5000); + expect(file.lastRequest?.toolExecutionMs).toBe(1000); + expect(file.lastRequest?.ttftMs).toBe(1000); + expect(file.lastRequest?.streamingMs).toBe(3000); + expect(file.lastRequest?.invalid).toBe(false); + + expect(file.session?.responseCount).toBe(1); + expect(file.session?.totalDurationMs).toBe(5000); + expect(file.session?.totalToolExecutionMs).toBe(1000); + expect(file.session?.totalStreamingMs).toBe(3000); + expect(file.session?.totalOutputTokens).toBe(10); + expect(file.session?.totalReasoningTokens).toBe(2); + + const normalizedModel = normalizeGatewayModel(model); + const key = `${normalizedModel}:exec`; + expect(file.session?.byModel[key]).toBeDefined(); + expect(file.session?.byModel[key]?.responseCount).toBe(1); + }); + + it("emits invalid timing telemetry when 
tool percent would exceed 100%", async () => { + const telemetry = createMockTelemetryService(); + const service = new SessionTimingService(config, telemetry as unknown as TelemetryService); + service.setStatsTabState({ enabled: true, variant: "stats", override: "default" }); + + const workspaceId = "test-workspace"; + const messageId = "m1"; + const model = "openai:gpt-4o"; + const startTime = 2_000_000; + + service.handleStreamStart({ + type: "stream-start", + workspaceId, + messageId, + model, + historySequence: 1, + startTime, + }); + + // Tool runs 10s, but we lie in metadata.duration=1s. + service.handleToolCallStart({ + type: "tool-call-start", + workspaceId, + messageId, + toolCallId: "t1", + toolName: "bash", + args: { cmd: "sleep" }, + tokens: 1, + timestamp: startTime + 100, + }); + + service.handleToolCallEnd({ + type: "tool-call-end", + workspaceId, + messageId, + toolCallId: "t1", + toolName: "bash", + result: { ok: true }, + timestamp: startTime + 10_100, + }); + + service.handleStreamEnd({ + type: "stream-end", + workspaceId, + messageId, + metadata: { + model, + duration: 1000, + usage: { + inputTokens: 1, + outputTokens: 1, + totalTokens: 2, + }, + }, + parts: [], + }); + + await service.waitForIdle(workspaceId); + + expect(telemetry.capture).toHaveBeenCalled(); + + // Bun's mock() returns a callable with `.mock.calls`, but our TelemetryService typing + // does not expose that. Introspect via unknown. 
+ const calls = (telemetry.capture as unknown as { mock: { calls: Array<[unknown]> } }).mock + .calls; + + const invalidCalls = calls.filter((c) => { + const payload = c[0]; + if (!payload || typeof payload !== "object") { + return false; + } + + return ( + "event" in payload && (payload as { event?: unknown }).event === "stream_timing_invalid" + ); + }); + + expect(invalidCalls.length).toBeGreaterThan(0); + }); +}); diff --git a/src/node/services/sessionTimingService.ts b/src/node/services/sessionTimingService.ts new file mode 100644 index 0000000000..3193c24e19 --- /dev/null +++ b/src/node/services/sessionTimingService.ts @@ -0,0 +1,666 @@ +import assert from "@/common/utils/assert"; +import * as fs from "fs/promises"; +import * as path from "path"; +import { EventEmitter } from "events"; +import writeFileAtomic from "write-file-atomic"; +import type { Config } from "@/node/config"; +import { workspaceFileLocks } from "@/node/utils/concurrency/workspaceFileLocks"; +import { normalizeGatewayModel } from "@/common/utils/ai/models"; +import { + ActiveStreamStatsSchema, + CompletedStreamStatsSchema, + SessionTimingFileSchema, +} from "@/common/orpc/schemas/workspaceStats"; +import type { + ActiveStreamStats, + CompletedStreamStats, + SessionTimingFile, + TimingAnomaly, + WorkspaceStatsSnapshot, +} from "@/common/orpc/schemas/workspaceStats"; +import type { + StreamStartEvent, + StreamDeltaEvent, + ReasoningDeltaEvent, + ToolCallStartEvent, + ToolCallDeltaEvent, + ToolCallEndEvent, + StreamEndEvent, + StreamAbortEvent, +} from "@/common/types/stream"; +import { createDeltaStorage, type DeltaRecordStorage } from "@/common/utils/tokens/tps"; +import { log } from "./log"; +import type { TelemetryService } from "./telemetryService"; +import { roundToBase2 } from "@/common/telemetry/utils"; + +const SESSION_TIMING_FILE = "session-timing.json"; + +export type StatsTabVariant = "control" | "stats"; +export type StatsTabOverride = "default" | "on" | "off"; + +export interface 
StatsTabState { + enabled: boolean; + variant: StatsTabVariant; + override: StatsTabOverride; +} + +interface ActiveStreamState { + workspaceId: string; + messageId: string; + model: string; + mode?: "plan" | "exec"; + + startTimeMs: number; + firstTokenTimeMs: number | null; + + completedToolExecutionMs: number; + pendingToolStarts: Map<string, number>; + + outputTokensByDelta: number; + reasoningTokensByDelta: number; + + deltaStorage: DeltaRecordStorage; + + lastEventTimestampMs: number; +} + +function getModelKey(model: string, mode: "plan" | "exec" | undefined): string { + return mode ? `${model}:${mode}` : model; +} + +function createEmptyTimingFile(): SessionTimingFile { + return { + version: 1, + session: { + totalDurationMs: 0, + totalToolExecutionMs: 0, + totalStreamingMs: 0, + totalTtftMs: 0, + ttftCount: 0, + responseCount: 0, + totalOutputTokens: 0, + totalReasoningTokens: 0, + byModel: {}, + }, + }; +} + +function isFiniteNumber(value: number): boolean { + return Number.isFinite(value); +} + +function validateTiming(params: { + totalDurationMs: number; + toolExecutionMs: number; + ttftMs: number | null; + modelTimeMs: number; + streamingMs: number; +}): { invalid: boolean; anomalies: TimingAnomaly[] } { + const anomalies: TimingAnomaly[] = []; + + if ( + !isFiniteNumber(params.totalDurationMs) || + !isFiniteNumber(params.toolExecutionMs) || + !isFiniteNumber(params.modelTimeMs) || + !isFiniteNumber(params.streamingMs) || + (params.ttftMs !== null && !isFiniteNumber(params.ttftMs)) + ) { + anomalies.push("nan"); + } + + if ( + params.totalDurationMs < 0 || + params.toolExecutionMs < 0 || + params.modelTimeMs < 0 || + params.streamingMs < 0 || + (params.ttftMs !== null && params.ttftMs < 0) + ) { + anomalies.push("negative_duration"); + } + + if (params.toolExecutionMs > params.totalDurationMs) { + anomalies.push("tool_gt_total"); + } + + if (params.ttftMs !== null && params.ttftMs > params.totalDurationMs) { + anomalies.push("ttft_gt_total"); + } + + if
(params.totalDurationMs > 0) { + const toolPercent = (params.toolExecutionMs / params.totalDurationMs) * 100; + const modelPercent = (params.modelTimeMs / params.totalDurationMs) * 100; + if ( + toolPercent < 0 || + toolPercent > 100 || + modelPercent < 0 || + modelPercent > 100 || + !Number.isFinite(toolPercent) || + !Number.isFinite(modelPercent) + ) { + anomalies.push("percent_out_of_range"); + } + } + + return { invalid: anomalies.length > 0, anomalies }; +} + +/** + * SessionTimingService + * + * Backend source-of-truth for timing stats. + * - Keeps active stream timing in memory + * - Persists cumulative session timing to ~/.mux/sessions/{workspaceId}/session-timing.json + * - Emits snapshots to oRPC subscribers + */ +export class SessionTimingService { + private readonly config: Config; + private readonly telemetryService: TelemetryService; + private readonly fileLocks = workspaceFileLocks; + + private readonly activeStreams = new Map<string, ActiveStreamState>(); + private readonly timingFileCache = new Map<string, SessionTimingFile>(); + + private readonly emitter = new EventEmitter(); + private readonly subscriberCounts = new Map<string, number>(); + + // Serialize disk writes per workspace; useful for tests and crash-safe ordering. + private readonly pendingWrites = new Map<string, Promise<void>>(); + private readonly writeEpoch = new Map<string, number>(); + private readonly tickIntervals = new Map<string, NodeJS.Timeout>(); + + private statsTabState: StatsTabState = { + enabled: false, + variant: "control", + override: "default", + }; + + constructor(config: Config, telemetryService: TelemetryService) { + this.config = config; + this.telemetryService = telemetryService; + } + + setStatsTabState(state: StatsTabState): void { + this.statsTabState = state; + } + + isEnabled(): boolean { + return this.statsTabState.enabled; + } + + addSubscriber(workspaceId: string): void { + const next = (this.subscriberCounts.get(workspaceId) ??
0) + 1; + this.subscriberCounts.set(workspaceId, next); + this.ensureTicking(workspaceId); + } + + removeSubscriber(workspaceId: string): void { + const current = this.subscriberCounts.get(workspaceId) ?? 0; + const next = Math.max(0, current - 1); + if (next === 0) { + this.subscriberCounts.delete(workspaceId); + const interval = this.tickIntervals.get(workspaceId); + if (interval) { + clearInterval(interval); + this.tickIntervals.delete(workspaceId); + } + return; + } + this.subscriberCounts.set(workspaceId, next); + } + + onStatsChange(listener: (workspaceId: string) => void): void { + this.emitter.on("change", listener); + } + + offStatsChange(listener: (workspaceId: string) => void): void { + this.emitter.off("change", listener); + } + + private emitChange(workspaceId: string): void { + // Only wake subscribers if anyone is listening for this workspace. + if ((this.subscriberCounts.get(workspaceId) ?? 0) === 0) { + return; + } + this.emitter.emit("change", workspaceId); + } + + private ensureTicking(workspaceId: string): void { + if (this.tickIntervals.has(workspaceId)) { + return; + } + + // Tick only while there is an active stream. 
+ const interval = setInterval(() => { + if (!this.activeStreams.has(workspaceId)) { + return; + } + this.emitChange(workspaceId); + }, 1000); + + this.tickIntervals.set(workspaceId, interval); + } + + private getFilePath(workspaceId: string): string { + return path.join(this.config.getSessionDir(workspaceId), SESSION_TIMING_FILE); + } + + private async readTimingFile(workspaceId: string): Promise<SessionTimingFile> { + try { + const data = await fs.readFile(this.getFilePath(workspaceId), "utf-8"); + const parsed = JSON.parse(data) as unknown; + const validated = SessionTimingFileSchema.parse(parsed); + return validated; + } catch (error) { + if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") { + return createEmptyTimingFile(); + } + log.warn(`session-timing.json corrupted for ${workspaceId}; resetting`, { error }); + return createEmptyTimingFile(); + } + } + + private async writeTimingFile(workspaceId: string, data: SessionTimingFile): Promise<void> { + const filePath = this.getFilePath(workspaceId); + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await writeFileAtomic(filePath, JSON.stringify(data, null, 2)); + } + + async waitForIdle(workspaceId: string): Promise<void> { + await (this.pendingWrites.get(workspaceId) ??
Promise.resolve()); + } + + private applyCompletedStreamToFile( + file: SessionTimingFile, + completed: CompletedStreamStats + ): void { + file.lastRequest = completed; + + file.session.totalDurationMs += completed.totalDurationMs; + file.session.totalToolExecutionMs += completed.toolExecutionMs; + file.session.totalStreamingMs += completed.streamingMs; + if (completed.ttftMs !== null) { + file.session.totalTtftMs += completed.ttftMs; + file.session.ttftCount += 1; + } + file.session.responseCount += 1; + file.session.totalOutputTokens += completed.outputTokens; + file.session.totalReasoningTokens += completed.reasoningTokens; + + const key = getModelKey(completed.model, completed.mode); + const existing = file.session.byModel[key]; + const base = existing ?? { + model: completed.model, + mode: completed.mode, + totalDurationMs: 0, + totalToolExecutionMs: 0, + totalStreamingMs: 0, + totalTtftMs: 0, + ttftCount: 0, + responseCount: 0, + totalOutputTokens: 0, + totalReasoningTokens: 0, + }; + + base.totalDurationMs += completed.totalDurationMs; + base.totalToolExecutionMs += completed.toolExecutionMs; + base.totalStreamingMs += completed.streamingMs; + if (completed.ttftMs !== null) { + base.totalTtftMs += completed.ttftMs; + base.ttftCount += 1; + } + base.responseCount += 1; + base.totalOutputTokens += completed.outputTokens; + base.totalReasoningTokens += completed.reasoningTokens; + + file.session.byModel[key] = base; + } + + private queuePersistCompletedStream(workspaceId: string, completed: CompletedStreamStats): void { + const epoch = this.writeEpoch.get(workspaceId) ?? 0; + + const previous = this.pendingWrites.get(workspaceId) ?? Promise.resolve(); + + const next = previous + .then(async () => { + await this.fileLocks.withLock(workspaceId, async () => { + // If a clear() happened after this persist was scheduled, skip. + if ((this.writeEpoch.get(workspaceId) ?? 
0) !== epoch) { + return; + } + + const current = await this.readTimingFile(workspaceId); + this.applyCompletedStreamToFile(current, completed); + + await this.writeTimingFile(workspaceId, current); + this.timingFileCache.set(workspaceId, current); + }); + + // Telemetry (only when feature enabled) + const durationSecs = Math.max(0, completed.totalDurationMs / 1000); + + const toolPercentBucket = + completed.totalDurationMs > 0 + ? Math.max( + 0, + Math.min( + 100, + Math.round(((completed.toolExecutionMs / completed.totalDurationMs) * 100) / 5) * + 5 + ) + ) + : 0; + + this.telemetryService.capture({ + event: "stream_timing_computed", + properties: { + model: completed.model, + mode: completed.mode ?? "unknown", + duration_b2: roundToBase2(durationSecs), + ttft_ms_b2: completed.ttftMs !== null ? roundToBase2(completed.ttftMs) : 0, + tool_ms_b2: roundToBase2(completed.toolExecutionMs), + streaming_ms_b2: roundToBase2(completed.streamingMs), + tool_percent_bucket: toolPercentBucket, + invalid: completed.invalid, + }, + }); + + if (completed.invalid) { + const reason = completed.anomalies[0] ?? "unknown"; + this.telemetryService.capture({ + event: "stream_timing_invalid", + properties: { + reason, + }, + }); + } + }) + .catch((error) => { + log.warn(`Failed to persist session-timing.json for ${workspaceId}`, error); + }); + + this.pendingWrites.set(workspaceId, next); + } + private async getCachedTimingFile(workspaceId: string): Promise<SessionTimingFile> { + const cached = this.timingFileCache.get(workspaceId); + if (cached) { + return cached; + } + + const loaded = await this.fileLocks.withLock(workspaceId, async () => { + return this.readTimingFile(workspaceId); + }); + this.timingFileCache.set(workspaceId, loaded); + return loaded; + } + + async clearTimingFile(workspaceId: string): Promise<void> { + // Invalidate any pending writes. + this.writeEpoch.set(workspaceId, (this.writeEpoch.get(workspaceId) ??
0) + 1); + + await this.fileLocks.withLock(workspaceId, async () => { + this.timingFileCache.delete(workspaceId); + try { + await fs.unlink(this.getFilePath(workspaceId)); + } catch (error) { + if (!(error && typeof error === "object" && "code" in error && error.code === "ENOENT")) { + throw error; + } + } + }); + + this.emitChange(workspaceId); + } + + getActiveStreamStats(workspaceId: string): ActiveStreamStats | undefined { + const state = this.activeStreams.get(workspaceId); + if (!state) return undefined; + + const now = Date.now(); + const elapsedMs = Math.max(0, now - state.startTimeMs); + + let toolExecutionMs = state.completedToolExecutionMs; + for (const toolStart of state.pendingToolStarts.values()) { + toolExecutionMs += Math.max(0, now - toolStart); + } + + const ttftMs = + state.firstTokenTimeMs !== null + ? Math.max(0, state.firstTokenTimeMs - state.startTimeMs) + : null; + + const modelTimeMs = Math.max(0, elapsedMs - toolExecutionMs); + const streamingMs = Math.max(0, elapsedMs - toolExecutionMs - (ttftMs ?? 
0)); + + const validation = validateTiming({ + totalDurationMs: elapsedMs, + toolExecutionMs, + ttftMs, + modelTimeMs, + streamingMs, + }); + + const stats: ActiveStreamStats = { + messageId: state.messageId, + model: state.model, + mode: state.mode, + elapsedMs, + ttftMs, + toolExecutionMs, + modelTimeMs, + streamingMs, + outputTokens: state.outputTokensByDelta, + reasoningTokens: state.reasoningTokensByDelta, + liveTokenCount: state.deltaStorage.getTokenCount(), + liveTPS: state.deltaStorage.calculateTPS(now), + invalid: validation.invalid, + anomalies: validation.anomalies, + }; + + return ActiveStreamStatsSchema.parse(stats); + } + + async getSnapshot(workspaceId: string): Promise<WorkspaceStatsSnapshot> { + const file = await this.getCachedTimingFile(workspaceId); + const active = this.getActiveStreamStats(workspaceId); + + return { + workspaceId, + generatedAt: Date.now(), + active, + lastRequest: file.lastRequest, + session: file.session, + }; + } + + // --- Stream event handlers (wired from AIService) --- + + handleStreamStart(data: StreamStartEvent): void { + if (!this.isEnabled()) return; + + assert(typeof data.workspaceId === "string" && data.workspaceId.length > 0); + assert(typeof data.messageId === "string" && data.messageId.length > 0); + + const model = normalizeGatewayModel(data.model); + + const state: ActiveStreamState = { + workspaceId: data.workspaceId, + messageId: data.messageId, + model, + mode: data.mode, + startTimeMs: data.startTime, + firstTokenTimeMs: null, + completedToolExecutionMs: 0, + pendingToolStarts: new Map(), + outputTokensByDelta: 0, + reasoningTokensByDelta: 0, + deltaStorage: createDeltaStorage(), + lastEventTimestampMs: data.startTime, + }; + + this.activeStreams.set(data.workspaceId, state); + this.emitChange(data.workspaceId); + } + + handleStreamDelta(data: StreamDeltaEvent): void { + const state = this.activeStreams.get(data.workspaceId); + if (!state) return; + + state.lastEventTimestampMs = Math.max(state.lastEventTimestampMs,
data.timestamp); + + if (data.delta.length > 0 && state.firstTokenTimeMs === null) { + state.firstTokenTimeMs = data.timestamp; + this.emitChange(data.workspaceId); + } + + state.outputTokensByDelta += data.tokens; + state.deltaStorage.addDelta({ tokens: data.tokens, timestamp: data.timestamp, type: "text" }); + + this.emitChange(data.workspaceId); + } + + handleReasoningDelta(data: ReasoningDeltaEvent): void { + const state = this.activeStreams.get(data.workspaceId); + if (!state) return; + + state.lastEventTimestampMs = Math.max(state.lastEventTimestampMs, data.timestamp); + + if (data.delta.length > 0 && state.firstTokenTimeMs === null) { + state.firstTokenTimeMs = data.timestamp; + this.emitChange(data.workspaceId); + } + + state.reasoningTokensByDelta += data.tokens; + state.deltaStorage.addDelta({ + tokens: data.tokens, + timestamp: data.timestamp, + type: "reasoning", + }); + + this.emitChange(data.workspaceId); + } + + handleToolCallStart(data: ToolCallStartEvent): void { + const state = this.activeStreams.get(data.workspaceId); + if (!state) return; + + state.lastEventTimestampMs = Math.max(state.lastEventTimestampMs, data.timestamp); + state.pendingToolStarts.set(data.toolCallId, data.timestamp); + + // Tool args contribute to the visible token count + TPS. 
+ state.deltaStorage.addDelta({ + tokens: data.tokens, + timestamp: data.timestamp, + type: "tool-args", + }); + + this.emitChange(data.workspaceId); + } + + handleToolCallDelta(data: ToolCallDeltaEvent): void { + const state = this.activeStreams.get(data.workspaceId); + if (!state) return; + + state.lastEventTimestampMs = Math.max(state.lastEventTimestampMs, data.timestamp); + state.deltaStorage.addDelta({ + tokens: data.tokens, + timestamp: data.timestamp, + type: "tool-args", + }); + + this.emitChange(data.workspaceId); + } + + handleToolCallEnd(data: ToolCallEndEvent): void { + const state = this.activeStreams.get(data.workspaceId); + if (!state) return; + + state.lastEventTimestampMs = Math.max(state.lastEventTimestampMs, data.timestamp); + + const start = state.pendingToolStarts.get(data.toolCallId); + if (start !== undefined) { + state.completedToolExecutionMs += Math.max(0, data.timestamp - start); + state.pendingToolStarts.delete(data.toolCallId); + } + + this.emitChange(data.workspaceId); + } + + handleStreamAbort(data: StreamAbortEvent): void { + // We currently ignore aborted streams for session timing. + this.activeStreams.delete(data.workspaceId); + this.emitChange(data.workspaceId); + } + + handleStreamEnd(data: StreamEndEvent): void { + const state = this.activeStreams.get(data.workspaceId); + if (!state) { + return; + } + + // Stop tracking active stream state immediately. + this.activeStreams.delete(data.workspaceId); + + const durationMs = + typeof data.metadata.duration === "number" && Number.isFinite(data.metadata.duration) + ? data.metadata.duration + : Math.max(0, Date.now() - state.startTimeMs); + + // Include time for any in-flight tools (should not happen, but keep defensive). 
+ let toolExecutionMs = state.completedToolExecutionMs; + const endTimestamp = Math.max(state.lastEventTimestampMs, state.startTimeMs + durationMs); + for (const toolStart of state.pendingToolStarts.values()) { + toolExecutionMs += Math.max(0, endTimestamp - toolStart); + } + + const ttftMs = + state.firstTokenTimeMs !== null + ? Math.max(0, state.firstTokenTimeMs - state.startTimeMs) + : null; + + const modelTimeMs = Math.max(0, durationMs - toolExecutionMs); + const streamingMs = Math.max(0, durationMs - toolExecutionMs - (ttftMs ?? 0)); + + const usage = data.metadata.usage; + const outputTokens = + typeof usage?.outputTokens === "number" ? usage.outputTokens : state.outputTokensByDelta; + const reasoningTokens = + typeof usage?.reasoningTokens === "number" + ? usage.reasoningTokens + : state.reasoningTokensByDelta; + + const validation = validateTiming({ + totalDurationMs: durationMs, + toolExecutionMs, + ttftMs, + modelTimeMs, + streamingMs, + }); + + const completed = { + messageId: data.messageId, + model: state.model, + mode: state.mode, + totalDurationMs: durationMs, + ttftMs, + toolExecutionMs, + modelTimeMs, + streamingMs, + outputTokens, + reasoningTokens, + invalid: validation.invalid, + anomalies: validation.anomalies, + }; + + const completedValidated = CompletedStreamStatsSchema.parse(completed); + + // Optimistically update cache so subscribers see the updated session immediately. 
+ const cached = this.timingFileCache.get(data.workspaceId); + if (cached) { + this.applyCompletedStreamToFile(cached, completedValidated); + } + + this.queuePersistCompletedStream(data.workspaceId, completedValidated); + + this.emitChange(data.workspaceId); + } +} diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts index 2f85435aa0..328c40ede6 100644 --- a/src/node/services/streamManager.ts +++ b/src/node/services/streamManager.ts @@ -789,13 +789,16 @@ export class StreamManager extends EventEmitter { // Update state to streaming streamInfo.state = StreamState.STREAMING; - // Emit stream start event + // Emit stream start event (include mode from initialMetadata if available) + const streamStartMode = streamInfo.initialMetadata?.mode as "plan" | "exec" | undefined; this.emit("stream-start", { type: "stream-start", workspaceId: workspaceId as string, messageId: streamInfo.messageId, model: streamInfo.model, historySequence, + startTime: streamInfo.startTime, + ...(streamStartMode && { mode: streamStartMode }), } as StreamStartEvent); // Initialize token tracker for this model @@ -1689,13 +1692,16 @@ export class StreamManager extends EventEmitter { // Initialize token tracker for this model (required for tokenization) await this.tokenTracker.setModel(streamInfo.model); - // Emit stream-start event + // Emit stream-start event (include mode from initialMetadata if available) + const replayMode = streamInfo.initialMetadata?.mode as "plan" | "exec" | undefined; this.emit("stream-start", { type: "stream-start", workspaceId, messageId: streamInfo.messageId, model: streamInfo.model, historySequence: streamInfo.historySequence, + startTime: streamInfo.startTime, + ...(replayMode && { mode: replayMode }), }); // Replay accumulated parts as events using shared emission logic diff --git a/src/node/services/telemetryService.ts b/src/node/services/telemetryService.ts index 87248fef4e..6ea5af885e 100644 --- 
a/src/node/services/telemetryService.ts +++ b/src/node/services/telemetryService.ts @@ -271,6 +271,19 @@ export class TelemetryService { * Track a telemetry event. * Events are silently ignored when disabled. */ + + async getFeatureFlag(key: string): Promise<string | boolean | undefined> { + if (isTelemetryDisabledByEnv(process.env) || !this.client || !this.distinctId) { + return undefined; + } + + try { + // `getFeatureFlag` will automatically emit $feature_flag_called. + return await this.client.getFeatureFlag(key, this.distinctId, { disableGeoip: true }); + } catch { + return undefined; + } + } capture(payload: TelemetryEventPayload): void { if (isTelemetryDisabledByEnv(process.env) || !this.client || !this.distinctId) { return; diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index 0bef787429..c88a21337f 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -735,7 +735,7 @@ describe("bash tool", () => { // Extremely minimal case - just enough to trigger rebase --continue const script = ` T=$(mktemp -d) && cd "$T" - git init && git config user.email "t@t" && git config user.name "T" + git init && git config user.email "t@t" && git config user.name "T" && git config commit.gpgsign false echo a > f && git add f && git commit -m a git checkout -b b && echo b > f && git commit -am b git checkout main && echo c > f && git commit -am c diff --git a/src/node/services/tools/web_fetch.test.ts b/src/node/services/tools/web_fetch.test.ts index b34597d680..44cb286727 100644 --- a/src/node/services/tools/web_fetch.test.ts +++ b/src/node/services/tools/web_fetch.test.ts @@ -1,5 +1,3 @@ -import { shouldRunIntegrationTests } from "../../../../tests/testUtils"; - import { describe, it, expect } from "bun:test"; import { createWebFetchTool } from "./web_fetch"; import type { WebFetchToolArgs, WebFetchToolResult } from "@/common/types/tools"; @@ -12,7 +10,7 @@ import type { ToolCallOptions } from "ai"; // ToolCallOptions stub for
testing -const itInternet = shouldRunIntegrationTests() ? it : it.skip; +const itIntegration = process.env.TEST_INTEGRATION === "1" ? it : it.skip; const toolCallOptions: ToolCallOptions = { toolCallId: "test-call-id", messages: [], @@ -35,7 +33,7 @@ function createTestWebFetchTool() { describe("web_fetch tool", () => { // Integration test: fetch a real public URL - itInternet("should fetch and convert a real web page to markdown", async () => { + itIntegration("should fetch and convert a real web page to markdown", async () => { using testEnv = createTestWebFetchTool(); const args: WebFetchToolArgs = { // example.com is a stable, simple HTML page maintained by IANA @@ -55,7 +53,7 @@ describe("web_fetch tool", () => { }); // Integration test: fetch plain text endpoint (not HTML) - itInternet("should fetch plain text content without HTML processing", async () => { + itIntegration("should fetch plain text content without HTML processing", async () => { using testEnv = createTestWebFetchTool(); const args: WebFetchToolArgs = { // Cloudflare's trace endpoint returns plain text diagnostics @@ -76,7 +74,7 @@ describe("web_fetch tool", () => { } }); - itInternet("should handle DNS failure gracefully", async () => { + itIntegration("should handle DNS failure gracefully", async () => { using testEnv = createTestWebFetchTool(); const args: WebFetchToolArgs = { // .invalid TLD is reserved and guaranteed to never resolve @@ -221,7 +219,7 @@ describe("web_fetch tool", () => { }); // Test HTTP error handling with body parsing - it("should include HTTP status code in error for non-2xx responses", async () => { + itIntegration("should include HTTP status code in error for non-2xx responses", async () => { using testEnv = createTestWebFetchTool(); const args: WebFetchToolArgs = { // httpbin.dev reliably returns the requested status code @@ -236,7 +234,7 @@ describe("web_fetch tool", () => { } }); - it("should detect Cloudflare challenge pages", async () => { + itIntegration("should 
detect Cloudflare challenge pages", async () => { using testEnv = createTestWebFetchTool(); const args: WebFetchToolArgs = { // platform.openai.com is known to serve Cloudflare challenges diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index 2df85890d5..912e2ac415 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1071,6 +1071,15 @@ export class WorkspaceService extends EventEmitter { } } + const sourceTimingPath = path.join(sourceSessionDir, "session-timing.json"); + const newTimingPath = path.join(newSessionDir, "session-timing.json"); + try { + await fsPromises.copyFile(sourceTimingPath, newTimingPath); + } catch (error) { + if (!(error && typeof error === "object" && "code" in error && error.code === "ENOENT")) { + throw error; + } + } const sourceUsagePath = path.join(sourceSessionDir, "session-usage.json"); const newUsagePath = path.join(newSessionDir, "session-usage.json"); try { diff --git a/tests/ipc/setup.ts b/tests/ipc/setup.ts index 38eb3a1a08..3eed6b912e 100644 --- a/tests/ipc/setup.ts +++ b/tests/ipc/setup.ts @@ -80,6 +80,8 @@ export async function createTestEnvironment(): Promise { updateService: services.updateService, tokenizerService: services.tokenizerService, serverService: services.serverService, + featureFlagService: services.featureFlagService, + sessionTimingService: services.sessionTimingService, mcpConfigService: services.mcpConfigService, mcpServerManager: services.mcpServerManager, menuEventService: services.menuEventService,