diff --git a/.claude/hooks/guard-git.sh b/.claude/hooks/guard-git.sh index 92bf09fa..1702292a 100644 --- a/.claude/hooks/guard-git.sh +++ b/.claude/hooks/guard-git.sh @@ -21,8 +21,8 @@ if [ -z "$COMMAND" ]; then exit 0 fi -# Only act on git commands -if ! echo "$COMMAND" | grep -qE '^\s*git\s+'; then +# Act on git and gh commands (may appear after cd "..." &&) +if ! echo "$COMMAND" | grep -qE '(^|\s|&&\s*)(git|gh)\s+'; then exit 0 fi @@ -74,21 +74,47 @@ if echo "$COMMAND" | grep -qE '^\s*git\s+stash'; then deny "BLOCKED: 'git stash' hides all working tree changes including other sessions' work. In worktree mode, commit your changes directly instead." fi -# --- Branch name validation on push --- +# --- Branch name validation helper --- + +validate_branch_name() { + # Try to get branch from the working directory where the command runs + # Extract cd target if command starts with cd "..." && ... + local work_dir="" + if echo "$COMMAND" | grep -qE '^\s*cd\s+'; then + work_dir=$(echo "$COMMAND" | sed -nE 's/^\s*cd\s+"?([^"&]+)"?\s*&&.*/\1/p') + fi + + local BRANCH="" + if [ -n "$work_dir" ] && [ -d "$work_dir" ]; then + BRANCH=$(git -C "$work_dir" rev-parse --abbrev-ref HEAD 2>/dev/null) || true + fi + if [ -z "$BRANCH" ]; then + BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || true + fi -if echo "$COMMAND" | grep -qE '^\s*git\s+push'; then - BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || true if [ -n "$BRANCH" ] && [ "$BRANCH" != "main" ] && [ "$BRANCH" != "HEAD" ]; then - PATTERN="^(feat|fix|docs|refactor|test|chore|ci|perf|build|release|dependabot|revert)/" + local PATTERN="^(feat|fix|docs|refactor|test|chore|ci|perf|build|release|dependabot|revert)/" if [[ ! "$BRANCH" =~ $PATTERN ]]; then deny "BLOCKED: Branch '$BRANCH' does not match required pattern. Branch names must start with: feat/, fix/, docs/, refactor/, test/, chore/, ci/, perf/, build/, release/, revert/" fi fi +} + +# --- Branch name validation on push --- + +if echo "$COMMAND" | grep -qE '(^|\s|&&\s*)git\s+push'; then + validate_branch_name +fi + +# --- Branch name validation on gh pr create --- + +if echo "$COMMAND" | grep -qE '(^|\s|&&\s*)gh\s+pr\s+create'; then + validate_branch_name fi # --- Commit validation against edit log --- -if echo "$COMMAND" | grep -qE '^\s*git\s+commit'; then +if echo "$COMMAND" | grep -qE '(^|\s|&&\s*)git\s+commit'; then PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" LOG_FILE="$PROJECT_DIR/.claude/session-edits.log" diff --git a/generated/DOGFOOD-REPORT-2.1.0.md b/generated/DOGFOOD-REPORT-2.1.0.md index c8cf0cd5..9d51212e 100644 --- a/generated/DOGFOOD-REPORT-2.1.0.md +++ b/generated/DOGFOOD-REPORT-2.1.0.md @@ -1,287 +1,169 @@ -# Codegraph 2.1.0 Dogfood Report & Improvement Plan +# Dogfood Report — codegraph v2.1.0 -**Date:** 2026-02-22 -**Version tested:** 2.1.0 (npm), native engine v0.1.0 -**Tested on:** codegraph's own codebase (87 files, 443 nodes, 1393 edges) +**Date:** 2026-02-23 +**Platform:** Windows 11 Pro (win32-x64), Node v22.18.0 +**Native binary:** `@optave/codegraph-win32-x64-msvc` 2.1.0 +**Active engine:** native v0.1.0 (auto-detected) +**Target repo:** codegraph itself (92 files, JS + Rust) --- -## Part 1: Dogfood Findings +## 1. Test Summary -### Critical Bug Found & Fixed - -**MCP server crash with `@modelcontextprotocol/sdk` >= 1.26.0** -- `server.setRequestHandler('tools/list', ...)` crashes with `Schema is missing a method literal` -- Root cause: SDK breaking change — `setRequestHandler` now requires Zod schema objects (`ListToolsRequestSchema`, `CallToolRequestSchema`) instead of string method names -- Since `package.json` specifies `^1.0.0`, users installing fresh get the incompatible version -- **Fix applied:** Import schema objects from `@modelcontextprotocol/sdk/types.js` and use them instead of strings -- **Files changed:** `src/mcp.js`, `tests/unit/mcp.test.js` - -### Call Resolution False Positives (Most Impactful Quality Issue) - -The function-level call graph has significant false positives from **name collision**. Any call to a common method name gets resolved to the wrong function: - -| Call site | Actual target | Resolved to | -|-----------|---------------|-------------| -| `insertNode.run(...)` in builder.js | SQLite prepared statement `.run()` | `f run` in tests/integration/cli.test.js:42 | -| `deleteEdgesForFile.run(...)` | SQLite prepared statement | same test `run` | -| `upsertHash.run(...)` | SQLite prepared statement | same test `run` | -| `path.normalize(...)` in resolve.js | Node.js built-in | `f normalize` in tests/engines/parity.test.js:65 | - -**Impact:** `buildGraph` shows 23 callees, but 12 (52%) are false positives. Every function that calls `db.run()` appears to call the test helper. This makes the call graph unreliable for understanding code flow. - -**Root cause:** Call resolution treats any `identifier(...)` call as a match for any global function with that name, without considering: -1. **Receiver/qualifier context** — `stmt.run()` is a method call on `stmt`, not a call to a standalone `run` function -2. **Import scope** — `run` is never imported in `builder.js`, so it can't be called directly -3. **File boundary** — test files shouldn't be callee candidates for source files (unless explicitly imported) - -### Duplicate Call Edges - -Many caller/callee relationships appear multiple times: -- `buildGraph` → `run`: appears 12 times (once per call site, but all resolve to the same target) -- `normalizePath` appears 6x as callee of `resolveImportPathJS` -- `tests/unit/db.test.js` appears 3x as caller of `openDb` - -Expected behavior: each unique caller→callee pair should appear once, optionally with a count of call sites. - -### Missing Symbols - -- **`cli.js` shows 0 symbols** — Commander-style `program.command(...).action(async () => {...})` callbacks are not extracted. This is the main entry point and has the most complex orchestration logic. -- **`.cjs` files show 0 symbols** — `scripts/gen-deps.cjs` and others have `module.exports` and `require()` patterns that aren't extracted (CommonJS). -- **Prepared statements not tracked** — `const insertNode = db.prepare(...)` creates callable objects that are used throughout, but they're just `const` assignments, not function definitions. - -### Other Observations - -| Area | Finding | Severity | -|------|---------|----------| -| `fn` search | Substring matching: `fn run` returns `pruneRegistry` (contains "run") first | Low — but confusing when looking for exact names | -| `fn` disambiguation | When multiple matches exist, all results are shown but without guidance on which is most relevant | Low | -| `diff-impact` | Shows `changedFiles: 5, affectedFunctions: 0` in worktree — the 5 changed files are probably build artifacts, but they're not function-bearing | Low | -| `hotspots` | No `--functions` flag — only file-level hotspots available | Feature gap | -| `cycles --functions` | Works correctly; found the real `walkPythonNode ↔ findPythonParentClass` mutual recursion | Good | -| `search` | Semantic search works well with good ranking | Good | -| `stats` | Clean summary, correct data | Good | -| `structure` | Excellent output with cohesion scores | Good | -| All `--json` flags | Work correctly across all commands | Good | -| Error messages | Clean, helpful messages for missing files/functions | Good | - -### Test Suite - -- **423/423 tests pass** (5 skipped, all for platform-specific reasons) -- Test suite runs in ~2.3 seconds — fast +| Area | Result | +|------|--------| +| `npm install` | OK — native binary + WASM grammars built successfully | +| `npm test` | **494 passed**, 5 skipped, 0 failures | +| `npm run lint` | Clean — no issues | +| Native engine build | 500 nodes, 724 edges | +| WASM engine build | 527 nodes, 699 edges | +| Incremental rebuild (no changes) | Correctly detected "Graph is up to date" | --- -## Part 2: Improvement Plan for AI Agent Usage - -The goal: make codegraph the **essential first tool** an AI agent uses when working on any codebase. Every improvement below targets a specific problem AI agents (like Claude) face. +## 2. Commands Tested + +All 22 CLI commands were exercised against the codegraph codebase: + +| Command | Status | Notes | +|---------|--------|-------| +| `build .` | OK | Both `--engine native` and `--engine wasm` | +| `build .` (incremental) | OK | Correctly skips unchanged files | +| `map` | OK | | +| `stats` | OK | | +| `cycles` | OK | 0 file-level, 2 function-level | +| `deps ` | OK | | +| `impact ` | OK | | +| `fn ` | OK | | +| `fn-impact ` | OK | | +| `context ` | OK | Full source + deps + callers + tests | +| `explain ` | OK | Data flow analysis is very useful | +| `explain ` | OK | | +| `where ` | OK | | +| `diff-impact main` | OK | 56 functions changed, 31 callers affected | +| `export --format dot` | OK | | +| `export --format mermaid` | OK | | +| `export --format json` | OK | | +| `structure` | OK | 18 directories, cohesion scores | +| `hotspots` | OK | | +| `models` | OK | 7 models listed | +| `info` | OK | Correctly reports native engine | +| `--version` | OK | `2.1.0` | + +### Edge cases tested + +| Scenario | Result | +|----------|--------| +| Non-existent symbol (`query nonexistent`) | Graceful message: "No results for..." | +| Non-existent file (`deps nonexistent.js`) | Graceful message: "No file matching..." | +| Non-existent symbol (`fn nonexistent`) | Graceful message: "No function/method/class..." | +| `--json` flag on all supporting commands | Correct JSON output | +| `--no-tests` on fn, fn-impact, context, explain, where, diff-impact | Correctly filters test files | +| `--file` filter on fn | Correctly scopes results | -### Priority 1: Fix Call Resolution Accuracy (HIGH IMPACT) - -**Problem:** AI agents trust the call graph to understand what code does. With 52% false positives on common function names, agents will waste tokens investigating wrong call chains, make incorrect assumptions about dependencies, and suggest changes that break things they didn't know were related. - -**Solution: Qualified Call Resolution** +--- -1. **Distinguish method calls from function calls** - - `obj.method()` should only match methods defined on that object's type or prototype - - `standalone()` should only match imported/in-scope standalone functions - - Implementation: During extraction, tag call sites with their receiver (if any). During resolution, only match method calls to methods and standalone calls to functions. +## 3. Bugs Found & Fixed -2. **Respect import scope** - - A call to `foo()` in file A should only resolve to a function `foo` if: - - `foo` is defined in file A, OR - - `foo` is imported (directly or via re-export) in file A, OR - - `foo` is a global/built-in (and should be excluded) - - This alone would eliminate most false positives. +### BUG: `--no-tests` flag missing on `map`, `deps`, `impact`, `hotspots`, `stats`, `query` CLI commands -3. **Deduplicate call edges** - - Store unique `(caller, callee)` pairs with an optional `count` field - - Display: `-> openDb (calls, 3 sites) src/db.js:72` instead of 3 duplicate lines +**Severity:** Medium +**Commit reference:** `ec158c3` claims to add `--no-tests` to these commands, but the CLI option was never wired up. -4. **Exclude built-in/library calls** - - `db.run()`, `path.normalize()`, `console.log()`, `Array.map()` etc. are not user code - - Don't create edges for calls where the receiver is a known library/built-in object - - Optionally, create a separate "external calls" edge type for traceability +**Symptoms:** +- `codegraph map --no-tests` → `error: unknown option '--no-tests'` +- `codegraph deps --no-tests` → `error: unknown option '--no-tests'` +- `codegraph impact --no-tests` → `error: unknown option '--no-tests'` +- `codegraph hotspots --no-tests` → `error: unknown option '--no-tests'` +- `codegraph stats --no-tests` → `error: unknown option '--no-tests'` +- `codegraph query --no-tests` → `error: unknown option '--no-tests'` -### Priority 2: New `context` Command (HIGH IMPACT) +**Root cause:** The underlying data functions (`moduleMapData`, `fileDepsData`, `impactAnalysisData`, `hotspotsData`) all accept a `noTests` option and implement filtering, but the Commander CLI option definitions in `cli.js` were never updated to add `-T, --no-tests` and pass it through. Additionally, `queryNameData` and `statsData` lacked `noTests` support entirely. -**Problem:** When an AI agent needs to understand or modify a function, it currently must: -1. Use `fn` to find the function and its call chain (tokens for parsing output) -2. Read the source file to see the implementation (tokens for file content) -3. Read each dependency file to understand called functions (many more tokens) -4. Often re-read files because it forgot details +**Fix:** +- Added `-T, --no-tests` option and `noTests: !opts.tests` passthrough to all six commands in `cli.js` +- Added `noTests` filtering to `queryNameData` (nodes, callees, callers) and `statsData` (file list, hotspots) +- Added `no_tests` schema property and handler passthrough to MCP tools: `query_function`, `file_deps`, `impact_analysis`, `module_map` +- Standardized all `--no-tests` help text to `'Exclude test/spec files from results'` -This "read-think-read" loop consumes 50-80% of an agent's token budget on navigation alone. +**Verification:** +- `deps src/builder.js --no-tests` → "Imported by" drops from 5 to 1 (filters 4 test files) +- `impact src/parser.js --no-tests` → Total drops from 30 to 8 files +- `stats --no-tests` → File count drops from 92 to 59 +- `query buildGraph --no-tests` → Test callers filtered out +- All 494 tests still pass after fix -**Solution: `codegraph context ` command** +### BUG: guard-git hook doesn't validate branch names on `gh pr create` -Returns everything an AI needs to understand and safely modify a function in one call: +**Severity:** Low +**Symptom:** PR #46 was created with branch `worktree-dogfood-testing` which failed the CI branch name check. -``` -codegraph context buildGraph - -# buildGraph (function) — src/builder.js:143-310 -## Source - - -## Direct Dependencies (what it calls) - openDb() — src/db.js:72 — Opens or creates the SQLite database - initSchema() — src/db.js:80 — Creates tables if they don't exist - collectFiles() — src/builder.js:14 — Walks directory tree respecting ignore rules - parseFilesAuto() — src/parser.js:276 — Parses files with tree-sitter - resolveImportsBatch() — src/resolve.js:150 — Resolves all import paths - - -## Callers (what breaks if this changes) - cli.js — main build command handler - watcher.js:watchProject — incremental rebuild trigger - -## Type/Shape Info - Parameters: (dir, options={}) - Returns: { nodeCount, edgeCount, dbPath } - -## Related Tests - tests/integration/build.test.js — 6 tests - tests/integration/build-parity.test.js — 2 tests -``` +**Root cause:** The hook only validated on `git push`, not `gh pr create`. Also, commands prefixed with `cd "..." &&` (standard in worktree sessions) didn't match the `^\s*git\s+` pattern. -**Options:** -- `--depth N` — Include source of called functions up to N levels deep -- `--no-source` — Metadata only (for quick orientation) -- `--include-tests` — Include test source code too -- `--json` — Machine-readable output +**Fix:** Extended the hook to validate on both `git push` and `gh pr create`, and updated all patterns to match commands after `cd` prefixes. -### Priority 3: New `explain` Command (MEDIUM IMPACT) +--- -**Problem:** AI agents spend many tokens reading code to figure out "what does this module do?" and "how do these pieces fit together?" before they can start actual work. This is especially wasteful for large files like `queries.js` (1009 lines, 21 symbols). +## 4. Observations -**Solution: `codegraph explain ` command** +### 4.1 Engine Parity Gap (Native vs WASM) -Generates a structural summary without requiring the agent to read the entire file: +| Metric | Native | WASM | Delta | +|--------|--------|------|-------| +| Nodes | 500 | 527 | +27 (+5.4%) | +| Edges | 724 | 699 | -25 (-3.5%) | +| Functions | 315 | 342 | +27 | +| Call edges | 591 | 566 | -25 | +| Call confidence | 96.8% | 99.3% | +2.5pp | +| Graph quality | 83/100 | 82/100 | -1 | -``` -codegraph explain src/builder.js - -# src/builder.js — Graph Building Pipeline - 600 lines, 8 exported functions - -## Public API - buildGraph(dir, options) — Main entry: scans files, parses, resolves imports, stores in DB - collectFiles(dir, config) — Directory walker with .gitignore and config-based filtering - getChangedFiles(db, files) — Incremental: returns only files whose hash changed - -## Internal - loadPathAliases(config) — Loads tsconfig/jsconfig path aliases - fileHash(content) — SHA-256 content hash for incremental tracking - getResolved(imports, ...) — Resolves import paths with priority scoring - resolveBarrelExport(db, ...) — Follows re-exports through index.js barrel files - buildMetrics(db, file, ...) — Computes per-file metrics (line count, import/export counts) - -## Data Flow - buildGraph calls: collectFiles → getChangedFiles → parseFilesAuto → getResolved → resolveBarrelExport - Each file is: hashed → parsed → symbols extracted → imports resolved → stored in DB - -## Key Patterns - - Uses better-sqlite3 prepared statements for all DB operations - - Incremental: skips files whose hash matches the DB record - - Handles barrel re-exports by following index.js chains -``` +The native engine extracts 27 fewer function symbols but resolves 25 more call edges. This suggests the native engine may be merging/deduplicating some symbols while being better at call-site resolution. The WASM engine has higher confidence (99.3% vs 96.8%) but lower caller coverage (55.5% vs 60.4%). -### Priority 4: Smarter `fn` Search (MEDIUM IMPACT) +**Recommendation:** The parity test (`build-parity.test.js`) exists but only checks a small fixture. Consider adding a snapshot test on a larger fixture (or the codegraph repo itself) to track parity drift between engines. -**Problem:** `fn run` returns `pruneRegistry` first because it substring-matches "run" in "p**run**eRegistry". An AI agent looking for a specific function wastes tokens processing irrelevant results. +### 4.2 `structure` Cohesion of 0.00 for Test Directories -**Improvements:** -1. **Exact match priority** — If there's an exact name match, show it first (before substring matches) -2. **File-scoped search** — `fn buildGraph --file src/builder.js` to narrow results -3. **Kind filter** — `fn --kind method run` to search only methods -4. **Relevance scoring** — Rank by: exact match > prefix match > substring match > fuzzy match. Weight by fan-in (more-connected functions are more likely targets) +All test directories show `cohesion=0.00`, which is technically correct (tests import source, not each other) but may alarm users who don't understand the metric. Consider hiding cohesion for test directories or adding a note. -### Priority 5: `where` / `locate` Command (MEDIUM IMPACT) +--- -**Problem:** AI agents frequently need to answer "where is X defined?" or "where is X used?" without needing the full dependency chain. Currently they must use `fn` (which shows the full call graph) or `query` (similar), which returns too much data. +## 5. Suggestions for Improvement -**Solution: `codegraph where ` — Minimal, fast lookup** +### 5.1 UX: Default `--no-tests` in Config +Many codebases have large test directories. A `.codegraphrc.json` option like `"excludeTests": true` would let users default to production-only views: +```json +{ + "excludeTests": true +} ``` -codegraph where buildGraph - Defined: src/builder.js:143 (function, exported) - Used in: src/cli.js:45, tests/integration/build.test.js:12, ... - -codegraph where SYMBOL_KINDS - Defined: src/queries.js:5 (const, exported) - Used in: src/queries.js:88, src/queries.js:120, tests/unit/queries-unit.test.js:8 - -codegraph where --file src/builder.js - Functions: buildGraph:143, collectFiles:14, getChangedFiles:87, ... - Imports: db.js, parser.js, resolve.js, config.js, constants.js, logger.js - Exported: buildGraph, collectFiles, getChangedFiles, loadPathAliases, fileHash -``` +This would save typing `-T` on every command while still allowing `--include-tests` to override. -### Priority 6: Extract Symbols from Commander/Express Patterns (LOW IMPACT) +### 5.2 UX: `map` Could Show Coupling Score -**Problem:** `cli.js` shows 0 symbols because all logic is in Commander `.action()` callbacks. This is the main entry point — knowing its structure is critical. +The `map` command shows fan-in/fan-out bars, but doesn't show the actual coupling score (in+out combined). The `stats` command shows "Top 5 coupling hotspots" — `map` could integrate this as a column since it already has the data. -**Solution:** Recognize common callback patterns: -- Commander: `program.command('build').action(async (dir, opts) => {...})` → extract as `command:build` -- Express: `app.get('/api/users', handler)` → extract as `route:GET /api/users` -- Event emitters: `emitter.on('data', handler)` → extract as `event:data` +### 5.3 UX: `explain` Is the Most Useful Command for AI Workflows -### Priority 7: Quality-of-Life Improvements +The `explain` command produces the most AI-agent-friendly output — structured sections (exports, internals, data flow) that give an LLM exactly the context it needs. Consider: +- Making it the default recommendation in the README for AI workflows +- Adding a `--depth` option to recursively explain dependencies -1. **Pin MCP SDK version** — Change `^1.0.0` to `~1.11.0` or a specific compatible range to prevent future breakage -2. **`stats` command enhancement** — Add a "graph quality" score based on: - - % of functions with resolved callers - - % of imports successfully resolved - - Ratio of false-positive-prone names (like `run`, `get`, `set`) in the call graph -3. **`--no-tests` everywhere** — Add this flag to `map`, `hotspots`, `deps`, `impact` (currently only on `fn` variants) -4. **Warn on common false-positive names** — When a function named `run`, `get`, `set`, `init`, `start`, `handle` has > 20 callers, flag it as a potential resolution issue - ---- +### 5.4 Performance: Status Messages to stderr -## Part 3: Implementation Priority Matrix +The native engine still prints "Using native engine" to stdout, which pollutes piped output. Consider using `process.stderr.write` for status messages, keeping stdout clean for actual data output. -| # | Feature | Impact on AI | Effort | Priority | -|---|---------|-------------|--------|----------| -| 1 | Fix call resolution (method vs function) | Critical — eliminates 50%+ false edges | Large | P0 | -| 2 | `context` command | Critical — saves 50-80% of navigation tokens | Medium | P0 | -| 3 | `explain` command | High — saves initial orientation tokens | Medium | P1 | -| 4 | Smarter `fn` search ranking | Medium — reduces noise in results | Small | P1 | -| 5 | `where` command | Medium — fast precise lookups | Small | P1 | -| 6 | Commander/Express extraction | Low — only affects specific patterns | Medium | P2 | -| 7 | QoL improvements | Low-Medium — polish | Small each | P2 | +### 5.5 UX: `--no-tests` Help Text Consistency -### Suggested Implementation Order - -1. **Call resolution fix** (P0) — This is the foundation. Every other feature's value depends on accurate edges. -2. **`context` command** (P0) — The single highest-ROI feature for AI agents. -3. **Deduplicate call edges** (part of P0) — Quick win during resolution refactor. -4. **Smarter `fn` search** (P1) — Small change, big usability improvement. -5. **`where` command** (P1) — Complements `context` for quick lookups. -6. **`explain` command** (P1) — Builds on `context` infrastructure. -7. **Everything else** (P2) — Polish and edge cases. +All commands now use `'Exclude test/spec files from results'` after this fix. Future commands should follow the same wording. --- -## Part 4: What Makes Codegraph Amazing for AI Agents +## 6. Overall Assessment -### Current Strengths (Keep & Amplify) -- **`map` command** is excellent for initial orientation — AI agents should always run this first -- **`structure` command** with cohesion scores gives perfect project overview -- **`--json` flags** on every command enable structured parsing -- **Semantic search** finds functions by intent, not just name -- **`fn-impact` / `diff-impact`** directly answers "what will break?" -- **Fast** — full build + query in seconds, not minutes +Codegraph v2.1.0 on Windows x64 with the native engine is **solid**. All 22 commands work correctly, edge cases are handled gracefully, the test suite is comprehensive (494 tests), and the native binary installs cleanly as an optional dependency. -### The AI Agent Workflow This Enables +The bugs found (missing `--no-tests` wiring on 6 CLI commands + 4 MCP tools, hook not catching `gh pr create`) are fixed in this PR. The engine parity gap is the most significant technical observation — worth tracking but not blocking since both engines produce usable graphs. -``` -1. codegraph map --limit 30 --json → "What are the key modules?" -2. codegraph structure --json → "How is the project organized?" -3. codegraph where --json → "Where exactly is this?" -4. codegraph context --json → "Give me everything I need to modify this" -5. codegraph fn-impact --json → "What will break if I change this?" -6. codegraph diff-impact --json → "Did my changes break anything?" -``` +**Rating: 9/10** — Production-ready with minor consistency issues. -With these improvements, an AI agent can go from "I don't know this codebase" to "I have full context for this change" in 3-4 tool calls instead of 15-20 file reads. That's the difference between a confident, accurate AI assistant and one that guesses and backtracks. diff --git a/src/cli.js b/src/cli.js index d832d2d1..1c5f3aae 100644 --- a/src/cli.js +++ b/src/cli.js @@ -62,18 +62,20 @@ program .command('query ') .description('Find a function/class, show callers and callees') .option('-d, --db ', 'Path to graph.db') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((name, opts) => { - queryName(name, opts.db, { json: opts.json }); + queryName(name, opts.db, { noTests: !opts.tests, json: opts.json }); }); program .command('impact ') .description('Show what depends on this file (transitive)') .option('-d, --db ', 'Path to graph.db') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((file, opts) => { - impactAnalysis(file, opts.db, { json: opts.json }); + impactAnalysis(file, opts.db, { noTests: !opts.tests, json: opts.json }); }); program @@ -81,27 +83,30 @@ program .description('High-level module overview with most-connected nodes') .option('-d, --db ', 'Path to graph.db') .option('-n, --limit ', 'Number of top nodes', '20') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((opts) => { - moduleMap(opts.db, parseInt(opts.limit, 10), { json: opts.json }); + moduleMap(opts.db, parseInt(opts.limit, 10), { noTests: !opts.tests, json: opts.json }); }); program .command('stats') .description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings') .option('-d, --db ', 'Path to graph.db') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((opts) => { - stats(opts.db, { json: opts.json }); + stats(opts.db, { noTests: !opts.tests, json: opts.json }); }); program .command('deps ') .description('Show what this file imports and what imports it') .option('-d, --db ', 'Path to graph.db') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((file, opts) => { - fileDeps(file, opts.db, { json: opts.json }); + fileDeps(file, opts.db, { noTests: !opts.tests, json: opts.json }); }); program @@ -159,7 +164,7 @@ program .option('-k, --kind ', 'Filter to a specific symbol kind') .option('--no-source', 'Metadata only (skip source extraction)') .option('--include-tests', 'Include test source code') - .option('-T, --no-tests', 'Exclude test files from callers') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { @@ -181,7 +186,7 @@ program .command('explain ') .description('Structural summary of a file or function (no LLM needed)') .option('-d, --db ', 'Path to graph.db') - .option('-T, --no-tests', 'Exclude test/spec files') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((target, opts) => { explain(target, opts.db, { noTests: !opts.tests, json: opts.json }); @@ -192,7 +197,7 @@ program .description('Find where a symbol is defined and used (minimal, fast lookup)') .option('-d, --db ', 'Path to graph.db') .option('-f, --file ', 'File overview: list symbols, imports, exports') - .option('-T, --no-tests', 'Exclude test/spec files') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action((name, opts) => { if (!name && !opts.file) { @@ -395,7 +400,7 @@ program .option('-d, --db ', 'Path to graph.db') .option('-m, --model ', 'Override embedding model (auto-detects from DB)') .option('-n, --limit ', 'Max results', '15') - .option('-T, --no-tests', 'Exclude test/spec files') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--min-score ', 'Minimum similarity threshold', '0.2') .option('-k, --kind ', 'Filter by kind: function, method, class') .option('--file ', 'Filter by file path pattern') @@ -444,6 +449,7 @@ program .option('-n, --limit ', 'Number of results', '10') .option('--metric ', 'fan-in | fan-out | density | coupling', 'fan-in') .option('--level ', 'file | directory', 'file') + .option('-T, --no-tests', 'Exclude test/spec files from results') .option('-j, --json', 'Output as JSON') .action(async (opts) => { const { hotspotsData, formatHotspots } = await import('./structure.js'); @@ -451,6 +457,7 @@ program metric: opts.metric, level: opts.level, limit: parseInt(opts.limit, 10), + noTests: !opts.tests, }); if (opts.json) { console.log(JSON.stringify(data, null, 2)); diff --git a/src/mcp.js b/src/mcp.js index b50a0641..d16a6033 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -30,6 +30,7 @@ const BASE_TOOLS = [ description: 'Traversal depth for transitive callers', default: 2, }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, required: ['name'], }, @@ -41,6 +42,7 @@ const BASE_TOOLS = [ type: 'object', properties: { file: { type: 'string', description: 'File path (partial match supported)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, required: ['file'], }, @@ -52,6 +54,7 @@ const BASE_TOOLS = [ type: 'object', properties: { file: { type: 'string', description: 'File path to analyze' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, required: ['file'], }, @@ -71,6 +74,7 @@ const BASE_TOOLS = [ type: 'object', properties: { limit: { type: 'number', description: 'Number of top files to show', default: 20 }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, }, }, @@ -282,6 +286,7 @@ const BASE_TOOLS = [ description: 'Rank files or directories', }, limit: { type: 'number', description: 'Number of results to return', default: 10 }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, }, }, @@ -408,13 +413,13 @@ export async function startMCPServer(customDbPath, options = {}) { let result; switch (name) { case 'query_function': - result = queryNameData(args.name, dbPath); + result = queryNameData(args.name, dbPath, { noTests: args.no_tests }); break; case 'file_deps': - result = fileDepsData(args.file, dbPath); + result = fileDepsData(args.file, dbPath, { noTests: args.no_tests }); break; case 'impact_analysis': - result = impactAnalysisData(args.file, dbPath); + result = impactAnalysisData(args.file, dbPath, { noTests: args.no_tests }); break; case 'find_cycles': { const db = new Database(findDbPath(dbPath), { readonly: true }); @@ -424,7 +429,7 @@ export async function startMCPServer(customDbPath, options = {}) { break; } case 'module_map': - result = moduleMapData(dbPath, args.limit || 20); + result = moduleMapData(dbPath, args.limit || 20, { noTests: args.no_tests }); break; case 'fn_deps': result = fnDepsData(args.name, dbPath, { @@ -536,6 +541,7 @@ export async function startMCPServer(customDbPath, options = {}) { metric: args.metric, level: args.level, limit: args.limit, + noTests: args.no_tests, }); break; } diff --git a/src/queries.js b/src/queries.js index d99e545b..9d43298a 100644 --- a/src/queries.js +++ b/src/queries.js @@ -190,16 +190,18 @@ function kindIcon(kind) { // ─── Data-returning functions ─────────────────────────────────────────── -export function queryNameData(name, customDbPath) { +export function queryNameData(name, customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); - const nodes = db.prepare(`SELECT * FROM nodes WHERE name LIKE ?`).all(`%${name}%`); + const noTests = opts.noTests || false; + let nodes = db.prepare(`SELECT * FROM nodes WHERE name LIKE ?`).all(`%${name}%`); + if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); if (nodes.length === 0) { db.close(); return { query: name, results: [] }; } const results = nodes.map((node) => { - const callees = db + let callees = db .prepare(` SELECT n.name, n.kind, n.file, n.line, e.kind as edge_kind FROM edges e JOIN nodes n ON e.target_id = n.id @@ -207,7 +209,7 @@ export function queryNameData(name, customDbPath) { `) .all(node.id); - const callers = db + let callers = db .prepare(` SELECT n.name, n.kind, n.file, n.line, e.kind as edge_kind FROM edges e JOIN nodes n ON e.source_id = n.id @@ -215,6 +217,11 @@ export function queryNameData(name, customDbPath) { `) .all(node.id); + if (noTests) { + callees = callees.filter((c) => !isTestFile(c.file)); + callers = callers.filter((c) => !isTestFile(c.file)); + } + return { name: node.name, kind: node.kind, @@ -728,11 +735,40 @@ export function listFunctionsData(customDbPath, opts = {}) { return { count: rows.length, functions: rows }; } -export function statsData(customDbPath) { +export function statsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + // Build set of test file IDs for filtering nodes and edges + let testFileIds = null; + if (noTests) { + const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); + testFileIds = new Set(); + const testFiles = new Set(); + for (const n of allFileNodes) { + if (isTestFile(n.file)) { + testFileIds.add(n.id); + testFiles.add(n.file); + } + } + // Also collect non-file node IDs that belong to test files + const allNodes = db.prepare('SELECT id, file FROM nodes').all(); + for (const n of allNodes) { + if (testFiles.has(n.file)) testFileIds.add(n.id); + } + } // Node breakdown by kind - const nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); + let nodeRows; + if (noTests) { + const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); + const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); + const counts = {}; + for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; + nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); + } const nodesByKind = {}; let totalNodes = 0; for (const r of nodeRows) { @@ -741,7 +777,18 @@ export function statsData(customDbPath) { } // Edge breakdown by kind - const edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); + let edgeRows; + if (noTests) { + const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); + const filtered = allEdges.filter( + (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), + ); + const counts = {}; + for (const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; + edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); + } const edgesByKind = {}; let totalEdges = 0; for (const r of edgeRows) { @@ -756,7 +803,8 @@ export function statsData(customDbPath) { extToLang.set(ext, entry.id); } } - const fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); + let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); + if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); const byLanguage = {}; for (const row of fileNodes) { const ext = path.extname(row.file).toLowerCase(); @@ -779,10 +827,10 @@ export function statsData(customDbPath) { WHERE n.kind = 'file' ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC - LIMIT 5 `) .all(); - const hotspots = hotspotRows.map((r) => ({ + const filteredHotspots = noTests ? hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; + const hotspots = filteredHotspots.slice(0, 5).map((r) => ({ file: r.file, fanIn: r.fan_in, fanOut: r.fan_out, @@ -881,7 +929,7 @@ export function statsData(customDbPath) { } export function stats(customDbPath, opts = {}) { - const data = statsData(customDbPath); + const data = statsData(customDbPath, { noTests: opts.noTests }); if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -979,7 +1027,7 @@ export function stats(customDbPath, opts = {}) { // ─── Human-readable output (original formatting) ─────────────────────── export function queryName(name, customDbPath, opts = {}) { - const data = queryNameData(name, customDbPath); + const data = queryNameData(name, customDbPath, { noTests: opts.noTests }); if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index 8d5d5cf9..6c982bca 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -34,6 +34,7 @@ import { impactAnalysisData, moduleMapData, queryNameData, + statsData, whereData, } from '../../src/queries.js'; @@ -470,3 +471,86 @@ describe('whereData', () => { expect(data.results).toHaveLength(0); }); }); + +// ─── noTests filtering ─────────────────────────────────────────────── + +describe('noTests filtering', () => { + test('queryNameData excludes test file nodes and callers', () => { + const all = queryNameData('authenticate', dbPath); + const filtered = queryNameData('authenticate', dbPath, { noTests: true }); + + const allFn = all.results.find((r) => r.name === 'authenticate' && r.kind === 'function'); + const filteredFn = filtered.results.find( + (r) => r.name === 'authenticate' && r.kind === 'function', + ); + + // testAuthenticate should be in callers without filter + expect(allFn.callers.map((c) => c.name)).toContain('testAuthenticate'); + // testAuthenticate should be excluded with noTests + expect(filteredFn.callers.map((c) => c.name)).not.toContain('testAuthenticate'); + }); + + test('queryNameData excludes test file results', () => { + const all = queryNameData('testAuthenticate', dbPath); + const filtered = queryNameData('testAuthenticate', dbPath, { noTests: true }); + + expect(all.results).toHaveLength(1); + expect(filtered.results).toHaveLength(0); + }); + + test('statsData excludes test files from counts', () => { + const all = statsData(dbPath); + const filtered = statsData(dbPath, { noTests: true }); + + // File count should be lower + expect(filtered.files.total).toBeLessThan(all.files.total); + // Node count should be lower (test file + testAuthenticate removed) + expect(filtered.nodes.total).toBeLessThan(all.nodes.total); + // Edge count should be lower (test import + test call edge removed) + expect(filtered.edges.total).toBeLessThan(all.edges.total); + }); + + test('statsData hotspots exclude test files', () => { + const filtered = statsData(dbPath, { noTests: true }); + for (const h of filtered.hotspots) { + expect(h.file).not.toMatch(/\.test\./); + } + }); + + test('impactAnalysisData excludes test dependents', () => { + const all = impactAnalysisData('auth.js', dbPath); + const filtered = impactAnalysisData('auth.js', dbPath, { noTests: true }); + + const allFiles = Object.values(all.levels) + .flat() + .map((f) => f.file); + const filteredFiles = Object.values(filtered.levels) + .flat() + .map((f) => f.file); + + expect(allFiles).toContain('auth.test.js'); + expect(filteredFiles).not.toContain('auth.test.js'); + }); + + test('fileDepsData excludes test importers', () => { + const all = fileDepsData('auth.js', dbPath); + const filtered = fileDepsData('auth.js', dbPath, { noTests: true }); + + const allImportedBy = all.results[0].importedBy.map((i) => i.file); + const filteredImportedBy = filtered.results[0].importedBy.map((i) => i.file); + + expect(allImportedBy).toContain('auth.test.js'); + expect(filteredImportedBy).not.toContain('auth.test.js'); + }); + + test('moduleMapData excludes test files', () => { + const all = moduleMapData(dbPath, 20); + const filtered = moduleMapData(dbPath, 20, { noTests: true }); + + const allFiles = all.topNodes.map((n) => n.file); + const filteredFiles = filtered.topNodes.map((n) => n.file); + + expect(allFiles).toContain('auth.test.js'); + expect(filteredFiles).not.toContain('auth.test.js'); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 22a4f831..4199467c 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -523,7 +523,9 @@ describe('startMCPServer handler dispatch', () => { params: { name: 'query_function', arguments: { name: 'test', repo: 'my-project' } }, }); expect(result.isError).toBeUndefined(); - expect(queryMock).toHaveBeenCalledWith('test', '/resolved/path/.codegraph/graph.db'); + expect(queryMock).toHaveBeenCalledWith('test', '/resolved/path/.codegraph/graph.db', { + noTests: undefined, + }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); vi.doUnmock('@modelcontextprotocol/sdk/server/stdio.js'); @@ -677,7 +679,7 @@ describe('startMCPServer handler dispatch', () => { params: { name: 'query_function', arguments: { name: 'test', repo: 'my-repo' } }, }); expect(result.isError).toBeUndefined(); - expect(queryMock).toHaveBeenCalledWith('test', '/resolved/db'); + expect(queryMock).toHaveBeenCalledWith('test', '/resolved/db', { noTests: undefined }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); vi.doUnmock('@modelcontextprotocol/sdk/server/stdio.js');