diff --git a/.claude/.gitignore b/.claude/.gitignore new file mode 100644 index 00000000..8f600b7e --- /dev/null +++ b/.claude/.gitignore @@ -0,0 +1 @@ +session-edits.log diff --git a/.claude/hooks/check-readme.sh b/.claude/hooks/check-readme.sh index b6862c1c..5f045204 100644 --- a/.claude/hooks/check-readme.sh +++ b/.claude/hooks/check-readme.sh @@ -3,7 +3,14 @@ # Runs as a PreToolUse hook on Bash tool calls. INPUT=$(cat) -COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty') +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true # Only act on git commit commands if ! echo "$COMMAND" | grep -qE '^\s*git\s+commit'; then @@ -30,13 +37,16 @@ if [ "$NEEDS_CHECK" -gt 0 ]; then [ "$CLAUDE_STAGED" -eq 0 ] && MISSING="${MISSING:+$MISSING, }CLAUDE.md" [ "$ROADMAP_STAGED" -eq 0 ] && MISSING="${MISSING:+$MISSING, }ROADMAP.md" - jq -n --arg missing "$MISSING" '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: ($missing + " not staged but source files were changed. Review whether these docs need updating — README.md (language support table, feature list, command docs), CLAUDE.md (architecture table, supported languages, key design decisions), and ROADMAP.md (phase status, new features, deliverables). If they truly do not need changes, re-run the commit with docs check acknowledged.") - } - }' + node -e " + const missing = process.argv[1]; + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: 'deny', + permissionDecisionReason: missing + ' not staged but source files were changed. 
Review whether these docs need updating — README.md (language support table, feature list, command docs), CLAUDE.md (architecture table, supported languages, key design decisions), and ROADMAP.md (phase status, new features, deliverables). If they truly do not need changes, re-run the commit with docs check acknowledged.' + } + })); + " "$MISSING" exit 0 fi diff --git a/.claude/hooks/enrich-context.sh b/.claude/hooks/enrich-context.sh index e22e7adc..83d7ef8f 100644 --- a/.claude/hooks/enrich-context.sh +++ b/.claude/hooks/enrich-context.sh @@ -10,7 +10,15 @@ INPUT=$(cat) # Extract file path based on tool type # Read tool uses tool_input.file_path, Grep uses tool_input.path -FILE_PATH=$(echo "$INPUT" | jq -r '.tool_input.file_path // .tool_input.path // empty' 2>/dev/null) +FILE_PATH=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const o=JSON.parse(d).tool_input||{}; + const p=o.file_path||o.path||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true # Guard: no file path found if [ -z "$FILE_PATH" ]; then @@ -30,8 +38,9 @@ fi # Convert absolute path to relative (strip project dir prefix) REL_PATH="$FILE_PATH" -if [[ "$FILE_PATH" == "${CLAUDE_PROJECT_DIR}"* ]]; then - REL_PATH="${FILE_PATH#"${CLAUDE_PROJECT_DIR}"/}" +PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" +if [[ "$FILE_PATH" == "${PROJECT_DIR}"* ]]; then + REL_PATH="${FILE_PATH#"${PROJECT_DIR}"/}" fi # Normalize backslashes to forward slashes (Windows compatibility) REL_PATH="${REL_PATH//\\//}" @@ -50,13 +59,22 @@ if [ -z "$DEPS" ] || [ "$DEPS" = "null" ]; then fi # Output as informational context (never deny) -echo "$DEPS" | jq -c '{ - hookSpecificOutput: ( - "Codegraph context for " + (.file // "unknown") + ":\n" + - " Imports: " + ((.results[0].imports // []) | length | tostring) + " files\n" + - " Imported by: " + ((.results[0].importedBy // []) | length | tostring) + " files\n" + - " Definitions: " + ((.results[0].definitions // []) | 
length | tostring) + " symbols" - ) -}' 2>/dev/null || true +echo "$DEPS" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + try { + const o=JSON.parse(d); + const r=o.results?.[0]||{}; + const imports=(r.imports||[]).length; + const importedBy=(r.importedBy||[]).length; + const defs=(r.definitions||[]).length; + const file=o.file||'unknown'; + console.log(JSON.stringify({ + hookSpecificOutput: 'Codegraph context for '+file+':\\n Imports: '+imports+' files\\n Imported by: '+importedBy+' files\\n Definitions: '+defs+' symbols' + })); + } catch(e) {} + }); +" 2>/dev/null || true exit 0 diff --git a/.claude/hooks/guard-git.sh b/.claude/hooks/guard-git.sh new file mode 100644 index 00000000..adf0d1e1 --- /dev/null +++ b/.claude/hooks/guard-git.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# guard-git.sh — PreToolUse hook for Bash tool calls +# Blocks dangerous git commands that interfere with parallel sessions +# and validates commits against the session edit log. + +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Only act on git commands +if ! echo "$COMMAND" | grep -qE '^\s*git\s+'; then + exit 0 +fi + +deny() { + local reason="$1" + node -e " + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: 'deny', + permissionDecisionReason: process.argv[1] + } + })); + " "$reason" + exit 0 +} + +# --- Block dangerous commands --- + +# git add . / git add -A / git add --all (broad staging) +if echo "$COMMAND" | grep -qE '^\s*git\s+add\s+(\.\s*$|-A|--all)'; then + deny "BLOCKED: 'git add .' / 'git add -A' stages ALL changes including other sessions' work. 
Stage specific files instead: git add " +fi + +# git reset (unstaging / hard reset) +if echo "$COMMAND" | grep -qE '^\s*git\s+reset'; then + deny "BLOCKED: 'git reset' can unstage or destroy other sessions' work. To unstage your own files, use: git restore --staged " +fi + +# git checkout -- (reverting files) +if echo "$COMMAND" | grep -qE '^\s*git\s+checkout\s+--'; then + deny "BLOCKED: 'git checkout -- ' reverts working tree changes and may destroy other sessions' edits. If you need to discard your own changes, be explicit about which files." +fi + +# git restore (reverting) — EXCEPT git restore --staged (safe unstaging) +if echo "$COMMAND" | grep -qE '^\s*git\s+restore'; then + if ! echo "$COMMAND" | grep -qE '^\s*git\s+restore\s+--staged'; then + deny "BLOCKED: 'git restore ' reverts working tree changes and may destroy other sessions' edits. To unstage files safely, use: git restore --staged " + fi +fi + +# git clean (delete untracked files) +if echo "$COMMAND" | grep -qE '^\s*git\s+clean'; then + deny "BLOCKED: 'git clean' deletes untracked files that may belong to other sessions." +fi + +# git stash (hides all changes) +if echo "$COMMAND" | grep -qE '^\s*git\s+stash'; then + deny "BLOCKED: 'git stash' hides all working tree changes including other sessions' work. In worktree mode, commit your changes directly instead." +fi + +# --- Commit validation against edit log --- + +if echo "$COMMAND" | grep -qE '^\s*git\s+commit'; then + PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + LOG_FILE="$PROJECT_DIR/.claude/session-edits.log" + + # If no edit log exists, allow (backward compat for sessions without tracking) + if [ ! -f "$LOG_FILE" ] || [ ! 
-s "$LOG_FILE" ]; then + exit 0 + fi + + # Get unique edited files from log + EDITED_FILES=$(awk '{print $2}' "$LOG_FILE" | sort -u) + + # Get staged files + STAGED_FILES=$(git diff --cached --name-only 2>/dev/null) || true + + if [ -z "$STAGED_FILES" ]; then + exit 0 + fi + + # Find staged files that weren't edited in this session + UNEXPECTED="" + while IFS= read -r staged_file; do + if ! echo "$EDITED_FILES" | grep -qxF "$staged_file"; then + UNEXPECTED="${UNEXPECTED:+$UNEXPECTED, }$staged_file" + fi + done <<< "$STAGED_FILES" + + if [ -n "$UNEXPECTED" ]; then + deny "BLOCKED: These staged files were NOT edited in this session: $UNEXPECTED. They may belong to another session. Commit only your files: git commit -m \"msg\"" + fi +fi + +exit 0 diff --git a/.claude/hooks/rebuild-graph.sh b/.claude/hooks/rebuild-graph.sh new file mode 100644 index 00000000..e85848ad --- /dev/null +++ b/.claude/hooks/rebuild-graph.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# rebuild-graph.sh — PostToolUse hook for Edit and Write tools +# Incrementally rebuilds the codegraph after source file edits. +# Always exits 0 (informational only, never blocks). 
+ +set -euo pipefail + +INPUT=$(cat) + +# Extract file path using node (jq may not be available on Windows) +FILE_PATH=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.file_path||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$FILE_PATH" ]; then + exit 0 +fi + +# Only rebuild for source files codegraph tracks +# Skip docs, configs, test fixtures, and non-code files +case "$FILE_PATH" in + *.js|*.ts|*.tsx|*.jsx|*.py|*.go|*.rs|*.java|*.cs|*.php|*.rb|*.tf|*.hcl) + ;; + *) + exit 0 + ;; +esac + +# Skip test fixtures — they're copied to tmp dirs anyway +if echo "$FILE_PATH" | grep -qE '(fixtures|__fixtures__|testdata)/'; then + exit 0 +fi + +# Guard: codegraph DB must exist (project has been built at least once) +DB_PATH="${CLAUDE_PROJECT_DIR:-.}/.codegraph/graph.db" +if [ ! -f "$DB_PATH" ]; then + exit 0 +fi + +# Run incremental build (skips unchanged files via hash check) +if command -v codegraph &>/dev/null; then + codegraph build "${CLAUDE_PROJECT_DIR:-.}" -d "$DB_PATH" 2>/dev/null || true +else + node "${CLAUDE_PROJECT_DIR}/src/cli.js" build "${CLAUDE_PROJECT_DIR:-.}" -d "$DB_PATH" 2>/dev/null || true +fi + +exit 0 diff --git a/.claude/hooks/track-edits.sh b/.claude/hooks/track-edits.sh new file mode 100644 index 00000000..c6cb84e3 --- /dev/null +++ b/.claude/hooks/track-edits.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# track-edits.sh — PostToolUse hook for Edit and Write tools +# Logs each edited file path to .claude/session-edits.log so that +# guard-git.sh can validate commits against actually-edited files. +# In worktrees each session gets its own log automatically. +# Always exits 0 (informational only, never blocks). 
+ +set -euo pipefail + +INPUT=$(cat) + +# Extract file_path from tool_input JSON +FILE_PATH=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.file_path||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$FILE_PATH" ]; then + exit 0 +fi + +PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" +LOG_FILE="$PROJECT_DIR/.claude/session-edits.log" + +# Normalize to relative path with forward slashes +REL_PATH=$(node -e " + const path = require('path'); + const abs = path.resolve(process.argv[1]); + const base = path.resolve(process.argv[2]); + const rel = path.relative(base, abs).split(path.sep).join('/'); + process.stdout.write(rel); +" "$FILE_PATH" "$PROJECT_DIR" 2>/dev/null) || true + +if [ -z "$REL_PATH" ]; then + exit 0 +fi + +# Append timestamped entry +mkdir -p "$(dirname "$LOG_FILE")" +echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) $REL_PATH" >> "$LOG_FILE" + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index 8c3d0d60..fa9c6edf 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -8,6 +8,11 @@ "type": "command", "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/check-readme.sh\"", "timeout": 10 + }, + { + "type": "command", + "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/guard-git.sh\"", + "timeout": 10 } ] }, @@ -31,6 +36,23 @@ } ] } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/rebuild-graph.sh\"", + "timeout": 30 + }, + { + "type": "command", + "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/track-edits.sh\"", + "timeout": 5 + } + ] + } ] } } diff --git a/.github/workflows/embedding-regression.yml b/.github/workflows/embedding-regression.yml new file mode 100644 index 00000000..32bf31e1 --- /dev/null +++ b/.github/workflows/embedding-regression.yml @@ -0,0 +1,41 @@ +name: Embedding Regression + +on: + schedule: + - 
cron: '0 6 * * 1' # Monday 6am UTC + workflow_dispatch: + pull_request: + paths: + - 'src/embedder.js' + - 'tests/search/**' + - 'package.json' + +concurrency: + group: embedding-regression-${{ github.ref }} + cancel-in-progress: true + +jobs: + embedding-regression: + runs-on: ubuntu-latest + name: Embedding regression tests + timeout-minutes: 15 + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: npm install + + - name: Cache HuggingFace models + uses: actions/cache@v4 + with: + path: ~/.cache/huggingface + key: hf-models-minilm-v1 + + - name: Run embedding regression tests + run: npx vitest run tests/search/embedding-regression.test.js diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5a65aee4..40e0d934 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,19 +1,14 @@ name: Publish on: + push: + branches: [main] release: types: [published] - workflow_dispatch: - inputs: - version-override: - description: "Override auto-detected bump (patch/minor/major/x.y.z). Leave empty to auto-detect from commits." 
- required: false - type: string - dry-run: - description: "Dry run (skip actual publish)" - required: false - type: boolean - default: false + +concurrency: + group: publish + cancel-in-progress: true permissions: {} @@ -21,6 +16,8 @@ jobs: preflight: name: Preflight checks runs-on: ubuntu-latest + # Skip dev publish when the push is a stable release version bump + if: github.event_name == 'release' || !startsWith(github.event.head_commit.message, 'chore: release v') permissions: contents: read steps: @@ -31,6 +28,39 @@ jobs: - run: npm install - run: npm test + compute-version: + needs: preflight + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + version: ${{ steps.compute.outputs.version }} + npm_tag: ${{ steps.compute.outputs.npm_tag }} + steps: + - uses: actions/checkout@v4 + + - name: Compute version + id: compute + run: | + CURRENT=$(node -p "require('./package.json').version") + + if [ "${{ github.event_name }}" = "release" ]; then + TAG="${{ github.event.release.tag_name }}" + VERSION="${TAG#v}" + NPM_TAG="latest" + echo "Stable release: $VERSION (from tag $TAG)" + else + IFS='.' 
read -r MAJOR MINOR PATCH <<< "$CURRENT" + NEXT_PATCH=$((PATCH + 1)) + SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7) + VERSION="${MAJOR}.${MINOR}.${NEXT_PATCH}-dev.${SHORT_SHA}" + NPM_TAG="dev" + echo "Dev release: $VERSION" + fi + + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "npm_tag=$NPM_TAG" >> "$GITHUB_OUTPUT" + build-native: needs: preflight strategy: @@ -92,7 +122,7 @@ jobs: if-no-files-found: error publish: - needs: build-native + needs: [compute-version, build-native] runs-on: ubuntu-latest environment: npm-publish permissions: @@ -117,48 +147,18 @@ jobs: - run: npm install - - name: Configure git - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - - - name: Bump version + - name: Set version id: version + env: + VERSION: ${{ needs.compute-version.outputs.version }} run: | git checkout -- package-lock.json - CURRENT=$(node -p "require('./package.json').version") + npm version "$VERSION" --no-git-tag-version + node scripts/sync-native-versions.js + echo "Publishing version $VERSION" - if [ "${{ github.event_name }}" = "release" ]; then - # Extract version from the release tag instead of trusting package.json - TAG="${{ github.event.release.tag_name }}" - RELEASE_VERSION="${TAG#v}" - if [ "$CURRENT" != "$RELEASE_VERSION" ]; then - echo "::warning::package.json ($CURRENT) doesn't match release tag ($TAG) — bumping to match" - npx commit-and-tag-version --release-as "$RELEASE_VERSION" --skip.tag --skip.changelog - else - echo "Triggered by release event — version $CURRENT matches tag $TAG" - fi - else - OVERRIDE="${{ inputs.version-override }}" - if [ -n "$OVERRIDE" ] && [ "$CURRENT" = "$OVERRIDE" ]; then - echo "Version already at $OVERRIDE — skipping bump" - elif [ -n "$OVERRIDE" ]; then - npx commit-and-tag-version --release-as "$OVERRIDE" --skip.tag - else - npx commit-and-tag-version --skip.tag - fi - fi - - NEW_VERSION=$(node -p "require('./package.json').version") - 
echo "new_version=$NEW_VERSION" >> "$GITHUB_OUTPUT" - - # Verify the version was actually bumped (skip for release events and matching overrides) - if [ "${{ github.event_name }}" != "release" ] && [ "$NEW_VERSION" = "$CURRENT" ] && [ "$CURRENT" != "$OVERRIDE" ]; then - echo "::error::Version was not bumped (still $CURRENT). Check commit history or provide a version-override." - exit 1 - fi - - echo "Will publish version $NEW_VERSION (was $CURRENT)" + - name: Disable prepublishOnly + run: npm pkg set scripts.prepublishOnly="" - name: Download native artifacts uses: actions/download-artifact@v4 @@ -166,21 +166,23 @@ jobs: path: artifacts/ - name: Verify version not already on npm + env: + VERSION: ${{ needs.compute-version.outputs.version }} run: | - VERSION="${{ steps.version.outputs.new_version }}" PKG="@optave/codegraph" echo "Checking if $PKG@$VERSION already exists on npm..." if npm view "$PKG@$VERSION" version 2>/dev/null; then - echo "::error::$PKG@$VERSION is already published on npm. Bump to a higher version." + echo "::error::$PKG@$VERSION is already published on npm." 
exit 1 fi echo "$PKG@$VERSION is not yet published — proceeding" - name: Publish platform packages + env: + VERSION: ${{ needs.compute-version.outputs.version }} + NPM_TAG: ${{ needs.compute-version.outputs.npm_tag }} shell: bash run: | - VERSION="${{ steps.version.outputs.new_version }}" - declare -A PACKAGES=( ["linux-x64"]="@optave/codegraph-linux-x64-gnu" ["darwin-arm64"]="@optave/codegraph-darwin-arm64" @@ -214,28 +216,29 @@ jobs: } PKGJSON - echo "Publishing ${pkg_name}@${VERSION}" - if [ "${{ inputs.dry-run }}" = "true" ]; then - npm publish "./pkg/$platform" --access public --provenance --dry-run - else - npm publish "./pkg/$platform" --access public --provenance - fi + echo "Publishing ${pkg_name}@${VERSION} with --tag ${NPM_TAG}" + npm publish "./pkg/$platform" --access public --provenance --tag "$NPM_TAG" done - - name: Publish main package (dry run) - if: inputs.dry-run - run: npm publish --access public --provenance --dry-run - - name: Publish main package - if: "!inputs.dry-run" - run: npm publish --access public --provenance + env: + NPM_TAG: ${{ needs.compute-version.outputs.npm_tag }} + run: npm publish --access public --provenance --tag "$NPM_TAG" + + # ── Stable-only: version bump PR and tag ── + + - name: Configure git + if: github.event_name == 'release' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" - name: Push version bump via PR - if: "!inputs.dry-run" + if: github.event_name == 'release' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VERSION: ${{ needs.compute-version.outputs.version }} run: | - VERSION="${{ steps.version.outputs.new_version }}" TAG="v${VERSION}" BRANCH="release/v${VERSION}" @@ -243,6 +246,8 @@ jobs: if git diff --quiet HEAD; then echo "No version bump commit to push — skipping PR" else + git add -A + git commit -m "chore: release v${VERSION}" git push origin "HEAD:refs/heads/${BRANCH}" gh pr create \ --base main \ @@ -259,3 +264,25 @@ jobs: git 
tag -a "$TAG" -m "release: $TAG" git push origin "$TAG" fi + + # ── Dev-only: summary with install instructions ── + + - name: Summary + if: github.event_name == 'push' + env: + VERSION: ${{ needs.compute-version.outputs.version }} + run: | + cat >> "$GITHUB_STEP_SUMMARY" <.js # See what imports/depends on a file If codegraph reports an error, crashes, or produces wrong results when analyzing itself, **fix the bug in the codebase** — don't just work around it. This is the best way to find and resolve real issues. +## Parallel Sessions + +Multiple Claude Code instances run concurrently in this repo. **Every session must start with `/worktree`** to get an isolated copy of the repo before making any changes. This prevents cross-session interference entirely. + +**Safety hooks** (`.claude/hooks/guard-git.sh` and `track-edits.sh`) enforce these rules automatically: + +- `guard-git.sh` (PreToolUse on Bash) **blocks**: `git add .`, `git add -A`, `git reset`, `git checkout -- `, `git restore `, `git clean`, `git stash`. It allows `git restore --staged ` for safe unstaging. +- `guard-git.sh` also **validates commits**: compares staged files against the session edit log and blocks commits that include files you didn't edit. +- `track-edits.sh` (PostToolUse on Edit/Write) logs every file you touch to `.claude/session-edits.log` (gitignored, per-worktree). + +**Rules:** +- Run `/worktree` before starting work +- Stage only files you explicitly changed +- Commit with specific file paths: `git commit -m "msg"` +- Ignore unexpected dirty files — they belong to another session +- Do not clean up lint/format issues in files you aren't working on + ## Git Conventions - Never add AI co-authorship lines (`Co-Authored-By` or similar) to commit messages. 
diff --git a/COMPETITIVE_ANALYSIS.md b/COMPETITIVE_ANALYSIS.md index 8d62483c..ffde3bfb 100644 --- a/COMPETITIVE_ANALYSIS.md +++ b/COMPETITIVE_ANALYSIS.md @@ -1,7 +1,7 @@ # Competitive Analysis — Code Graph / Code Intelligence Tools **Date:** 2026-02-22 -**Scope:** 21 code analysis tools compared against `@optave/codegraph` +**Scope:** 135+ code analysis tools evaluated, 80+ ranked against `@optave/codegraph` --- @@ -9,59 +9,160 @@ Ranked by weighted score across 6 dimensions (each 1–5): +### Tier 1: Direct Competitors (score ≥ 3.0) + +| # | Score | Project | Stars | Lang | License | Summary | +|---|-------|---------|-------|------|---------|---------| +| 1 | 4.5 | [joernio/joern](https://github.com/joernio/joern) | 2,956 | Scala | Apache-2.0 | Full CPG analysis platform for vulnerability discovery, Scala query DSL, multi-language, daily releases | +| 2 | 4.5 | [postrv/narsil-mcp](https://github.com/postrv/narsil-mcp) | 101 | Rust | Apache-2.0 | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural semantic search, single ~30MB binary | +| 3 | 4.5 | [vitali87/code-graph-rag](https://github.com/vitali87/code-graph-rag) | 1,916 | Python | MIT | Graph RAG with Memgraph, multi-provider AI, code editing, semantic search, MCP | +| 4 | 4.2 | [Fraunhofer-AISEC/cpg](https://github.com/Fraunhofer-AISEC/cpg) | 411 | Kotlin | Apache-2.0 | CPG library for 8+ languages with MCP module, Neo4j visualization, formal specs, LLVM IR support | +| 5 | 4.2 | [seatedro/glimpse](https://github.com/seatedro/glimpse) | 349 | Rust | MIT | Clipboard-first codebase-to-LLM tool with call graphs, token counting, LSP resolution | +| 6 | 4.0 | [SimplyLiz/CodeMCP (CKB)](https://github.com/SimplyLiz/CodeMCP) | 59 | Go | Custom | SCIP-based indexing, compound operations (83% token savings), CODEOWNERS, secret scanning | +| 7 | 3.9 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 29 | Python | None | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change 
coupling | +| 8 | 3.8 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking | +| 9 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | +| 10 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | +| 11 | 3.7 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | +| 12 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | +| 13 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 80 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection | +| 14 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 417 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, call graphs, vulnerability exposure, 5 published papers | +| **15** | **3.6** | **[@optave/codegraph](https://github.com/optave/codegraph)** | — | **JS/Rust** | **Apache-2.0** | **Sub-second incremental rebuilds, dual engine (native Rust + WASM), 11 languages, MCP, zero-cost core + optional LLM enhancement** | +| 16 | 3.5 | [er77/code-graph-rag-mcp](https://github.com/er77/code-graph-rag-mcp) | 89 | TypeScript | MIT | 26 MCP methods, 11 languages, tree-sitter, semantic search, hotspot analysis, clone detection | +| 17 | 3.5 | [MikeRecognex/mcp-codebase-index](https://github.com/MikeRecognex/mcp-codebase-index) | 25 
| Python | AGPL-3.0 | 18 MCP tools, zero runtime deps, auto-incremental reindexing via git diff | +| 18 | 3.5 | [nahisaho/CodeGraphMCPServer](https://github.com/nahisaho/CodeGraphMCPServer) | 7 | Python | MIT | GraphRAG with Louvain community detection, 16 languages, 14 MCP tools, 334 tests | +| 19 | 3.5 | [colbymchenry/codegraph](https://github.com/colbymchenry/codegraph) | 165 | TypeScript | MIT | tree-sitter + SQLite + MCP, Claude Code token reduction benchmarks, npx installer | +| 20 | 3.5 | [dundalek/stratify](https://github.com/dundalek/stratify) | 102 | Clojure | MIT | Multi-backend extraction (LSP/SCIP/Joern), 10 languages, DGML/CodeCharta output, architecture linting | +| 21 | 3.5 | [kraklabs/cie](https://github.com/kraklabs/cie) | 9 | Go | AGPL-3.0 | Code Intelligence Engine: 20+ MCP tools, tree-sitter, semantic search (Ollama), Homebrew, single Go binary | +| 22 | 3.4 | [Durafen/Claude-code-memory](https://github.com/Durafen/Claude-code-memory) | 72 | Python | None | Memory Guard quality gate, persistent codebase memory, Voyage AI + Qdrant | +| 23 | 3.3 | [NeuralRays/codexray](https://github.com/NeuralRays/codexray) | 2 | TypeScript | MIT | 16 MCP tools, TF-IDF semantic search (~50MB), dead code, complexity, path finding | +| 24 | 3.3 | [DucPhamNgoc08/CodeVisualizer](https://github.com/DucPhamNgoc08/CodeVisualizer) | 475 | TypeScript | MIT | VS Code extension, tree-sitter WASM, flowcharts + dependency graphs, 5 AI providers, 9 themes | +| 25 | 3.3 | [helabenkhalfallah/code-health-meter](https://github.com/helabenkhalfallah/code-health-meter) | 34 | JavaScript | MIT | Formal health metrics (MI, CC, Louvain modularity), published in ACM TOSEM 2025 | +| 26 | 3.3 | [JohT/code-graph-analysis-pipeline](https://github.com/JohT/code-graph-analysis-pipeline) | 27 | Cypher | GPL-3.0 | 200+ CSV reports, ML anomaly detection, Leiden/HashGNN, jQAssistant + Neo4j for Java | +| 27 | 3.3 | [Lekssays/codebadger](https://github.com/Lekssays/codebadger) | 43 | Python | 
GPL-3.0 | Containerized MCP server using Joern CPG, 12+ languages | +| 28 | 3.2 | [al1-nasir/codegraph-cli](https://github.com/al1-nasir/codegraph-cli) | 11 | Python | MIT | CrewAI multi-agent system, 6 LLM providers, browser explorer, DOCX export | +| 29 | 3.1 | [anasdayeh/claude-context-local](https://github.com/anasdayeh/claude-context-local) | 0 | Python | None | 100% local, Merkle DAG incremental indexing, sharded FAISS, hybrid BM25+vector, GPU accel | +| 30 | 3.0 | [Vasu014/loregrep](https://github.com/Vasu014/loregrep) | 12 | Rust | Apache-2.0 | In-memory index library, Rust + Python bindings, AI-tool-ready schemas | +| 31 | 3.0 | [xnuinside/codegraph](https://github.com/xnuinside/codegraph) | 438 | Python | MIT | Python-only interactive HTML dependency diagrams with zoom/pan/search | +| 32 | 3.0 | [Adrninistrator/java-all-call-graph](https://github.com/Adrninistrator/java-all-call-graph) | 551 | Java | Apache-2.0 | Complete Java bytecode call graphs, Spring/MyBatis-aware, SQL-queryable DB | +| 33 | 3.0 | [Technologicat/pyan](https://github.com/Technologicat/pyan) | 395 | Python | GPL-2.0 | Python 3 call graph generator, module import analysis, cycle detection, interactive HTML | +| 34 | 3.0 | [GaloisInc/MATE](https://github.com/GaloisInc/MATE) | 194 | Python | BSD-3 | DARPA-funded interactive CPG-based bug hunting for C/C++ via LLVM | +| 35 | 3.0 | [clouditor/cloud-property-graph](https://github.com/clouditor/cloud-property-graph) | 28 | Kotlin | Apache-2.0 | Connects code property graphs with cloud runtime security assessment | + +### Tier 2: Niche & Single-Language Tools (score 2.0–2.9) + | # | Score | Project | Stars | Lang | License | Summary | |---|-------|---------|-------|------|---------|---------| -| 1 | 4.5 | [vitali87/code-graph-rag](https://github.com/vitali87/code-graph-rag) | 1,916 | Python | MIT | Graph RAG with Memgraph, multi-provider AI, code editing, semantic search, MCP | -| 2 | 4.2 | 
[seatedro/glimpse](https://github.com/seatedro/glimpse) | 349 | Rust | MIT | Clipboard-first codebase-to-LLM tool with call graphs, token counting, LSP resolution | -| 3 | 4.0 | [SimplyLiz/CodeMCP (CKB)](https://github.com/SimplyLiz/CodeMCP) | 59 | Go | Custom | SCIP-based indexing, compound operations (83% token savings), CODEOWNERS, secret scanning | -| 4 | 3.9 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 29 | Python | None | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change coupling | -| 5 | 3.8 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking | -| 6 | 3.7 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | -| **7** | **3.6** | **[@optave/codegraph](https://github.com/optave/codegraph)** | — | **JS/Rust** | **Apache-2.0** | **Sub-second incremental rebuilds, dual engine (native Rust + WASM), 11 languages, MCP, zero-cost core + optional LLM enhancement** | -| 8 | 3.4 | [Durafen/Claude-code-memory](https://github.com/Durafen/Claude-code-memory) | 72 | Python | None | Memory Guard quality gate, persistent codebase memory, Voyage AI + Qdrant | -| 9 | 3.3 | [NeuralRays/codexray](https://github.com/NeuralRays/codexray) | 2 | TypeScript | MIT | 16 MCP tools, TF-IDF semantic search (~50MB), dead code, complexity, path finding | -| 10 | 3.2 | [al1-nasir/codegraph-cli](https://github.com/al1-nasir/codegraph-cli) | 11 | Python | MIT | CrewAI multi-agent system, 6 LLM providers, browser explorer, DOCX export | -| 11 | 3.1 | [anasdayeh/claude-context-local](https://github.com/anasdayeh/claude-context-local) | 0 | Python | None | 100% local, Merkle DAG incremental indexing, sharded FAISS, hybrid BM25+vector, GPU accel | -| 12 | 3.0 | [Vasu014/loregrep](https://github.com/Vasu014/loregrep) | 12 | 
Rust | Apache-2.0 | In-memory index library, Rust + Python bindings, AI-tool-ready schemas | -| 13 | 2.9 | [rahulvgmail/CodeInteliMCP](https://github.com/rahulvgmail/CodeInteliMCP) | 8 | Python | None | DuckDB + ChromaDB (zero Docker), multi-repo, lightweight embedded DBs | -| 14 | 2.8 | [Bikach/codeGraph](https://github.com/Bikach/codeGraph) | 6 | TypeScript | MIT | Neo4j graph, Claude Code slash commands, Kotlin support, 40-50% cost reduction | -| 15 | 2.7 | [yumeiriowl/repo-graphrag-mcp](https://github.com/yumeiriowl/repo-graphrag-mcp) | 3 | Python | MIT | LightRAG + tree-sitter, entity merge (code ↔ docs), implementation planning tool | -| 16 | 2.6 | [0xjcf/MCP_CodeAnalysis](https://github.com/0xjcf/MCP_CodeAnalysis) | 7 | Python/TS | None | Stateful tools (XState), Redis sessions, socio-technical analysis, dual language impl | -| 17 | 2.5 | [RaheesAhmed/code-context-mcp](https://github.com/RaheesAhmed/code-context-mcp) | 0 | Python | MIT | Security pattern detection, auto architecture diagrams, code flow tracing | -| 18 | 2.4 | [shantham/codegraph](https://github.com/shantham/codegraph) | 0 | TypeScript | MIT | Polished `npx` one-command installer, sqlite-vss, 7 MCP tools | -| 19 | 2.3 | [0xd219b/codegraph](https://github.com/0xd219b/codegraph) | 0 | Rust | None | Pure Rust, HTTP server mode, Java + Go support | -| 20 | 2.1 | [floydw1234/badger-graph](https://github.com/floydw1234/badger-graph) | 0 | Python | None | Dgraph backend (Docker), C struct field access tracking | -| 21 | 2.0 | [khushil/code-graph-rag](https://github.com/khushil/code-graph-rag) | 0 | Python | MIT | Fork of vitali87/code-graph-rag with no modifications | -| 22 | 1.8 | [m3et/CodeRAG](https://github.com/m3et/CodeRAG) | 0 | Python | None | Iterative RAG with self-reflection, ChromaDB, Azure OpenAI dependent | +| 36 | 2.9 | [rahulvgmail/CodeInteliMCP](https://github.com/rahulvgmail/CodeInteliMCP) | 8 | Python | None | DuckDB + ChromaDB (zero Docker), multi-repo, lightweight embedded DBs | 
+| 37 | 2.8 | [scottrogowski/code2flow](https://github.com/scottrogowski/code2flow) | 4,528 | Python | MIT | Call graphs for Python/JS/Ruby/PHP via AST, DOT output, 100% test coverage | +| 38 | 2.8 | [ysk8hori/typescript-graph](https://github.com/ysk8hori/typescript-graph) | 200 | TypeScript | None | TypeScript file-level dependency Mermaid diagrams, code metrics (MI, CC), watch mode | +| 39 | 2.8 | [nuanced-dev/nuanced-py](https://github.com/nuanced-dev/nuanced-py) | 126 | Python | MIT | Python call graph enrichment designed for AI agent consumption | +| 40 | 2.8 | [Bikach/codeGraph](https://github.com/Bikach/codeGraph) | 6 | TypeScript | MIT | Neo4j graph, Claude Code slash commands, Kotlin support, 40-50% cost reduction | +| 41 | 2.8 | [ChrisRoyse/CodeGraph](https://github.com/ChrisRoyse/CodeGraph) | 65 | TypeScript | None | Neo4j + MCP, multi-language, framework detection (React, Tailwind, Supabase) | +| 42 | 2.8 | [Symbolk/Code2Graph](https://github.com/Symbolk/Code2Graph) | 48 | Java | None | Multilingual code → language-agnostic graph representation | +| 43 | 2.7 | [yumeiriowl/repo-graphrag-mcp](https://github.com/yumeiriowl/repo-graphrag-mcp) | 3 | Python | MIT | LightRAG + tree-sitter, entity merge (code ↔ docs), implementation planning tool | +| 44 | 2.7 | [davidfraser/pyan](https://github.com/davidfraser/pyan) | 712 | Python | GPL-2.0 | Python call graph generator (stable fork), DOT/SVG/HTML output, Sphinx integration | +| 45 | 2.7 | [mamuz/PhpDependencyAnalysis](https://github.com/mamuz/PhpDependencyAnalysis) | 572 | PHP | MIT | PHP dependency graphs, cycle detection, architecture verification against defined layers | +| 46 | 2.7 | [faraazahmad/graphsense](https://github.com/faraazahmad/graphsense) | 35 | TypeScript | MIT | MCP server providing code intelligence via static analysis | +| 47 | 2.7 | [JonnoC/CodeRAG](https://github.com/JonnoC/CodeRAG) | 14 | TypeScript | MIT | Enterprise code intelligence with CK metrics, Neo4j, 23 analysis tools, MCP 
server | +| 48 | 2.6 | [0xjcf/MCP_CodeAnalysis](https://github.com/0xjcf/MCP_CodeAnalysis) | 7 | Python/TS | None | Stateful tools (XState), Redis sessions, socio-technical analysis, dual language impl | +| 49 | 2.5 | [koknat/callGraph](https://github.com/koknat/callGraph) | 325 | Perl | GPL-3.0 | Multi-language (22+) call graph generator via regex, GraphViz output | +| 50 | 2.5 | [RaheesAhmed/code-context-mcp](https://github.com/RaheesAhmed/code-context-mcp) | 0 | Python | MIT | Security pattern detection, auto architecture diagrams, code flow tracing | +| 51 | 2.5 | [league1991/CodeAtlasVsix](https://github.com/league1991/CodeAtlasVsix) | 265 | C# | GPL-2.0 | Visual Studio plugin, Doxygen-based call graph navigation (VS 2010-2015 era) | +| 52 | 2.5 | [beicause/call-graph](https://github.com/beicause/call-graph) | 105 | TypeScript | Apache-2.0 | VS Code extension generating call graphs via LSP call hierarchy API | +| 53 | 2.5 | [Thibault-Knobloch/codebase-intelligence](https://github.com/Thibault-Knobloch/codebase-intelligence) | 44 | Python | None | Code indexing + call graph + vector DB + natural language queries (requires OpenAI) | +| 54 | 2.5 | [darkmacheken/wasmati](https://github.com/darkmacheken/wasmati) | 31 | C++ | Apache-2.0 | CPG infrastructure for scanning vulnerabilities in WebAssembly | +| 55 | 2.5 | [sutragraph/sutracli](https://github.com/sutragraph/sutracli) | 28 | Python | GPL-3.0 | AI-powered cross-repo dependency graphs for coding agents | +| 56 | 2.5 | [julianjensen/ast-flow-graph](https://github.com/julianjensen/ast-flow-graph) | 69 | JavaScript | Other | JavaScript control flow graphs from AST analysis | +| 57 | 2.5 | [yoanbernabeu/grepai-skills](https://github.com/yoanbernabeu/grepai-skills) | 14 | — | MIT | 27 AI agent skills for semantic code search and call graph analysis | +| 58 | 2.4 | [shantham/codegraph](https://github.com/shantham/codegraph) | 0 | TypeScript | MIT | Polished `npx` one-command installer, sqlite-vss, 7 MCP tools | +| 
59 | 2.3 | [ozyyshr/RepoGraph](https://github.com/ozyyshr/RepoGraph) | 251 | Python | Apache-2.0 | SWE-bench code graph research (ctags + networkx for LLM context) | +| 60 | 2.3 | [emad-elsaid/rubrowser](https://github.com/emad-elsaid/rubrowser) | 644 | Ruby | MIT | Ruby-only interactive D3 force-directed dependency graph | +| 61 | 2.3 | [Chentai-Kao/call-graph-plugin](https://github.com/Chentai-Kao/call-graph-plugin) | 87 | Kotlin | None | IntelliJ plugin for visualizing call graphs in IDE | +| 62 | 2.3 | [ehabterra/apispec](https://github.com/ehabterra/apispec) | 72 | Go | Apache-2.0 | OpenAPI 3.1 spec generator from Go code via call graph analysis | +| 63 | 2.3 | [huoyo/ko-time](https://github.com/huoyo/ko-time) | 61 | Java | LGPL-2.1 | Spring Boot call graph with runtime durations | +| 64 | 2.3 | [Fraunhofer-AISEC/codyze](https://github.com/Fraunhofer-AISEC/codyze) | 91 | Kotlin | None | CPG-based analyzer for cryptographic API misuse (archived, merged into cpg repo) | +| 65 | 2.3 | [CartographAI/mcp-server-codegraph](https://github.com/CartographAI/mcp-server-codegraph) | 17 | JavaScript | MIT | Lightweight MCP code graph (3 tools only, Python/JS/Rust) | +| 66 | 2.3 | [YounesBensafia/DevLens](https://github.com/YounesBensafia/DevLens) | 21 | Python | None | Repo scanner with AI summaries, dead code detection (dep graph not yet implemented) | +| 67 | 2.3 | [0xd219b/codegraph](https://github.com/0xd219b/codegraph) | 0 | Rust | None | Pure Rust, HTTP server mode, Java + Go support | +| 68 | 2.3 | [aryx/codegraph](https://github.com/aryx/codegraph) | 6 | OCaml | Other | Multi-language source code dependency visualizer (the original "codegraph" name) | +| 69 | 2.2 | [jmarkowski/codeviz](https://github.com/jmarkowski/codeviz) | 144 | Python | MIT | C/C++ `#include` header dependency graph visualization | +| 70 | 2.2 | [juanallo/vscode-dependency-cruiser](https://github.com/juanallo/vscode-dependency-cruiser) | 76 | JavaScript | MIT | VS Code wrapper for 
dependency-cruiser (JS/TS) | +| 71 | 2.2 | [hidva/as2cfg](https://github.com/hidva/as2cfg) | 63 | Rust | GPL-3.0 | Intel assembly → control flow graph | +| 72 | 2.2 | [microsoft/cmd-call-graph](https://github.com/microsoft/cmd-call-graph) | 55 | Python | MIT | Call graphs for Windows CMD batch files | +| 73 | 2.2 | [siggy/gographs](https://github.com/siggy/gographs) | 52 | Go | MIT | Go package dependency graph generator | +| 74 | 2.2 | [henryhale/depgraph](https://github.com/henryhale/depgraph) | 33 | Go | MIT | Go-focused codebase dependency analysis | +| 75 | 2.2 | [2015xli/clangd-graph-rag](https://github.com/2015xli/clangd-graph-rag) | 28 | Python | Apache-2.0 | C/C++ Neo4j GraphRAG via clangd (scales to Linux kernel) | +| 76 | 2.1 | [floydw1234/badger-graph](https://github.com/floydw1234/badger-graph) | 0 | Python | None | Dgraph backend (Docker), C struct field access tracking | +| 77 | 2.0 | [crubier/code-to-graph](https://github.com/crubier/code-to-graph) | 382 | JavaScript | None | JS code → Mermaid flowchart (single-function, web demo) | +| 78 | 2.0 | [khushil/code-graph-rag](https://github.com/khushil/code-graph-rag) | 0 | Python | MIT | Fork of vitali87/code-graph-rag with no modifications | +| 79 | 2.0 | [FalkorDB/code-graph-backend](https://github.com/FalkorDB/code-graph-backend) | 26 | Python | MIT | FalkorDB (Redis-based graph) code analysis demo | +| 80 | 2.0 | [jillesvangurp/spring-depend](https://github.com/jillesvangurp/spring-depend) | 46 | Java | MIT | Spring bean dependency graph extraction | +| 81 | 2.0 | [ivan-m/SourceGraph](https://github.com/ivan-m/SourceGraph) | 27 | Haskell | GPL-3.0 | Haskell graph-theoretic code analysis (last updated 2022) | +| 82 | 2.0 | [brutski/go-code-graph](https://github.com/brutski/go-code-graph) | 13 | Go | MIT | Go codebase analyzer with MCP integration | + +### Tier 3: Minimal or Inactive (score < 2.0) + +| Score | Project | Stars | Summary | +|-------|---------|-------|---------| +| 1.8 | 
[m3et/CodeRAG](https://github.com/m3et/CodeRAG) | 0 | Iterative RAG with self-reflection, ChromaDB, Azure OpenAI dependent | +| 1.8 | [getyourguide/spmgraph](https://github.com/getyourguide/spmgraph) | 239 | Swift Package Manager dependency graph + architecture linting | +| 1.8 | [mvidner/code-explorer](https://github.com/mvidner/code-explorer) | 53 | Ruby call graph and class dependency browser | +| 1.8 | [ytsutano/jitana](https://github.com/ytsutano/jitana) | 41 | Android DEX static+dynamic hybrid analysis | +| 1.8 | [ShiftLeftSecurity/fuzzyc2cpg](https://github.com/ShiftLeftSecurity/fuzzyc2cpg) | 37 | [ARCHIVED] Fuzzy C/C++ parser to CPG (Joern ecosystem) | +| 1.8 | [mufasadb/code-grapher](https://github.com/mufasadb/code-grapher) | 10 | MCP code graph server (early stage) | +| 1.8 | [dtsbourg/codegraph-fmt](https://github.com/dtsbourg/codegraph-fmt) | 7 | Annotated AST graph representations from Python | +| 1.8 | [mloncode/codegraph](https://github.com/mloncode/codegraph) | 5 | Git/UAST graph experiments | +| 1.7 | [ashishb/python_dep_generator](https://github.com/ashishb/python_dep_generator) | 22 | Python dependency graph generator | +| 1.7 | [LaurEars/codegrapher](https://github.com/LaurEars/codegrapher) | 15 | Python call graph visualizer | +| 1.7 | [AdilZouitine/ouakha.rs](https://github.com/AdilZouitine/ouakha.rs) | 7 | LLM-based Rust code analysis for suspicious code | +| 1.7 | [ensozos/geneci](https://github.com/ensozos/geneci) | 6 | UML diagrams and call graphs from source | +| 1.7 | [spullara/codegraph](https://github.com/spullara/codegraph) | 5 | Java JARs → Neo4j loader | +| 1.5 | [z7zmey/codegraph](https://github.com/z7zmey/codegraph) | 10 | PHP code visualization (last updated 2020) | +| 1.5 | [marcusva/cflow](https://github.com/marcusva/cflow) | 10 | C/assembler call graph generator | +| 1.5 | [beacoder/call-graph](https://github.com/beacoder/call-graph) | 5 | Emacs-based C/C++ call graph | --- -## Scoring Breakdown +## Scoring Breakdown (Tier 1) 
| # | Project | Features | Analysis Depth | Deploy Simplicity | Lang Support | Code Quality | Community | |---|---------|----------|---------------|-------------------|-------------|-------------|-----------| -| 1 | code-graph-rag | 5 | 4 | 3 | 4 | 4 | 5 | -| 2 | glimpse | 4 | 4 | 5 | 3 | 5 | 5 | -| 3 | CKB | 5 | 5 | 4 | 3 | 4 | 3 | -| 4 | axon | 5 | 5 | 4 | 2 | 4 | 2 | -| 5 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 4 | -| 6 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | -| **7** | **codegraph (us)** | **3** | **3** | **5** | **4** | **4** | **2** | -| 8 | Claude-code-memory | 4 | 3 | 3 | 3 | 4 | 3 | -| 9 | codexray | 5 | 4 | 4 | 4 | 3 | 1 | -| 10 | codegraph-cli | 5 | 3 | 3 | 2 | 3 | 2 | -| 11 | claude-context-local | 4 | 3 | 3 | 4 | 4 | 1 | -| 12 | loregrep | 3 | 3 | 4 | 3 | 5 | 2 | -| 13 | CodeInteliMCP | 3 | 3 | 4 | 3 | 3 | 1 | -| 14 | Bikach/codeGraph | 3 | 3 | 3 | 2 | 3 | 1 | -| 15 | repo-graphrag-mcp | 3 | 3 | 3 | 4 | 3 | 1 | -| 16 | MCP_CodeAnalysis | 4 | 3 | 3 | 2 | 3 | 1 | -| 17 | code-context-mcp | 4 | 2 | 3 | 2 | 2 | 1 | -| 18 | shantham/codegraph | 3 | 2 | 4 | 4 | 3 | 1 | -| 19 | 0xd219b/codegraph | 2 | 3 | 4 | 1 | 4 | 1 | -| 20 | badger-graph | 2 | 2 | 2 | 1 | 2 | 1 | -| 21 | khushil/code-graph-rag | 5 | 4 | 3 | 4 | 4 | 1 | -| 22 | CodeRAG | 3 | 2 | 2 | 1 | 2 | 1 | +| 1 | joern | 5 | 5 | 3 | 4 | 5 | 5 | +| 2 | narsil-mcp | 5 | 5 | 5 | 5 | 4 | 3 | +| 3 | code-graph-rag | 5 | 4 | 3 | 4 | 4 | 5 | +| 4 | cpg | 5 | 5 | 2 | 5 | 5 | 3 | +| 5 | glimpse | 4 | 4 | 5 | 3 | 5 | 5 | +| 6 | CKB | 5 | 5 | 4 | 3 | 4 | 3 | +| 7 | axon | 5 | 5 | 4 | 2 | 4 | 2 | +| 8 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 4 | +| 9 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | +| 10 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | +| 11 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | +| 12 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | +| 13 | code-graph-mcp | 4 | 4 | 4 | 5 | 3 | 2 | +| 14 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | +| **15** | **codegraph (us)** | **3** | **3** | **5** | **4** | **4** | **2** | +| 16 | 
code-graph-rag-mcp | 5 | 4 | 3 | 4 | 3 | 2 | +| 17 | mcp-codebase-index | 4 | 3 | 5 | 3 | 4 | 2 | +| 18 | CodeGraphMCPServer | 4 | 4 | 4 | 5 | 3 | 1 | +| 19 | colbymchenry/codegraph | 4 | 3 | 5 | 3 | 3 | 3 | +| 20 | stratify | 4 | 4 | 2 | 5 | 4 | 2 | +| 21 | cie | 5 | 4 | 4 | 3 | 4 | 1 | +| 22 | Claude-code-memory | 4 | 3 | 3 | 3 | 4 | 3 | +| 23 | codexray | 5 | 4 | 4 | 4 | 3 | 1 | +| 24 | CodeVisualizer | 4 | 3 | 5 | 3 | 3 | 2 | +| 25 | code-health-meter | 3 | 5 | 5 | 1 | 4 | 2 | +| 26 | code-graph-analysis-pipeline | 5 | 5 | 1 | 2 | 5 | 2 | +| 27 | codebadger | 4 | 4 | 3 | 5 | 3 | 1 | +| 28 | codegraph-cli | 5 | 3 | 3 | 2 | 3 | 2 | +| 29 | claude-context-local | 4 | 3 | 3 | 4 | 4 | 1 | +| 30 | loregrep | 3 | 3 | 4 | 3 | 5 | 2 | +| 31 | xnuinside/codegraph | 3 | 2 | 5 | 1 | 3 | 4 | +| 32 | java-all-call-graph | 4 | 4 | 3 | 1 | 3 | 3 | +| 33 | pyan | 3 | 3 | 5 | 1 | 4 | 2 | +| 34 | MATE | 3 | 5 | 1 | 1 | 3 | 2 | +| 35 | cloud-property-graph | 4 | 4 | 2 | 2 | 4 | 2 | **Scoring criteria:** - **Features** (1-5): breadth of tools, MCP integration, search, visualization, export @@ -78,48 +179,74 @@ Ranked by weighted score across 6 dimensions (each 1–5): | Strength | Details | |----------|---------| | **Always-fresh graph (incremental rebuilds)** | File-level MD5 hashing means only changed files are re-parsed. Change 1 file in a 3,000-file project → rebuild in under a second. No other tool in this space offers this. Competitors re-index everything from scratch — making them unusable in commit hooks, watch mode, or agent-driven loops | -| **Zero-cost core, LLM-enhanced when you choose** | The full graph pipeline (parse, resolve, query, impact analysis) runs with no API keys, no cloud, no cost. LLM features (richer embeddings, semantic search) are an optional layer on top — using whichever provider the user already works with. Competitors either require cloud APIs for core features (code-graph-rag, autodev-codebase) or offer no AI enhancement at all (CKB, axon). 
Nobody else offers both modes in one tool | -| **Data goes only where you send it** | Your code reaches exactly one place: the AI agent you already chose (via MCP). No additional third-party services, no surprise cloud calls. Competitors like code-graph-rag, autodev-codebase, and Claude-code-memory send your code to additional AI providers beyond the agent you're using | -| **Dual engine architecture** | Only project with native Rust (napi-rs) + automatic WASM fallback. Others are pure Rust OR pure JS/Python — never both | +| **Zero-cost core, LLM-enhanced when you choose** | The full graph pipeline (parse, resolve, query, impact analysis) runs with no API keys, no cloud, no cost. LLM features (richer embeddings, semantic search) are an optional layer on top — using whichever provider the user already works with. Competitors either require cloud APIs for core features (code-graph-rag, autodev-codebase, mcp-code-graph) or offer no AI enhancement at all (CKB, axon). Nobody else offers both modes in one tool | +| **Data goes only where you send it** | Your code reaches exactly one place: the AI agent you already chose (via MCP). No additional third-party services, no surprise cloud calls. Competitors like code-graph-rag, autodev-codebase, mcp-code-graph, and Claude-code-memory send your code to additional AI providers beyond the agent you're using | +| **Dual engine architecture** | Only project with native Rust (napi-rs) + automatic WASM fallback. Others are pure Rust (narsil-mcp, codegraph-rust) OR pure JS/Python — never both | +| **Standalone CLI + MCP** | Full CLI experience (`diff-impact`, `cycles`, `map`, `fn`, `deps`, `search`) alongside MCP server. Many competitors are MCP-only (narsil-mcp, code-graph-mcp, CodeGraphMCPServer) with no standalone query interface | | **Single-repo MCP isolation** | Security-conscious default: tools have no `repo` property unless `--multi-repo` is explicitly enabled. 
Most competitors default to exposing everything | -| **Zero-dependency deployment** | `npm install` and done. No Docker, no external databases, no Python, no SCIP toolchains. Published platform-specific binaries (`@optave/codegraph-{platform}-{arch}`) resolve automatically | +| **Zero-dependency deployment** | `npm install` and done. No Docker, no external databases, no Python, no SCIP toolchains, no JVM. Published platform-specific binaries (`@optave/codegraph-{platform}-{arch}`) resolve automatically. Joern requires JDK 21, cpg requires Gradle + language-specific deps, codegraph-rust requires SurrealDB + LSP servers | | **Import resolution depth** | 6-level priority system with confidence scoring — more sophisticated than most competitors' resolution | --- ## Where Codegraph Loses -### vs code-graph-rag (#1, 1916 stars) +### vs joern (#1, 2,956 stars) +- **Full Code Property Graph**: AST + CFG + PDG combined for deep vulnerability analysis; our tree-sitter extraction captures structure but not control/data flow +- **Scala query DSL**: purpose-built query language for arbitrary graph traversals vs our fixed SQL queries +- **Binary analysis**: Ghidra frontend can analyze compiled binaries — we're source-only +- **Enterprise backing**: ShiftLeft/Fraunhofer support, daily automated releases, Discord community, professional documentation at joern.io +- **Community**: 2,956 stars, 389 forks — massive traction + +### vs narsil-mcp (#2, 101 stars) +- **Feature breadth**: 90 MCP tools vs our ~10; covers taint analysis, SBOM, license compliance, control flow graphs, data flow analysis +- **Language count**: 32 languages (including Verilog, Fortran, PowerShell, Nix) vs our 11 +- **Security analysis**: vulnerability scanning with OWASP/CWE coverage — we have no security features +- **Dead code detection**: built-in — we lack this +- **Single-binary deployment**: ~30MB Rust binary via brew/scoop/cargo/npm — as easy as ours + +### vs code-graph-rag (#3, 1,916 stars) - **Graph 
query expressiveness**: Memgraph + Cypher enables arbitrary graph traversals; our SQL queries are more rigid - **AI-powered code editing**: they can surgically edit functions via AST targeting with visual diffs - **Provider flexibility**: they support Gemini/OpenAI/Claude/Ollama and can mix providers per task - **Community**: 1,916 stars — orders of magnitude more traction -### vs glimpse (#2, 349 stars) +### vs cpg (#4, 411 stars) +- **Formal CPG specification**: academic-grade graph representation (AST + CFG + PDG + DFG) with published specs +- **MCP module**: built-in MCP support now, matching our integration +- **LLVM IR support**: extends language coverage to any LLVM-compiled language (Rust, Swift, etc.) +- **Type inference**: can analyze incomplete/partial code — our tree-sitter requires syntactically valid input + +### vs glimpse (#5, 349 stars) - **LLM workflow optimization**: clipboard-first output + token counting + XML output mode — purpose-built for "code → LLM context" - **LSP-based call resolution**: compiler-grade accuracy vs our tree-sitter heuristic approach - **Web content processing**: can fetch URLs and convert HTML to markdown for context -### vs CKB (#3, 59 stars) +### vs CKB (#6, 59 stars) - **Indexing accuracy**: SCIP provides compiler-grade cross-file references (type-aware), fundamentally more accurate than tree-sitter for supported languages - **Compound operations**: `explore`/`understand`/`prepareChange` batch multiple queries into one call — 83% token reduction, 60-70% fewer tool calls - **CODEOWNERS + secret scanning**: enterprise features we lack entirely -### vs axon (#4, 29 stars) +### vs axon (#7, 29 stars) - **Analysis depth**: their 11-phase pipeline includes community detection (Leiden), execution flow tracing, git change coupling, dead code detection — all features we lack - **Graph database**: KuzuDB with native Cypher is more expressive for complex graph queries than our SQLite - **Branch structural diff**: compares code 
structure between branches using git worktrees -### vs autodev-codebase (#5, 111 stars) -- **Language breadth**: 40+ languages vs our 11 -- **Interactive visualization**: Cytoscape.js call graph explorer in the browser — we only have static DOT/Mermaid -- **LLM reranking**: secondary LLM pass to improve search relevance — more sophisticated retrieval pipeline +### vs codegraph-rust (#10, 142 stars) +- **LSP-powered analysis**: compiler-grade cross-file references via rust-analyzer, pyright, gopls vs our tree-sitter heuristics +- **Dataflow edges**: defines/uses/flows_to/returns/mutates relationships we don't capture +- **Architecture boundary enforcement**: configurable rules for detecting violations — we have no architectural awareness +- **Tiered indexing**: fast/balanced/full modes for different use cases — we have one mode + +### vs jelly (#14, 417 stars) +- **Points-to analysis**: flow-insensitive analysis with access paths for JS/TS — fundamentally more precise than our tree-sitter-based call resolution +- **Academic rigor**: 5 published papers backing the methodology (Aarhus University) +- **Vulnerability exposure analysis**: library usage pattern matching specific to the JS/TS ecosystem -### vs arbor (#6, 85 stars) -- **Native GUI**: desktop app for interactive impact analysis (we're CLI/MCP only) -- **Confidence scoring surfaced to users**: every result shows High/Medium/Low confidence -- **Architectural role classification**: auto-tags symbols as Entry Point / Core Logic / Utility / Adapter -- **Fuzzy symbol search**: typo tolerance with Jaro-Winkler matching +### vs colbymchenry/codegraph (#19, 165 stars) +- **Naming competitor**: same name, same tech stack (tree-sitter + SQLite + MCP + Node.js) — marketplace confusion risk +- **Published benchmarks**: 67% fewer tool calls and measurable Claude Code token reduction — compelling marketing angle we lack +- **One-liner setup**: `npx @colbymchenry/codegraph` with interactive installer auto-configures Claude 
Code --- @@ -128,7 +255,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): ### Tier 1: High impact, low effort | Feature | Inspired by | Why | |---------|------------|-----| -| **Dead code detection** | axon, codexray, CKB | We have the graph — find nodes with zero incoming edges (minus entry points/exports). Agents constantly ask "is this used?" | +| **Dead code detection** | narsil-mcp, axon, codexray, CKB | We have the graph — find nodes with zero incoming edges (minus entry points/exports). Agents constantly ask "is this used?" | | **Fuzzy symbol search** | arbor | Add Levenshtein/Jaro-Winkler to `fn` command. Currently requires exact match | | **Expose confidence scores** | arbor | Already computed internally in import resolution — just surface them | | **Shortest path A→B** | codexray, arbor | BFS on existing edges table. We have `fn` for single chains but no A→B pathfinding | @@ -137,34 +264,39 @@ Ranked by weighted score across 6 dimensions (each 1–5): | Feature | Inspired by | Why | |---------|------------|-----| | **Optional LLM provider integration** | code-graph-rag, autodev-codebase | Bring-your-own provider (OpenAI, etc.) for richer embeddings and AI-powered search. Enhancement layer only — core graph never depends on it. No other tool offers both zero-cost local and LLM-enhanced modes in one package | -| **Compound MCP tools** | CKB | `explore`/`understand` meta-tools that batch deps + fn + map into single responses. Biggest token-savings opportunity | +| **Compound MCP tools** | CKB, colbymchenry/codegraph | `explore`/`understand` meta-tools that batch deps + fn + map into single responses. Biggest token-savings opportunity. 
colbymchenry shows 67% fewer tool calls | | **Token counting on responses** | glimpse, arbor | tiktoken-based counts so agents know context budget consumed | | **Node classification** | arbor | Auto-tag Entry Point / Core / Utility / Adapter from in-degree/out-degree patterns | | **TF-IDF lightweight search** | codexray | SQLite FTS5 + TF-IDF as a middle tier (~50MB) between "no search" and full transformers (~500MB) | +| **OWASP/CWE pattern detection** | narsil-mcp, CKB | Security pattern scanning on the existing AST — hardcoded secrets, SQL injection patterns, XSS | +| **Formal code health metrics** | code-health-meter | Cyclomatic complexity, Maintainability Index, Halstead metrics per function — we already parse the AST | ### Tier 3: High impact, high effort | Feature | Inspired by | Why | |---------|------------|-----| -| **Interactive HTML visualization** | autodev-codebase, codegraph-cli | `codegraph viz` → opens interactive vis.js/Cytoscape.js graph in browser | +| **Interactive HTML visualization** | autodev-codebase, CodeVisualizer | `codegraph viz` → opens interactive vis.js/Cytoscape.js graph in browser | | **Git change coupling** | axon | Analyze git history for files that always change together — enhances `diff-impact` | -| **Community detection** | axon | Leiden algorithm to discover natural module boundaries vs actual file organization | +| **Community detection** | axon, CodeGraphMCPServer | Leiden/Louvain algorithm to discover natural module boundaries vs actual file organization | | **Execution flow tracing** | axon, code-context-mcp | Framework-aware entry point detection + BFS flow tracing | -| **Security pattern scanning** | CKB, code-context-mcp | Detect hardcoded secrets, SQL injection patterns, XSS in parsed code | +| **Dataflow analysis** | codegraph-rust | Define/use chains and flows_to/returns/mutates edges — major analysis depth increase | +| **Architecture boundary rules** | codegraph-rust, stratify | User-defined rules for 
allowed/forbidden dependencies between modules | ### Not worth copying | Feature | Why skip | |---------|----------| -| Memgraph/Neo4j/KuzuDB | Our SQLite = zero Docker, simpler deployment. Query gap matters less than simplicity | +| Memgraph/Neo4j/KuzuDB/SurrealDB | Our SQLite = zero Docker, simpler deployment. Query gap matters less than simplicity. codegraph-rust's SurrealDB requirement is its biggest weakness | | SCIP indexing | Would require maintaining SCIP toolchains per language. Tree-sitter + native Rust is the right bet | +| Full CPG (AST+CFG+PDG) | Joern/cpg's approach requires fundamentally different parsing — we'd be rebuilding Joern. Tree-sitter gives us AST-level graphs; adding lightweight dataflow on top is the pragmatic path | +| Points-to analysis | Academic-grade JS analysis (jelly) — overkill for our use case and limited to JS/TS | +| Cloud-hosted graph service | mcp-code-graph (CodeGPT) requires accounts and cloud dependency — goes against our local-first philosophy | | CrewAI multi-agent | Overengineered for a code analysis tool. Keep the scope focused | | Clipboard/LLM-dump mode | Different product category (glimpse). We're a graph tool, not a context-packer | -| Cloud APIs for core features | We will add LLM provider support, but as an **optional enhancement layer** — the core graph must always work with zero API keys and zero cost. 
This is the opposite of code-graph-rag's approach where cloud APIs are required for core functionality | --- ## Irrelevant Repos (excluded from ranking) -These repos from the initial list were not code analysis / graph tools: +These repos from the search were not code analysis / graph tools: | Repo | What it actually is | |------|-------------------| @@ -176,4 +308,49 @@ These repos from the initial list were not code analysis / graph tools: | [shandianchengzi/tree_sitter_DataExtractor](https://github.com/shandianchengzi/tree_sitter_DataExtractor) | Academic research on program graph representations for GNNs | | [hasssanezzz/GoTypeGraph](https://github.com/hasssanezzz/GoTypeGraph) | Go-only struct/interface relationship visualizer | | [romiras/py-cmm-parser](https://github.com/romiras/py-cmm-parser) | Python-only canonical metadata parser with Pyright LSP | -| [OrkeeAI/orkee](https://github.com/OrkeeAI/orkee) | AI agent orchestration platform (CLI/TUI/Web/Desktop) — adjacent but different category | +| [OrkeeAI/orkee](https://github.com/OrkeeAI/orkee) | AI agent orchestration platform (CLI/TUI/Web/Desktop) | +| [google/binnavi](https://github.com/google/binnavi) | Binary analysis IDE for disassembled code (archived, requires IDA Pro) | +| [mhutchie/vscode-git-graph](https://github.com/mhutchie/vscode-git-graph) | VS Code extension for visualizing Git commit history as a graph | +| [utdemir/nix-tree](https://github.com/utdemir/nix-tree) | TUI for browsing Nix package dependency trees | +| [arunkumar9t2/scabbard](https://github.com/arunkumar9t2/scabbard) | Dagger 2 (Android DI framework) dependency graph visualizer | +| [iamvaibhavmehra/LangGraph-Course-freeCodeCamp](https://github.com/iamvaibhavmehra/LangGraph-Course-freeCodeCamp) | FreeCodeCamp course materials on LangGraph agent framework | +| [trailofbits/it-depends](https://github.com/trailofbits/it-depends) | Package dependency resolver and SBOM generator (supply chain, not source code) | +| 
[jriecken/dependency-graph](https://github.com/jriecken/dependency-graph) | Generic in-memory graph data structure library (npm package) | +| [Claudate/project-multilevel-index](https://github.com/Claudate/project-multilevel-index) | Fractal self-referential documentation system inspired by "Godel, Escher, Bach" | +| [23blocks-OS/ai-maestro](https://github.com/23blocks-OS/ai-maestro) | AI agent orchestrator/dashboard for managing multiple AI coding agents | +| [codemuse-app/codemuse](https://github.com/codemuse-app/codemuse) | SaaS VS Code extension for AI-powered code navigation (requires cloud account) | +| [microsoft/constrained-graph-variational-autoencoder](https://github.com/microsoft/constrained-graph-variational-autoencoder) | ML research: constrained graph variational autoencoders for molecule design | +| [tech-srl/Nero](https://github.com/tech-srl/Nero) | ML research: neural reverse engineering of stripped binaries | +| [tintinweb/vscode-interactive-graphviz](https://github.com/tintinweb/vscode-interactive-graphviz) | VS Code extension for interactive Graphviz DOT file preview (generic renderer) | +| [praeclarum/ShaderGraphCoder](https://github.com/praeclarum/ShaderGraphCoder) | RealityKit shader codegen for Swift (shader graphs, not code analysis) | +| [Vul-LMGNN/vul-LMGGNN](https://github.com/Vul-LMGNN/vul-LMGGNN) | ML research: vulnerability detection combining CodeBERT + GNN | +| [TeodorVecerdi/CodeGraph](https://github.com/TeodorVecerdi/CodeGraph) | Unity visual scripting tool for non-programmers (node-based, discontinued) | +| [OneGraph/graphiql-code-exporter](https://github.com/OneGraph/graphiql-code-exporter) | GraphiQL plugin to export GraphQL queries as code snippets | +| [joaompinto/vscode-graphviz](https://github.com/joaompinto/vscode-graphviz) | VS Code Graphviz DOT language support (syntax highlighting/preview) | +| [igor-elovikov/sd-sex](https://github.com/igor-elovikov/sd-sex) | Substance Designer plugin for creating function graphs from 
code (3D materials) | +| [HeYijia/GraphSLAM_tutorials_code](https://github.com/HeYijia/GraphSLAM_tutorials_code) | Tutorial code for GraphSLAM robotics algorithms | +| [iFindTA/NHGraphCoderPro](https://github.com/iFindTA/NHGraphCoderPro) | iOS CAPTCHA/verification code generator (Objective-C UI component) | +| [oceaneLIU/GraphCoder](https://github.com/oceaneLIU/GraphCoder) | ML research: graph-based code representation (paper code, no usable tool) | +| [omegacen/conda-depgraph](https://github.com/omegacen/conda-depgraph) | CLI to plot Conda environment dependency graphs (package manager, not source code) | +| [hosseinmoein/Lynx](https://github.com/hosseinmoein/Lynx) | Generic dependency graph library for scheduling/computation (data structure) | +| [deezer/gravity_graph_autoencoders](https://github.com/deezer/gravity_graph_autoencoders) | ML research: gravity-inspired graph autoencoders for link prediction | +| [erikw/taiga-stats](https://github.com/erikw/taiga-stats) | Statistics and burnup diagrams from Taiga project management | +| [ZhaoLiang-GitHub/KnowledgeGraph-code](https://github.com/ZhaoLiang-GitHub/KnowledgeGraph-code) | Knowledge graph project code and datasets (NLP/knowledge engineering) | +| [shenAlexy/GraphCodeView](https://github.com/shenAlexy/GraphCodeView) | iOS captcha/verification code generator using drawRect | +| [spacestation13/SS13-Codebases](https://github.com/spacestation13/SS13-Codebases) | Family tree diagram of Space Station 13 game forks | +| [jbpt/codebase](https://github.com/jbpt/codebase) | Business process analysis library (BPMN/Petri nets) | +| [Stephanvs/vscode-graphviz](https://github.com/Stephanvs/vscode-graphviz) | VS Code Graphviz DOT language syntax support | +| [ZephyrTan/low-code-flow-graph](https://github.com/ZephyrTan/low-code-flow-graph) | Low-code drag-and-drop flow/process designer UI component | +| [Bowen-n/MCBG](https://github.com/Bowen-n/MCBG) | Malware classification via control flow graphs (ML/security research) | 
+| [wonjunior/CodeGraph](https://github.com/wonjunior/CodeGraph) | Node-based visual programming tool (not code analysis) | +| [DumBringer/CoDeGraph](https://github.com/DumBringer/CoDeGraph) | Industrial anomaly detection/segmentation via zero-shot learning (computer vision) | +| [vul337/Callee](https://github.com/vul337/Callee) | ML research: recovering binary call graphs with transfer learning | +| [MalwareCantFly/Vba2Graph](https://github.com/MalwareCantFly/Vba2Graph) | VBA macro malware analysis call graph tool for security researchers | +| [Ramos-dev/graph4code](https://github.com/Ramos-dev/graph4code) | Tutorial/demo: ASTs in Neo4j for vulnerability tracing (educational, not a tool) | +| [microsoft/graph-based-code-modelling](https://github.com/microsoft/graph-based-code-modelling) | ML research: generative code modeling with program graphs (ICLR papers) | +| [ShiftLeftSecurity/llvm2cpg](https://github.com/ShiftLeftSecurity/llvm2cpg) | LLVM bitcode → Code Property Graph converter for Joern (binary/IR bridge) | +| [joernio/ghidra2cpg](https://github.com/joernio/ghidra2cpg) | Ghidra binary decompilation → CPG frontend for Joern | +| [rocq-community/coq-dpdgraph](https://github.com/rocq-community/coq-dpdgraph) | Coq/Rocq theorem prover dependency graph plugin for proof objects | +| [rewindio/dagwood](https://github.com/rewindio/dagwood) | Generic Ruby dependency graph library (data structure) | +| [rust-secure-code/cargo-supply-chain](https://github.com/rust-secure-code/cargo-supply-chain) | Cargo subcommand for auditing who published your Rust dependencies (supply chain people, not code) | +| [cyrus-and/chrome-page-graph](https://github.com/cyrus-and/chrome-page-graph) | Chrome extension for web page asset dependency graphs | diff --git a/crates/codegraph-core/src/import_resolution.rs b/crates/codegraph-core/src/import_resolution.rs index 89157e6e..7fa53a84 100644 --- a/crates/codegraph-core/src/import_resolution.rs +++ 
b/crates/codegraph-core/src/import_resolution.rs @@ -2,9 +2,11 @@ use std::path::{Path, PathBuf}; use crate::types::{AliasMapping, ImportResolutionInput, PathAliases, ResolvedImport}; -/// Normalize a path to use forward slashes (cross-platform consistency). +/// Normalize a path to use forward slashes and clean `.` / `..` segments +/// (cross-platform consistency). fn normalize_path(p: &str) -> String { - p.replace('\\', "/") + let cleaned: PathBuf = Path::new(p).components().collect(); + cleaned.display().to_string().replace('\\', "/") } /// Try resolving via path aliases (tsconfig/jsconfig paths). @@ -69,10 +71,10 @@ pub fn resolve_import_path( return import_source.to_string(); } - // Relative import + // Relative import — normalize immediately to remove `.` / `..` segments let dir = Path::new(from_file).parent().unwrap_or(Path::new("")); - let resolved = dir.join(import_source); - let resolved_str = resolved.display().to_string(); + let resolved: PathBuf = dir.join(import_source).components().collect(); + let resolved_str = resolved.display().to_string().replace('\\', "/"); // .js → .ts remap if resolved_str.ends_with(".js") { diff --git a/docs/recommended-practices.md b/docs/recommended-practices.md index 1b94f7b9..bced1ad6 100644 --- a/docs/recommended-practices.md +++ b/docs/recommended-practices.md @@ -197,14 +197,29 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- "PostToolUse": [ { "matcher": "Edit|Write", - "command": "codegraph build --incremental" + "hooks": [ + { + "type": "command", + "command": "codegraph build", + "timeout": 30 + } + ] } ] } } ``` -This ensures the graph stays fresh as the AI agent modifies files. +This ensures the graph stays fresh as the AI agent modifies files. Incremental builds are automatic — only changed files are re-parsed. 
+ +#### Parallel session safety hooks + +When multiple AI agents work on the same repo concurrently, add hooks to prevent cross-session interference: + +- **Edit tracker** (PostToolUse on Edit|Write): log every file path touched to `.claude/session-edits.log` +- **Git guard** (PreToolUse on Bash): block `git add .`, `git reset`, `git restore`, `git clean`, `git stash`, and validate that `git commit` only includes files from the session edit log + +See this repo's `.claude/hooks/track-edits.sh` and `guard-git.sh` for a working implementation. Pair with the `/worktree` command so each session gets an isolated copy of the repo. --- diff --git a/src/builder.js b/src/builder.js index 31e0eeea..c25c6f7b 100644 --- a/src/builder.js +++ b/src/builder.js @@ -1,5 +1,6 @@ import { createHash } from 'node:crypto'; import fs from 'node:fs'; +import os from 'node:os'; import path from 'node:path'; import { loadConfig } from './config.js'; import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js'; @@ -581,10 +582,18 @@ export async function buildGraph(rootDir, opts = {}) { console.log(`Stored in ${dbPath}`); db.close(); - try { - const { registerRepo } = await import('./registry.js'); - registerRepo(rootDir); - } catch (err) { - debug(`Auto-registration failed: ${err.message}`); + if (!opts.skipRegistry) { + const tmpDir = path.resolve(os.tmpdir()); + const resolvedRoot = path.resolve(rootDir); + if (resolvedRoot.startsWith(tmpDir)) { + debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`); + } else { + try { + const { registerRepo } = await import('./registry.js'); + registerRepo(rootDir); + } catch (err) { + debug(`Auto-registration failed: ${err.message}`); + } + } } } diff --git a/src/cli.js b/src/cli.js index e1868d0f..d26fb24c 100644 --- a/src/cli.js +++ b/src/cli.js @@ -18,6 +18,7 @@ import { impactAnalysis, moduleMap, queryName, + stats, } from './queries.js'; import { listRepos, @@ -28,11 +29,14 @@ import { } from './registry.js'; import { 
watchProject } from './watcher.js'; +const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1')); +const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8')); + const program = new Command(); program .name('codegraph') .description('Local code dependency graph tool') - .version('1.3.0') + .version(pkg.version) .option('-v, --verbose', 'Enable verbose/debug output') .option('--engine <engine>', 'Parser engine: native, wasm, or auto (default: auto)', 'auto') .hook('preAction', (thisCommand) => { @@ -78,6 +82,15 @@ program moduleMap(opts.db, parseInt(opts.limit, 10), { json: opts.json }); }); +program + .command('stats') + .description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings') + .option('-d, --db <path>', 'Path to graph.db') + .option('-j, --json', 'Output as JSON') + .action((opts) => { + stats(opts.db, { json: opts.json }); + }); + program .command('deps <file>') .description('Show what this file imports and what imports it') @@ -214,6 +227,7 @@ registry .description('List all registered repositories') .option('-j, --json', 'Output as JSON') .action((opts) => { + pruneRegistry(); const repos = listRepos(); if (opts.json) { console.log(JSON.stringify(repos, null, 2)); @@ -257,14 +271,16 @@ registry registry .command('prune') - .description('Remove registry entries whose directories no longer exist') - .action(() => { - const pruned = pruneRegistry(); + .description('Remove stale registry entries (missing directories or idle beyond TTL)') + .option('--ttl <days>', 'Days of inactivity before pruning (default: 30)', '30') + .action((opts) => { + const pruned = pruneRegistry(undefined, parseInt(opts.ttl, 10)); if (pruned.length === 0) { console.log('No stale entries found.'); } else { for (const entry of pruned) { - console.log(`Pruned "${entry.name}" (${entry.path})`); + const tag = entry.reason === 'expired' ?
'expired' : 'missing'; + console.log(`Pruned "${entry.name}" (${entry.path}) [${tag}]`); } console.log(`\nRemoved ${pruned.length} stale ${pruned.length === 1 ? 'entry' : 'entries'}.`); } @@ -278,7 +294,7 @@ program .action(() => { console.log('\nAvailable embedding models:\n'); for (const [key, config] of Object.entries(MODELS)) { - const def = key === 'minilm' ? ' (default)' : ''; + const def = key === 'jina-code' ? ' (default)' : ''; console.log(` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${config.desc}${def}`); } console.log('\nUsage: codegraph embed --model <model>'); @@ -292,8 +308,8 @@ program ) .option( '-m, --model <model>', - 'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details', - 'minilm', + 'Embedding model: minilm, jina-small, jina-base, jina-code (default), nomic, nomic-v1.5, bge-large. Run `codegraph models` for details', + 'jina-code', ) .action(async (dir, opts) => { const root = path.resolve(dir || '.'); diff --git a/src/config.js b/src/config.js index 5294d25e..33764c1f 100644 --- a/src/config.js +++ b/src/config.js @@ -19,7 +19,7 @@ export const DEFAULTS = { defaultDepth: 3, defaultLimit: 20, }, - embeddings: { model: 'minilm', llmProvider: null }, + embeddings: { model: 'jina-code', llmProvider: null }, llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null }, search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 }, ci: { failOnCycles: false, impactThreshold: null }, diff --git a/src/embedder.js b/src/embedder.js index 847902ca..016fe4b4 100644 --- a/src/embedder.js +++ b/src/embedder.js @@ -55,7 +55,7 @@ export const MODELS = { }, }; -export const DEFAULT_MODEL = 'minilm'; +export const DEFAULT_MODEL = 'jina-code'; const BATCH_SIZE_MAP = { minilm: 32, 'jina-small': 16, @@ -173,10 +173,10 @@ function initEmbeddingsSchema(db) { /** * Build embeddings for all functions/methods/classes in the graph.
*/ -export async function buildEmbeddings(rootDir, modelKey) { +export async function buildEmbeddings(rootDir, modelKey, customDbPath) { // path already imported at top // fs already imported at top - const dbPath = findDbPath(null); + const dbPath = customDbPath || findDbPath(null); const db = new Database(dbPath); initEmbeddingsSchema(db); diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js new file mode 100644 index 00000000..bacb9f75 --- /dev/null +++ b/src/extractors/csharp.js @@ -0,0 +1,243 @@ +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from C# files. + */ +export function extractCSharpSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function findCSharpParentType(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'struct_declaration' || + current.type === 'interface_declaration' || + current.type === 'enum_declaration' || + current.type === 'record_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; + } + + function walkCSharpNode(node) { + switch (node.type) { + case 'class_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + extractCSharpBaseTypes(node, nameNode.text, classes); + } + break; + } + + case 'struct_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + extractCSharpBaseTypes(node, nameNode.text, classes); + } + break; + } + + case 'record_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'record', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + extractCSharpBaseTypes(node, nameNode.text, classes); + } + break; + } + + case 'interface_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, + }); + } + } + } + } + } + break; + } + + case 'enum_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + 
break; + } + + case 'method_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'constructor_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'property_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'using_directive': { + // using System.Collections.Generic; + const nameNode = + node.childForFieldName('name') || + findChild(node, 'qualified_name') || + findChild(node, 'identifier'); + if (nameNode) { + const fullPath = nameNode.text; + const lastName = fullPath.split('.').pop(); + imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + csharpUsing: true, + }); + } + break; + } + + case 'invocation_expression': { + const fn = node.childForFieldName('function') || node.child(0); + if (fn) { + if (fn.type === 'identifier') { + calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'member_access_expression') { + const name = fn.childForFieldName('name'); + if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); + } else if 
(fn.type === 'generic_name' || fn.type === 'member_binding_expression') { + const name = fn.childForFieldName('name') || fn.child(0); + if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); + } + } + break; + } + + case 'object_creation_expression': { + const typeNode = node.childForFieldName('type'); + if (typeNode) { + const typeName = + typeNode.type === 'generic_name' + ? typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text + : typeNode.text; + if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkCSharpNode(node.child(i)); + } + + walkCSharpNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} + +function extractCSharpBaseTypes(node, className, classes) { + const baseList = node.childForFieldName('bases'); + if (!baseList) return; + for (let i = 0; i < baseList.childCount; i++) { + const child = baseList.child(i); + if (!child) continue; + if (child.type === 'identifier' || child.type === 'qualified_name') { + classes.push({ name: className, extends: child.text, line: node.startPosition.row + 1 }); + } else if (child.type === 'generic_name') { + const name = child.childForFieldName('name') || child.child(0); + if (name) + classes.push({ name: className, extends: name.text, line: node.startPosition.row + 1 }); + } else if (child.type === 'base_list') { + for (let j = 0; j < child.childCount; j++) { + const base = child.child(j); + if (base && (base.type === 'identifier' || base.type === 'qualified_name')) { + classes.push({ name: className, extends: base.text, line: node.startPosition.row + 1 }); + } else if (base && base.type === 'generic_name') { + const name = base.childForFieldName('name') || base.child(0); + if (name) + classes.push({ name: className, extends: name.text, line: node.startPosition.row + 1 }); + } + } + } + } +} diff --git a/src/extractors/go.js b/src/extractors/go.js new file 
mode 100644 index 00000000..767f46fa --- /dev/null +++ b/src/extractors/go.js @@ -0,0 +1,167 @@ +import { nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Go files. + */ +export function extractGoSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function walkGoNode(node) { + switch (node.type) { + case 'function_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'method_declaration': { + const nameNode = node.childForFieldName('name'); + const receiver = node.childForFieldName('receiver'); + if (nameNode) { + let receiverType = null; + if (receiver) { + // receiver is a parameter_list like (r *Foo) or (r Foo) + for (let i = 0; i < receiver.childCount; i++) { + const param = receiver.child(i); + if (!param) continue; + const typeNode = param.childForFieldName('type'); + if (typeNode) { + receiverType = + typeNode.type === 'pointer_type' + ? typeNode.text.replace(/^\*/, '') + : typeNode.text; + break; + } + } + } + const fullName = receiverType ? 
`${receiverType}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'type_declaration': { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'type_spec') continue; + const nameNode = spec.childForFieldName('name'); + const typeNode = spec.childForFieldName('type'); + if (nameNode && typeNode) { + if (typeNode.type === 'struct_type') { + definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } else if (typeNode.type === 'interface_type') { + definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + for (let j = 0; j < typeNode.childCount; j++) { + const member = typeNode.child(j); + if (member && member.type === 'method_elem') { + const methName = member.childForFieldName('name'); + if (methName) { + definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: member.endPosition.row + 1, + }); + } + } + } + } else { + definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } + } + break; + } + + case 'import_declaration': { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'import_spec') { + const pathNode = child.childForFieldName('path'); + if (pathNode) { + const importPath = pathNode.text.replace(/"/g, ''); + const nameNode = child.childForFieldName('name'); + const alias = nameNode ? 
nameNode.text : importPath.split('/').pop(); + imports.push({ + source: importPath, + names: [alias], + line: child.startPosition.row + 1, + goImport: true, + }); + } + } + if (child.type === 'import_spec_list') { + for (let j = 0; j < child.childCount; j++) { + const spec = child.child(j); + if (spec && spec.type === 'import_spec') { + const pathNode = spec.childForFieldName('path'); + if (pathNode) { + const importPath = pathNode.text.replace(/"/g, ''); + const nameNode = spec.childForFieldName('name'); + const alias = nameNode ? nameNode.text : importPath.split('/').pop(); + imports.push({ + source: importPath, + names: [alias], + line: spec.startPosition.row + 1, + goImport: true, + }); + } + } + } + } + } + break; + } + + case 'call_expression': { + const fn = node.childForFieldName('function'); + if (fn) { + if (fn.type === 'identifier') { + calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'selector_expression') { + const field = fn.childForFieldName('field'); + if (field) calls.push({ name: field.text, line: node.startPosition.row + 1 }); + } + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkGoNode(node.child(i)); + } + + walkGoNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} diff --git a/src/extractors/hcl.js b/src/extractors/hcl.js new file mode 100644 index 00000000..4df5af4d --- /dev/null +++ b/src/extractors/hcl.js @@ -0,0 +1,73 @@ +import { nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from HCL (Terraform) files. 
+ */ +export function extractHCLSymbols(tree, _filePath) { + const definitions = []; + const imports = []; + + function walkHclNode(node) { + if (node.type === 'block') { + const children = []; + for (let i = 0; i < node.childCount; i++) children.push(node.child(i)); + + const identifiers = children.filter((c) => c.type === 'identifier'); + const strings = children.filter((c) => c.type === 'string_lit'); + + if (identifiers.length > 0) { + const blockType = identifiers[0].text; + let name = ''; + + if (blockType === 'resource' && strings.length >= 2) { + name = `${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; + } else if (blockType === 'data' && strings.length >= 2) { + name = `data.${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; + } else if ( + (blockType === 'variable' || blockType === 'output' || blockType === 'module') && + strings.length >= 1 + ) { + name = `${blockType}.${strings[0].text.replace(/"/g, '')}`; + } else if (blockType === 'locals') { + name = 'locals'; + } else if (blockType === 'terraform' || blockType === 'provider') { + name = blockType; + if (strings.length >= 1) name += `.${strings[0].text.replace(/"/g, '')}`; + } + + if (name) { + definitions.push({ + name, + kind: blockType, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + + if (blockType === 'module') { + const body = children.find((c) => c.type === 'body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const attr = body.child(i); + if (attr && attr.type === 'attribute') { + const key = attr.childForFieldName('key') || attr.child(0); + const val = attr.childForFieldName('val') || attr.child(2); + if (key && key.text === 'source' && val) { + const src = val.text.replace(/"/g, ''); + if (src.startsWith('./') || src.startsWith('../')) { + imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); + } + } + } + } + } + } + } + } + + for (let i = 0; i < node.childCount; i++) 
walkHclNode(node.child(i)); + } + + walkHclNode(tree.rootNode); + return { definitions, calls: [], imports, classes: [], exports: [] }; +} diff --git a/src/extractors/helpers.js b/src/extractors/helpers.js new file mode 100644 index 00000000..34fb6900 --- /dev/null +++ b/src/extractors/helpers.js @@ -0,0 +1,10 @@ +export function nodeEndLine(node) { + return node.endPosition.row + 1; +} + +export function findChild(node, type) { + for (let i = 0; i < node.childCount; i++) { + if (node.child(i).type === type) return node.child(i); + } + return null; +} diff --git a/src/extractors/index.js b/src/extractors/index.js new file mode 100644 index 00000000..4d26db5c --- /dev/null +++ b/src/extractors/index.js @@ -0,0 +1,9 @@ +export { extractCSharpSymbols } from './csharp.js'; +export { extractGoSymbols } from './go.js'; +export { extractHCLSymbols } from './hcl.js'; +export { extractJavaSymbols } from './java.js'; +export { extractSymbols } from './javascript.js'; +export { extractPHPSymbols } from './php.js'; +export { extractPythonSymbols } from './python.js'; +export { extractRubySymbols } from './ruby.js'; +export { extractRustSymbols } from './rust.js'; diff --git a/src/extractors/java.js b/src/extractors/java.js new file mode 100644 index 00000000..b75caf48 --- /dev/null +++ b/src/extractors/java.js @@ -0,0 +1,227 @@ +import { nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Java files. + */ +export function extractJavaSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function findJavaParentClass(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'enum_declaration' || + current.type === 'interface_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; + } + + function walkJavaNode(node) { + switch (node.type) { + case 'class_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + + const superclass = node.childForFieldName('superclass'); + if (superclass) { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if ( + child && + (child.type === 'type_identifier' || + child.type === 'identifier' || + child.type === 'generic_type') + ) { + const superName = child.type === 'generic_type' ? child.child(0)?.text : child.text; + if (superName) + classes.push({ + name: nameNode.text, + extends: superName, + line: node.startPosition.row + 1, + }); + break; + } + } + } + + const interfaces = node.childForFieldName('interfaces'); + if (interfaces) { + for (let i = 0; i < interfaces.childCount; i++) { + const child = interfaces.child(i); + if ( + child && + (child.type === 'type_identifier' || + child.type === 'identifier' || + child.type === 'type_list' || + child.type === 'generic_type') + ) { + if (child.type === 'type_list') { + for (let j = 0; j < child.childCount; j++) { + const t = child.child(j); + if ( + t && + (t.type === 'type_identifier' || + t.type === 'identifier' || + t.type === 'generic_type') + ) { + const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; + if (ifaceName) + classes.push({ + name: nameNode.text, + implements: ifaceName, + line: node.startPosition.row + 1, + }); + } + } + } else { + const ifaceName = + child.type === 'generic_type' ? 
child.child(0)?.text : child.text; + if (ifaceName) + classes.push({ + name: nameNode.text, + implements: ifaceName, + line: node.startPosition.row + 1, + }); + } + } + } + } + } + break; + } + + case 'interface_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, + }); + } + } + } + } + } + break; + } + + case 'enum_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'method_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findJavaParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'constructor_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findJavaParentClass(node); + const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'import_declaration': { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { + const fullPath = child.text; + const lastName = fullPath.split('.').pop(); + imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + javaImport: true, + }); + } + if (child && child.type === 'asterisk') { + const lastImport = imports[imports.length - 1]; + if (lastImport) lastImport.names = ['*']; + } + } + break; + } + + case 'method_invocation': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + calls.push({ name: nameNode.text, line: node.startPosition.row + 1 }); + } + break; + } + + case 'object_creation_expression': { + const typeNode = node.childForFieldName('type'); + if (typeNode) { + const typeName = + typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text; + if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkJavaNode(node.child(i)); + } + + walkJavaNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js new file mode 100644 index 00000000..c275bad2 --- /dev/null +++ b/src/extractors/javascript.js @@ -0,0 +1,396 @@ +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from a JS/TS parsed AST. 
+ */ +export function extractSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function walkJavaScriptNode(node) { + switch (node.type) { + case 'function_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'class_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const cls = { + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }; + definitions.push(cls); + const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); + if (heritage) { + const superName = extractSuperclass(heritage); + if (superName) { + classes.push({ + name: nameNode.text, + extends: superName, + line: node.startPosition.row + 1, + }); + } + const implementsList = extractImplements(heritage); + for (const iface of implementsList) { + classes.push({ + name: nameNode.text, + implements: iface, + line: node.startPosition.row + 1, + }); + } + } + } + break; + } + + case 'method_definition': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findParentClass(node); + const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'interface_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = + node.childForFieldName('body') || + findChild(node, 'interface_body') || + findChild(node, 'object_type'); + if (body) { + extractInterfaceMethods(body, nameNode.text, definitions); + } + } + break; + } + + case 'type_alias_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'lexical_declaration': + case 'variable_declaration': { + for (let i = 0; i < node.childCount; i++) { + const declarator = node.child(i); + if (declarator && declarator.type === 'variable_declarator') { + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if ( + nameN && + valueN && + (valueN.type === 'arrow_function' || + valueN.type === 'function_expression' || + valueN.type === 'function') + ) { + definitions.push({ + name: nameN.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(valueN), + }); + } + } + } + break; + } + + case 'call_expression': { + const fn = node.childForFieldName('function'); + if (fn) { + const callInfo = extractCallInfo(fn, node); + if (callInfo) { + calls.push(callInfo); + } + } + break; + } + + case 'import_statement': { + const isTypeOnly = node.text.startsWith('import type'); + const source = node.childForFieldName('source') || findChild(node, 'string'); + if (source) { + const modPath = source.text.replace(/['"]/g, ''); + const names = 
extractImportNames(node); + imports.push({ + source: modPath, + names, + line: node.startPosition.row + 1, + typeOnly: isTypeOnly, + }); + } + break; + } + + case 'export_statement': { + const decl = node.childForFieldName('declaration'); + if (decl) { + if (decl.type === 'function_declaration') { + const n = decl.childForFieldName('name'); + if (n) + exports.push({ name: n.text, kind: 'function', line: node.startPosition.row + 1 }); + } else if (decl.type === 'class_declaration') { + const n = decl.childForFieldName('name'); + if (n) exports.push({ name: n.text, kind: 'class', line: node.startPosition.row + 1 }); + } else if (decl.type === 'interface_declaration') { + const n = decl.childForFieldName('name'); + if (n) + exports.push({ name: n.text, kind: 'interface', line: node.startPosition.row + 1 }); + } else if (decl.type === 'type_alias_declaration') { + const n = decl.childForFieldName('name'); + if (n) exports.push({ name: n.text, kind: 'type', line: node.startPosition.row + 1 }); + } + } + const source = node.childForFieldName('source') || findChild(node, 'string'); + if (source && !decl) { + const modPath = source.text.replace(/['"]/g, ''); + const reexportNames = extractImportNames(node); + const isWildcard = node.text.includes('export *') || node.text.includes('export*'); + imports.push({ + source: modPath, + names: reexportNames, + line: node.startPosition.row + 1, + reexport: true, + wildcardReexport: isWildcard && reexportNames.length === 0, + }); + } + break; + } + + case 'expression_statement': { + const expr = node.child(0); + if (expr && expr.type === 'assignment_expression') { + const left = expr.childForFieldName('left'); + const right = expr.childForFieldName('right'); + if (left && right) { + const leftText = left.text; + if (leftText.startsWith('module.exports') || leftText === 'exports') { + if (right.type === 'call_expression') { + const fn = right.childForFieldName('function'); + const args = right.childForFieldName('arguments') || 
findChild(right, 'arguments'); + if (fn && fn.text === 'require' && args) { + const strArg = findChild(args, 'string'); + if (strArg) { + const modPath = strArg.text.replace(/['"]/g, ''); + imports.push({ + source: modPath, + names: [], + line: node.startPosition.row + 1, + reexport: true, + wildcardReexport: true, + }); + } + } + } + if (right.type === 'object') { + for (let ci = 0; ci < right.childCount; ci++) { + const child = right.child(ci); + if (child && child.type === 'spread_element') { + const spreadExpr = child.child(1) || child.childForFieldName('value'); + if (spreadExpr && spreadExpr.type === 'call_expression') { + const fn2 = spreadExpr.childForFieldName('function'); + const args2 = + spreadExpr.childForFieldName('arguments') || + findChild(spreadExpr, 'arguments'); + if (fn2 && fn2.text === 'require' && args2) { + const strArg2 = findChild(args2, 'string'); + if (strArg2) { + const modPath2 = strArg2.text.replace(/['"]/g, ''); + imports.push({ + source: modPath2, + names: [], + line: node.startPosition.row + 1, + reexport: true, + wildcardReexport: true, + }); + } + } + } + } + } + } + } + } + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) { + walkJavaScriptNode(node.child(i)); + } + } + + walkJavaScriptNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} + +function extractInterfaceMethods(bodyNode, interfaceName, definitions) { + for (let i = 0; i < bodyNode.childCount; i++) { + const child = bodyNode.child(i); + if (!child) continue; + if (child.type === 'method_signature' || child.type === 'property_signature') { + const nameNode = child.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: `${interfaceName}.${nameNode.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, + }); + } + } + } +} + +function extractImplements(heritage) { + const interfaces = []; + for (let i = 0; i < heritage.childCount; i++) { + const child = 
heritage.child(i); + if (!child) continue; + if (child.text === 'implements') { + for (let j = i + 1; j < heritage.childCount; j++) { + const next = heritage.child(j); + if (!next) continue; + if (next.type === 'identifier') interfaces.push(next.text); + else if (next.type === 'type_identifier') interfaces.push(next.text); + if (next.childCount > 0) interfaces.push(...extractImplementsFromNode(next)); + } + break; + } + if (child.type === 'implements_clause') { + interfaces.push(...extractImplementsFromNode(child)); + } + } + return interfaces; +} + +function extractImplementsFromNode(node) { + const result = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'identifier' || child.type === 'type_identifier') result.push(child.text); + if (child.childCount > 0) result.push(...extractImplementsFromNode(child)); + } + return result; +} + +function extractCallInfo(fn, callNode) { + if (fn.type === 'identifier') { + return { name: fn.text, line: callNode.startPosition.row + 1 }; + } + + if (fn.type === 'member_expression') { + const obj = fn.childForFieldName('object'); + const prop = fn.childForFieldName('property'); + if (!prop) return null; + + if (prop.text === 'call' || prop.text === 'apply' || prop.text === 'bind') { + if (obj && obj.type === 'identifier') + return { name: obj.text, line: callNode.startPosition.row + 1, dynamic: true }; + if (obj && obj.type === 'member_expression') { + const innerProp = obj.childForFieldName('property'); + if (innerProp) + return { name: innerProp.text, line: callNode.startPosition.row + 1, dynamic: true }; + } + } + + if (prop.type === 'string' || prop.type === 'string_fragment') { + const methodName = prop.text.replace(/['"]/g, ''); + if (methodName) + return { name: methodName, line: callNode.startPosition.row + 1, dynamic: true }; + } + + return { name: prop.text, line: callNode.startPosition.row + 1 }; + } + + if (fn.type === 'subscript_expression') { 
+ const index = fn.childForFieldName('index'); + if (index && (index.type === 'string' || index.type === 'template_string')) { + const methodName = index.text.replace(/['"`]/g, ''); + if (methodName && !methodName.includes('$')) + return { name: methodName, line: callNode.startPosition.row + 1, dynamic: true }; + } + } + + return null; +} + +function extractSuperclass(heritage) { + for (let i = 0; i < heritage.childCount; i++) { + const child = heritage.child(i); + if (child.type === 'identifier') return child.text; + if (child.type === 'member_expression') return child.text; + const found = extractSuperclass(child); + if (found) return found; + } + return null; +} + +function findParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class_declaration' || current.type === 'class') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; +} + +function extractImportNames(node) { + const names = []; + function scan(n) { + if (n.type === 'import_specifier' || n.type === 'export_specifier') { + const nameNode = n.childForFieldName('name') || n.childForFieldName('alias'); + if (nameNode) names.push(nameNode.text); + else names.push(n.text); + } else if (n.type === 'identifier' && n.parent && n.parent.type === 'import_clause') { + names.push(n.text); + } else if (n.type === 'namespace_import') { + names.push(n.text); + } + for (let i = 0; i < n.childCount; i++) scan(n.child(i)); + } + scan(node); + return names; +} diff --git a/src/extractors/php.js b/src/extractors/php.js new file mode 100644 index 00000000..d27c036c --- /dev/null +++ b/src/extractors/php.js @@ -0,0 +1,237 @@ +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from PHP files. 
+ */ +export function extractPHPSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function findPHPParentClass(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'trait_declaration' || + current.type === 'enum_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; + } + + function walkPhpNode(node) { + switch (node.type) { + case 'function_definition': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'class_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + + // Check base clause (extends) + const baseClause = + node.childForFieldName('base_clause') || findChild(node, 'base_clause'); + if (baseClause) { + for (let i = 0; i < baseClause.childCount; i++) { + const child = baseClause.child(i); + if (child && (child.type === 'name' || child.type === 'qualified_name')) { + classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); + break; + } + } + } + + // Check class interface clause (implements) + const interfaceClause = findChild(node, 'class_interface_clause'); + if (interfaceClause) { + for (let i = 0; i < interfaceClause.childCount; i++) { + const child = interfaceClause.child(i); + if (child && (child.type === 'name' || child.type === 'qualified_name')) { + classes.push({ + name: nameNode.text, + implements: child.text, + line: node.startPosition.row + 1, + }); + } + } + } + } + break; + } + + case 
'interface_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, + }); + } + } + } + } + } + break; + } + + case 'trait_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'trait', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'enum_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'method_declaration': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findPHPParentClass(node); + const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'namespace_use_declaration': { + // use App\Models\User; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'namespace_use_clause') { + const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); + if (nameNode) { + const fullPath = nameNode.text; + const lastName = fullPath.split('\\').pop(); + const alias = child.childForFieldName('alias'); + imports.push({ + source: fullPath, + names: [alias ? alias.text : lastName], + line: node.startPosition.row + 1, + phpUse: true, + }); + } + } + // Single use clause without wrapper + if (child && (child.type === 'qualified_name' || child.type === 'name')) { + const fullPath = child.text; + const lastName = fullPath.split('\\').pop(); + imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + phpUse: true, + }); + } + } + break; + } + + case 'function_call_expression': { + const fn = node.childForFieldName('function') || node.child(0); + if (fn) { + if (fn.type === 'name' || fn.type === 'identifier') { + calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'qualified_name') { + const parts = fn.text.split('\\'); + calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); + } + } + break; + } + + case 'member_call_expression': { + const name = node.childForFieldName('name'); + if (name) { + calls.push({ name: name.text, line: node.startPosition.row + 1 }); + } + break; + } + + case 'scoped_call_expression': { + const name = node.childForFieldName('name'); + if (name) { + calls.push({ name: name.text, line: node.startPosition.row + 1 }); + } + break; + } + + case 'object_creation_expression': { + const classNode = node.child(1); // skip 'new' keyword + 
if (classNode && (classNode.type === 'name' || classNode.type === 'qualified_name')) { + const parts = classNode.text.split('\\'); + calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkPhpNode(node.child(i)); + } + + walkPhpNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} diff --git a/src/extractors/python.js b/src/extractors/python.js new file mode 100644 index 00000000..2d0ab0d0 --- /dev/null +++ b/src/extractors/python.js @@ -0,0 +1,143 @@ +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Python files. + */ +export function extractPythonSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function walkPythonNode(node) { + switch (node.type) { + case 'function_definition': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const decorators = []; + if (node.previousSibling && node.previousSibling.type === 'decorator') { + decorators.push(node.previousSibling.text); + } + const parentClass = findPythonParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const kind = parentClass ? 
'method' : 'function'; + definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + decorators, + }); + } + break; + } + + case 'class_definition': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const superclasses = + node.childForFieldName('superclasses') || findChild(node, 'argument_list'); + if (superclasses) { + for (let i = 0; i < superclasses.childCount; i++) { + const child = superclasses.child(i); + if (child && child.type === 'identifier') { + classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); + } + } + } + } + break; + } + + case 'decorated_definition': { + for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); + return; + } + + case 'call': { + const fn = node.childForFieldName('function'); + if (fn) { + let callName = null; + if (fn.type === 'identifier') callName = fn.text; + else if (fn.type === 'attribute') { + const attr = fn.childForFieldName('attribute'); + if (attr) callName = attr.text; + } + if (callName) calls.push({ name: callName, line: node.startPosition.row + 1 }); + } + break; + } + + case 'import_statement': { + const names = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'dotted_name' || child.type === 'aliased_import')) { + const name = + child.type === 'aliased_import' + ? 
(child.childForFieldName('alias') || child.childForFieldName('name'))?.text + : child.text; + if (name) names.push(name); + } + } + if (names.length > 0) + imports.push({ + source: names[0], + names, + line: node.startPosition.row + 1, + pythonImport: true, + }); + break; + } + + case 'import_from_statement': { + let source = ''; + const names = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'dotted_name' || child.type === 'relative_import') { + if (!source) source = child.text; + else names.push(child.text); + } + if (child.type === 'aliased_import') { + const n = child.childForFieldName('name') || child.child(0); + if (n) names.push(n.text); + } + if (child.type === 'wildcard_import') names.push('*'); + } + if (source) + imports.push({ source, names, line: node.startPosition.row + 1, pythonImport: true }); + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); + } + + function findPythonParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class_definition') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; + } + + walkPythonNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} diff --git a/src/extractors/ruby.js b/src/extractors/ruby.js new file mode 100644 index 00000000..86b8ac5d --- /dev/null +++ b/src/extractors/ruby.js @@ -0,0 +1,185 @@ +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Ruby files. 
+ */ +export function extractRubySymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function findRubyParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + if (current.type === 'module') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; + } + + function walkRubyNode(node) { + switch (node.type) { + case 'class': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const superclass = node.childForFieldName('superclass'); + if (superclass) { + // superclass wraps the < token and class name + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { + classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); + break; + } + } + // Direct superclass node may be a constant + if (superclass.type === 'superclass') { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { + classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); + break; + } + } + } + } + } + break; + } + + case 'module': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'method': { + const nameNode = 
node.childForFieldName('name'); + if (nameNode) { + const parentClass = findRubyParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'singleton_method': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findRubyParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'call': { + const methodNode = node.childForFieldName('method'); + if (methodNode) { + // Check for require/require_relative + if (methodNode.text === 'require' || methodNode.text === 'require_relative') { + const args = node.childForFieldName('arguments'); + if (args) { + for (let i = 0; i < args.childCount; i++) { + const arg = args.child(i); + if (arg && (arg.type === 'string' || arg.type === 'string_content')) { + const strContent = arg.text.replace(/^['"]|['"]$/g, ''); + imports.push({ + source: strContent, + names: [strContent.split('/').pop()], + line: node.startPosition.row + 1, + rubyRequire: true, + }); + break; + } + // Look inside string for string_content + if (arg && arg.type === 'string') { + const content = findChild(arg, 'string_content'); + if (content) { + imports.push({ + source: content.text, + names: [content.text.split('/').pop()], + line: node.startPosition.row + 1, + rubyRequire: true, + }); + break; + } + } + } + } + } else if ( + methodNode.text === 'include' || + methodNode.text === 'extend' || + methodNode.text === 'prepend' + ) { + // Module inclusion — treated like implements + const parentClass = findRubyParentClass(node); + if (parentClass) { + const args = node.childForFieldName('arguments'); + if (args) { + for (let i = 0; i 
< args.childCount; i++) { + const arg = args.child(i); + if (arg && (arg.type === 'constant' || arg.type === 'scope_resolution')) { + classes.push({ + name: parentClass, + implements: arg.text, + line: node.startPosition.row + 1, + }); + } + } + } + } + } else { + calls.push({ name: methodNode.text, line: node.startPosition.row + 1 }); + } + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkRubyNode(node.child(i)); + } + + walkRubyNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} diff --git a/src/extractors/rust.js b/src/extractors/rust.js new file mode 100644 index 00000000..043f1514 --- /dev/null +++ b/src/extractors/rust.js @@ -0,0 +1,215 @@ +import { nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Rust files. + */ +export function extractRustSymbols(tree, _filePath) { + const definitions = []; + const calls = []; + const imports = []; + const classes = []; + const exports = []; + + function findCurrentImpl(node) { + let current = node.parent; + while (current) { + if (current.type === 'impl_item') { + const typeNode = current.childForFieldName('type'); + return typeNode ? typeNode.text : null; + } + current = current.parent; + } + return null; + } + + function walkRustNode(node) { + switch (node.type) { + case 'function_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const implType = findCurrentImpl(node); + const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; + const kind = implType ? 
'method' : 'function'; + definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'struct_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'enum_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + break; + } + + case 'trait_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'trait', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if ( + child && + (child.type === 'function_signature_item' || child.type === 'function_item') + ) { + const methName = child.childForFieldName('name'); + if (methName) { + definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, + }); + } + } + } + } + } + break; + } + + case 'impl_item': { + const typeNode = node.childForFieldName('type'); + const traitNode = node.childForFieldName('trait'); + if (typeNode && traitNode) { + classes.push({ + name: typeNode.text, + implements: traitNode.text, + line: node.startPosition.row + 1, + }); + } + break; + } + + case 'use_declaration': { + const argNode = node.child(1); + if (argNode) { + const usePaths = extractRustUsePath(argNode); + for (const imp of usePaths) { + imports.push({ + source: imp.source, + names: imp.names, + line: node.startPosition.row + 1, + rustUse: true, + }); + } + } + break; + } + + case 'call_expression': { 
+ const fn = node.childForFieldName('function'); + if (fn) { + if (fn.type === 'identifier') { + calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'field_expression') { + const field = fn.childForFieldName('field'); + if (field) calls.push({ name: field.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'scoped_identifier') { + const name = fn.childForFieldName('name'); + if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); + } + } + break; + } + + case 'macro_invocation': { + const macroNode = node.child(0); + if (macroNode) { + calls.push({ name: `${macroNode.text}!`, line: node.startPosition.row + 1 }); + } + break; + } + } + + for (let i = 0; i < node.childCount; i++) walkRustNode(node.child(i)); + } + + walkRustNode(tree.rootNode); + return { definitions, calls, imports, classes, exports }; +} + +function extractRustUsePath(node) { + if (!node) return []; + + if (node.type === 'use_list') { + const results = []; + for (let i = 0; i < node.childCount; i++) { + results.push(...extractRustUsePath(node.child(i))); + } + return results; + } + + if (node.type === 'scoped_use_list') { + const pathNode = node.childForFieldName('path'); + const listNode = node.childForFieldName('list'); + const prefix = pathNode ? pathNode.text : ''; + if (listNode) { + const names = []; + for (let i = 0; i < listNode.childCount; i++) { + const child = listNode.child(i); + if ( + child && + (child.type === 'identifier' || child.type === 'use_as_clause' || child.type === 'self') + ) { + const name = + child.type === 'use_as_clause' + ? 
(child.childForFieldName('alias') || child.childForFieldName('name'))?.text + : child.text; + if (name) names.push(name); + } + } + return [{ source: prefix, names }]; + } + return [{ source: prefix, names: [] }]; + } + + if (node.type === 'use_as_clause') { + const name = node.childForFieldName('alias') || node.childForFieldName('name'); + return [{ source: node.text, names: name ? [name.text] : [] }]; + } + + if (node.type === 'use_wildcard') { + const pathNode = node.childForFieldName('path'); + return [{ source: pathNode ? pathNode.text : '*', names: ['*'] }]; + } + + if (node.type === 'scoped_identifier' || node.type === 'identifier') { + const text = node.text; + const lastName = text.split('::').pop(); + return [{ source: text, names: [lastName] }]; + } + + return []; +} diff --git a/src/index.js b/src/index.js index 7ce90860..76e30bf9 100644 --- a/src/index.js +++ b/src/index.js @@ -45,6 +45,7 @@ export { impactAnalysisData, moduleMapData, queryNameData, + statsData, } from './queries.js'; // Registry (multi-repo) export { diff --git a/src/mcp.js b/src/mcp.js index d8f5be82..02a007b1 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -422,7 +422,8 @@ export async function startMCPServer(customDbPath, options = {}) { break; } case 'list_repos': { - const { listRepos } = await import('./registry.js'); + const { listRepos, pruneRegistry } = await import('./registry.js'); + pruneRegistry(); let repos = listRepos(); if (allowedRepos) { repos = repos.filter((r) => allowedRepos.includes(r.name)); diff --git a/src/parser.js b/src/parser.js index d372d4b0..d2dd469a 100644 --- a/src/parser.js +++ b/src/parser.js @@ -3,7 +3,32 @@ import path from 'node:path'; import { fileURLToPath } from 'node:url'; import { Language, Parser } from 'web-tree-sitter'; import { warn } from './logger.js'; -import { loadNative } from './native.js'; +import { getNative, loadNative } from './native.js'; + +// Re-export all extractors for backward compatibility +export { + extractCSharpSymbols, + 
extractGoSymbols, + extractHCLSymbols, + extractJavaSymbols, + extractPHPSymbols, + extractPythonSymbols, + extractRubySymbols, + extractRustSymbols, + extractSymbols, +} from './extractors/index.js'; + +import { + extractCSharpSymbols, + extractGoSymbols, + extractHCLSymbols, + extractJavaSymbols, + extractPHPSymbols, + extractPythonSymbols, + extractRubySymbols, + extractRustSymbols, + extractSymbols, +} from './extractors/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -44,1894 +69,6 @@ export function getParser(parsers, filePath) { return parsers.get(entry.id) || null; } -function nodeEndLine(node) { - return node.endPosition.row + 1; -} - -/** - * Extract symbols from a JS/TS parsed AST. - */ -export function extractSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function walk(node) { - switch (node.type) { - case 'function_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const cls = { - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }; - definitions.push(cls); - const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); - if (heritage) { - const superName = extractSuperclass(heritage); - if (superName) { - classes.push({ - name: nameNode.text, - extends: superName, - line: node.startPosition.row + 1, - }); - } - const implementsList = extractImplements(heritage); - for (const iface of implementsList) { - classes.push({ - name: nameNode.text, - implements: iface, - line: node.startPosition.row + 1, - }); - } - } - } - break; - } - - case 'method_definition': { - 
const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = - node.childForFieldName('body') || - findChild(node, 'interface_body') || - findChild(node, 'object_type'); - if (body) { - extractInterfaceMethods(body, nameNode.text, definitions); - } - } - break; - } - - case 'type_alias_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'lexical_declaration': - case 'variable_declaration': { - for (let i = 0; i < node.childCount; i++) { - const declarator = node.child(i); - if (declarator && declarator.type === 'variable_declarator') { - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - if ( - nameN && - valueN && - (valueN.type === 'arrow_function' || - valueN.type === 'function_expression' || - valueN.type === 'function') - ) { - definitions.push({ - name: nameN.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(valueN), - }); - } - } - } - break; - } - - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - const callInfo = extractCallInfo(fn, node); - if (callInfo) { - calls.push(callInfo); - } - } - break; - } - - case 'import_statement': { - const isTypeOnly = node.text.startsWith('import type'); - const source = 
node.childForFieldName('source') || findChild(node, 'string'); - if (source) { - const modPath = source.text.replace(/['"]/g, ''); - const names = extractImportNames(node); - imports.push({ - source: modPath, - names, - line: node.startPosition.row + 1, - typeOnly: isTypeOnly, - }); - } - break; - } - - case 'export_statement': { - const decl = node.childForFieldName('declaration'); - if (decl) { - if (decl.type === 'function_declaration') { - const n = decl.childForFieldName('name'); - if (n) - exports.push({ name: n.text, kind: 'function', line: node.startPosition.row + 1 }); - } else if (decl.type === 'class_declaration') { - const n = decl.childForFieldName('name'); - if (n) exports.push({ name: n.text, kind: 'class', line: node.startPosition.row + 1 }); - } else if (decl.type === 'interface_declaration') { - const n = decl.childForFieldName('name'); - if (n) - exports.push({ name: n.text, kind: 'interface', line: node.startPosition.row + 1 }); - } else if (decl.type === 'type_alias_declaration') { - const n = decl.childForFieldName('name'); - if (n) exports.push({ name: n.text, kind: 'type', line: node.startPosition.row + 1 }); - } - } - const source = node.childForFieldName('source') || findChild(node, 'string'); - if (source && !decl) { - const modPath = source.text.replace(/['"]/g, ''); - const reexportNames = extractImportNames(node); - const isWildcard = node.text.includes('export *') || node.text.includes('export*'); - imports.push({ - source: modPath, - names: reexportNames, - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: isWildcard && reexportNames.length === 0, - }); - } - break; - } - - case 'expression_statement': { - const expr = node.child(0); - if (expr && expr.type === 'assignment_expression') { - const left = expr.childForFieldName('left'); - const right = expr.childForFieldName('right'); - if (left && right) { - const leftText = left.text; - if (leftText.startsWith('module.exports') || leftText === 'exports') { - if 
(right.type === 'call_expression') { - const fn = right.childForFieldName('function'); - const args = right.childForFieldName('arguments') || findChild(right, 'arguments'); - if (fn && fn.text === 'require' && args) { - const strArg = findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - imports.push({ - source: modPath, - names: [], - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: true, - }); - } - } - } - if (right.type === 'object') { - for (let ci = 0; ci < right.childCount; ci++) { - const child = right.child(ci); - if (child && child.type === 'spread_element') { - const spreadExpr = child.child(1) || child.childForFieldName('value'); - if (spreadExpr && spreadExpr.type === 'call_expression') { - const fn2 = spreadExpr.childForFieldName('function'); - const args2 = - spreadExpr.childForFieldName('arguments') || - findChild(spreadExpr, 'arguments'); - if (fn2 && fn2.text === 'require' && args2) { - const strArg2 = findChild(args2, 'string'); - if (strArg2) { - const modPath2 = strArg2.text.replace(/['"]/g, ''); - imports.push({ - source: modPath2, - names: [], - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: true, - }); - } - } - } - } - } - } - } - } - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) { - walk(node.child(i)); - } - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -function extractInterfaceMethods(bodyNode, interfaceName, definitions) { - for (let i = 0; i < bodyNode.childCount; i++) { - const child = bodyNode.child(i); - if (!child) continue; - if (child.type === 'method_signature' || child.type === 'property_signature') { - const nameNode = child.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: `${interfaceName}.${nameNode.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } -} - -function 
extractImplements(heritage) { - const interfaces = []; - for (let i = 0; i < heritage.childCount; i++) { - const child = heritage.child(i); - if (!child) continue; - if (child.text === 'implements') { - for (let j = i + 1; j < heritage.childCount; j++) { - const next = heritage.child(j); - if (!next) continue; - if (next.type === 'identifier') interfaces.push(next.text); - else if (next.type === 'type_identifier') interfaces.push(next.text); - if (next.childCount > 0) interfaces.push(...extractImplementsFromNode(next)); - } - break; - } - if (child.type === 'implements_clause') { - interfaces.push(...extractImplementsFromNode(child)); - } - } - return interfaces; -} - -function extractImplementsFromNode(node) { - const result = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'identifier' || child.type === 'type_identifier') result.push(child.text); - if (child.childCount > 0) result.push(...extractImplementsFromNode(child)); - } - return result; -} - -function extractCallInfo(fn, callNode) { - if (fn.type === 'identifier') { - return { name: fn.text, line: callNode.startPosition.row + 1 }; - } - - if (fn.type === 'member_expression') { - const obj = fn.childForFieldName('object'); - const prop = fn.childForFieldName('property'); - if (!prop) return null; - - if (prop.text === 'call' || prop.text === 'apply' || prop.text === 'bind') { - if (obj && obj.type === 'identifier') - return { name: obj.text, line: callNode.startPosition.row + 1, dynamic: true }; - if (obj && obj.type === 'member_expression') { - const innerProp = obj.childForFieldName('property'); - if (innerProp) - return { name: innerProp.text, line: callNode.startPosition.row + 1, dynamic: true }; - } - } - - if (prop.type === 'string' || prop.type === 'string_fragment') { - const methodName = prop.text.replace(/['"]/g, ''); - if (methodName) - return { name: methodName, line: callNode.startPosition.row + 1, dynamic: true }; - 
} - - return { name: prop.text, line: callNode.startPosition.row + 1 }; - } - - if (fn.type === 'subscript_expression') { - const index = fn.childForFieldName('index'); - if (index && (index.type === 'string' || index.type === 'template_string')) { - const methodName = index.text.replace(/['"`]/g, ''); - if (methodName && !methodName.includes('$')) - return { name: methodName, line: callNode.startPosition.row + 1, dynamic: true }; - } - } - - return null; -} - -function findChild(node, type) { - for (let i = 0; i < node.childCount; i++) { - if (node.child(i).type === type) return node.child(i); - } - return null; -} - -function extractSuperclass(heritage) { - for (let i = 0; i < heritage.childCount; i++) { - const child = heritage.child(i); - if (child.type === 'identifier') return child.text; - if (child.type === 'member_expression') return child.text; - const found = extractSuperclass(child); - if (found) return found; - } - return null; -} - -function findParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class_declaration' || current.type === 'class') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; -} - -function extractImportNames(node) { - const names = []; - function scan(n) { - if (n.type === 'import_specifier' || n.type === 'export_specifier') { - const nameNode = n.childForFieldName('name') || n.childForFieldName('alias'); - if (nameNode) names.push(nameNode.text); - else names.push(n.text); - } else if (n.type === 'identifier' && n.parent && n.parent.type === 'import_clause') { - names.push(n.text); - } else if (n.type === 'namespace_import') { - names.push(n.text); - } - for (let i = 0; i < n.childCount; i++) scan(n.child(i)); - } - scan(node); - return names; -} - -/** - * Extract symbols from HCL (Terraform) files. 
- */ -export function extractHCLSymbols(tree, _filePath) { - const definitions = []; - const imports = []; - - function walk(node) { - if (node.type === 'block') { - const children = []; - for (let i = 0; i < node.childCount; i++) children.push(node.child(i)); - - const identifiers = children.filter((c) => c.type === 'identifier'); - const strings = children.filter((c) => c.type === 'string_lit'); - - if (identifiers.length > 0) { - const blockType = identifiers[0].text; - let name = ''; - - if (blockType === 'resource' && strings.length >= 2) { - name = `${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; - } else if (blockType === 'data' && strings.length >= 2) { - name = `data.${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; - } else if ( - (blockType === 'variable' || blockType === 'output' || blockType === 'module') && - strings.length >= 1 - ) { - name = `${blockType}.${strings[0].text.replace(/"/g, '')}`; - } else if (blockType === 'locals') { - name = 'locals'; - } else if (blockType === 'terraform' || blockType === 'provider') { - name = blockType; - if (strings.length >= 1) name += `.${strings[0].text.replace(/"/g, '')}`; - } - - if (name) { - definitions.push({ - name, - kind: blockType, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - - if (blockType === 'module') { - const body = children.find((c) => c.type === 'body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const attr = body.child(i); - if (attr && attr.type === 'attribute') { - const key = attr.childForFieldName('key') || attr.child(0); - const val = attr.childForFieldName('val') || attr.child(2); - if (key && key.text === 'source' && val) { - const src = val.text.replace(/"/g, ''); - if (src.startsWith('./') || src.startsWith('../')) { - imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); - } - } - } - } - } - } - } - } - - for (let i = 0; i < node.childCount; i++) 
walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls: [], imports, classes: [], exports: [] }; -} - -/** - * Extract symbols from Python files. - */ -export function extractPythonSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function walk(node) { - switch (node.type) { - case 'function_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const decorators = []; - if (node.previousSibling && node.previousSibling.type === 'decorator') { - decorators.push(node.previousSibling.text); - } - const parentClass = findPythonParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const kind = parentClass ? 'method' : 'function'; - definitions.push({ - name: fullName, - kind, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - decorators, - }); - } - break; - } - - case 'class_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const superclasses = - node.childForFieldName('superclasses') || findChild(node, 'argument_list'); - if (superclasses) { - for (let i = 0; i < superclasses.childCount; i++) { - const child = superclasses.child(i); - if (child && child.type === 'identifier') { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - break; - } - - case 'decorated_definition': { - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - return; - } - - case 'call': { - const fn = node.childForFieldName('function'); - if (fn) { - let callName = null; - if (fn.type === 'identifier') callName = fn.text; - else if (fn.type === 'attribute') { - const attr = fn.childForFieldName('attribute'); - if (attr) callName = 
attr.text; - } - if (callName) calls.push({ name: callName, line: node.startPosition.row + 1 }); - } - break; - } - - case 'import_statement': { - const names = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && (child.type === 'dotted_name' || child.type === 'aliased_import')) { - const name = - child.type === 'aliased_import' - ? (child.childForFieldName('alias') || child.childForFieldName('name'))?.text - : child.text; - if (name) names.push(name); - } - } - if (names.length > 0) - imports.push({ - source: names[0], - names, - line: node.startPosition.row + 1, - pythonImport: true, - }); - break; - } - - case 'import_from_statement': { - let source = ''; - const names = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'dotted_name' || child.type === 'relative_import') { - if (!source) source = child.text; - else names.push(child.text); - } - if (child.type === 'aliased_import') { - const n = child.childForFieldName('name') || child.child(0); - if (n) names.push(n.text); - } - if (child.type === 'wildcard_import') names.push('*'); - } - if (source) - imports.push({ source, names, line: node.startPosition.row + 1, pythonImport: true }); - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - function findPythonParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class_definition') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -/** - * Extract symbols from Go files. 
- */ -export function extractGoSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function walk(node) { - switch (node.type) { - case 'function_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - const receiver = node.childForFieldName('receiver'); - if (nameNode) { - let receiverType = null; - if (receiver) { - // receiver is a parameter_list like (r *Foo) or (r Foo) - for (let i = 0; i < receiver.childCount; i++) { - const param = receiver.child(i); - if (!param) continue; - const typeNode = param.childForFieldName('type'); - if (typeNode) { - receiverType = - typeNode.type === 'pointer_type' - ? typeNode.text.replace(/^\*/, '') - : typeNode.text; - break; - } - } - } - const fullName = receiverType ? 
`${receiverType}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'type_declaration': { - for (let i = 0; i < node.childCount; i++) { - const spec = node.child(i); - if (!spec || spec.type !== 'type_spec') continue; - const nameNode = spec.childForFieldName('name'); - const typeNode = spec.childForFieldName('type'); - if (nameNode && typeNode) { - if (typeNode.type === 'struct_type') { - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } else if (typeNode.type === 'interface_type') { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - for (let j = 0; j < typeNode.childCount; j++) { - const member = typeNode.child(j); - if (member && member.type === 'method_elem') { - const methName = member.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: member.startPosition.row + 1, - endLine: member.endPosition.row + 1, - }); - } - } - } - } else { - definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } - } - break; - } - - case 'import_declaration': { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'import_spec') { - const pathNode = child.childForFieldName('path'); - if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); - const nameNode = child.childForFieldName('name'); - const alias = nameNode ? 
nameNode.text : importPath.split('/').pop(); - imports.push({ - source: importPath, - names: [alias], - line: child.startPosition.row + 1, - goImport: true, - }); - } - } - if (child.type === 'import_spec_list') { - for (let j = 0; j < child.childCount; j++) { - const spec = child.child(j); - if (spec && spec.type === 'import_spec') { - const pathNode = spec.childForFieldName('path'); - if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); - const nameNode = spec.childForFieldName('name'); - const alias = nameNode ? nameNode.text : importPath.split('/').pop(); - imports.push({ - source: importPath, - names: [alias], - line: spec.startPosition.row + 1, - goImport: true, - }); - } - } - } - } - } - break; - } - - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'selector_expression') { - const field = fn.childForFieldName('field'); - if (field) calls.push({ name: field.text, line: node.startPosition.row + 1 }); - } - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -/** - * Extract symbols from Rust files. - */ -export function extractRustSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function findCurrentImpl(node) { - let current = node.parent; - while (current) { - if (current.type === 'impl_item') { - const typeNode = current.childForFieldName('type'); - return typeNode ? typeNode.text : null; - } - current = current.parent; - } - return null; - } - - function walk(node) { - switch (node.type) { - case 'function_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const implType = findCurrentImpl(node); - const fullName = implType ? 
`${implType}.${nameNode.text}` : nameNode.text; - const kind = implType ? 'method' : 'function'; - definitions.push({ - name: fullName, - kind, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'struct_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'enum_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'trait_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'trait', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if ( - child && - (child.type === 'function_signature_item' || child.type === 'function_item') - ) { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } - - case 'impl_item': { - const typeNode = node.childForFieldName('type'); - const traitNode = node.childForFieldName('trait'); - if (typeNode && traitNode) { - classes.push({ - name: typeNode.text, - implements: traitNode.text, - line: node.startPosition.row + 1, - }); - } - break; - } - - case 'use_declaration': { - const argNode = node.child(1); - if (argNode) { - const usePaths = extractRustUsePath(argNode); - for (const imp of usePaths) { - imports.push({ - source: imp.source, - names: imp.names, - line: node.startPosition.row + 1, 
- rustUse: true, - }); - } - } - break; - } - - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'field_expression') { - const field = fn.childForFieldName('field'); - if (field) calls.push({ name: field.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'scoped_identifier') { - const name = fn.childForFieldName('name'); - if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } - } - break; - } - - case 'macro_invocation': { - const macroNode = node.child(0); - if (macroNode) { - calls.push({ name: `${macroNode.text}!`, line: node.startPosition.row + 1 }); - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -function extractRustUsePath(node) { - if (!node) return []; - - if (node.type === 'use_list') { - const results = []; - for (let i = 0; i < node.childCount; i++) { - results.push(...extractRustUsePath(node.child(i))); - } - return results; - } - - if (node.type === 'scoped_use_list') { - const pathNode = node.childForFieldName('path'); - const listNode = node.childForFieldName('list'); - const prefix = pathNode ? pathNode.text : ''; - if (listNode) { - const names = []; - for (let i = 0; i < listNode.childCount; i++) { - const child = listNode.child(i); - if ( - child && - (child.type === 'identifier' || child.type === 'use_as_clause' || child.type === 'self') - ) { - const name = - child.type === 'use_as_clause' - ? 
(child.childForFieldName('alias') || child.childForFieldName('name'))?.text - : child.text; - if (name) names.push(name); - } - } - return [{ source: prefix, names }]; - } - return [{ source: prefix, names: [] }]; - } - - if (node.type === 'use_as_clause') { - const name = node.childForFieldName('alias') || node.childForFieldName('name'); - return [{ source: node.text, names: name ? [name.text] : [] }]; - } - - if (node.type === 'use_wildcard') { - const pathNode = node.childForFieldName('path'); - return [{ source: pathNode ? pathNode.text : '*', names: ['*'] }]; - } - - if (node.type === 'scoped_identifier' || node.type === 'identifier') { - const text = node.text; - const lastName = text.split('::').pop(); - return [{ source: text, names: [lastName] }]; - } - - return []; -} - -/** - * Extract symbols from Java files. - */ -export function extractJavaSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function findJavaParentClass(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'enum_declaration' || - current.type === 'interface_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; - } - - function walk(node) { - switch (node.type) { - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - - const superclass = node.childForFieldName('superclass'); - if (superclass) { - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'generic_type') - ) { - const superName = child.type === 'generic_type' ? child.child(0)?.text : child.text; - if (superName) - classes.push({ - name: nameNode.text, - extends: superName, - line: node.startPosition.row + 1, - }); - break; - } - } - } - - const interfaces = node.childForFieldName('interfaces'); - if (interfaces) { - for (let i = 0; i < interfaces.childCount; i++) { - const child = interfaces.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'type_list' || - child.type === 'generic_type') - ) { - if (child.type === 'type_list') { - for (let j = 0; j < child.childCount; j++) { - const t = child.child(j); - if ( - t && - (t.type === 'type_identifier' || - t.type === 'identifier' || - t.type === 'generic_type') - ) { - const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; - if (ifaceName) - classes.push({ - name: nameNode.text, - implements: ifaceName, - line: node.startPosition.row + 1, - }); - } - } - } else { - const ifaceName = - child.type === 'generic_type' ? 
child.child(0)?.text : child.text; - if (ifaceName) - classes.push({ - name: nameNode.text, - implements: ifaceName, - line: node.startPosition.row + 1, - }); - } - } - } - } - } - break; - } - - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } - - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findJavaParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'constructor_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findJavaParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'import_declaration': { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { - const fullPath = child.text; - const lastName = fullPath.split('.').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - javaImport: true, - }); - } - if (child && child.type === 'asterisk') { - const lastImport = imports[imports.length - 1]; - if (lastImport) lastImport.names = ['*']; - } - } - break; - } - - case 'method_invocation': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - calls.push({ name: nameNode.text, line: node.startPosition.row + 1 }); - } - break; - } - - case 'object_creation_expression': { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = - typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text; - if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -/** - * Extract symbols from C# files. 
- */ -export function extractCSharpSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function findCSharpParentType(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'struct_declaration' || - current.type === 'interface_declaration' || - current.type === 'enum_declaration' || - current.type === 'record_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; - } - - function walk(node) { - switch (node.type) { - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } - - case 'struct_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } - - case 'record_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'record', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } - - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if 
(child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } - - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'constructor_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'property_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? 
`${parentType}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'using_directive': { - // using System.Collections.Generic; - const nameNode = - node.childForFieldName('name') || - findChild(node, 'qualified_name') || - findChild(node, 'identifier'); - if (nameNode) { - const fullPath = nameNode.text; - const lastName = fullPath.split('.').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - csharpUsing: true, - }); - } - break; - } - - case 'invocation_expression': { - const fn = node.childForFieldName('function') || node.child(0); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'member_access_expression') { - const name = fn.childForFieldName('name'); - if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'generic_name' || fn.type === 'member_binding_expression') { - const name = fn.childForFieldName('name') || fn.child(0); - if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } - } - break; - } - - case 'object_creation_expression': { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = - typeNode.type === 'generic_name' - ? 
typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text - : typeNode.text; - if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -function extractCSharpBaseTypes(node, className, classes) { - const baseList = node.childForFieldName('bases'); - if (!baseList) return; - for (let i = 0; i < baseList.childCount; i++) { - const child = baseList.child(i); - if (!child) continue; - if (child.type === 'identifier' || child.type === 'qualified_name') { - classes.push({ name: className, extends: child.text, line: node.startPosition.row + 1 }); - } else if (child.type === 'generic_name') { - const name = child.childForFieldName('name') || child.child(0); - if (name) - classes.push({ name: className, extends: name.text, line: node.startPosition.row + 1 }); - } else if (child.type === 'base_list') { - for (let j = 0; j < child.childCount; j++) { - const base = child.child(j); - if (base && (base.type === 'identifier' || base.type === 'qualified_name')) { - classes.push({ name: className, extends: base.text, line: node.startPosition.row + 1 }); - } else if (base && base.type === 'generic_name') { - const name = base.childForFieldName('name') || base.child(0); - if (name) - classes.push({ name: className, extends: name.text, line: node.startPosition.row + 1 }); - } - } - } - } -} - -/** - * Extract symbols from Ruby files. - */ -export function extractRubySymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function findRubyParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class') { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - if (current.type === 'module') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; - } - - function walk(node) { - switch (node.type) { - case 'class': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const superclass = node.childForFieldName('superclass'); - if (superclass) { - // superclass wraps the < token and class name - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - // Direct superclass node may be a constant - if (superclass.type === 'superclass') { - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - } - } - } - break; - } - - case 'module': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'module', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'method': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findRubyParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'singleton_method': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findRubyParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'call': { - const methodNode = node.childForFieldName('method'); - if (methodNode) { - // Check for require/require_relative - if (methodNode.text === 'require' || methodNode.text === 'require_relative') { - const args = node.childForFieldName('arguments'); - if (args) { - for (let i = 0; i < args.childCount; i++) { - const arg = args.child(i); - if (arg && (arg.type === 'string' || arg.type === 'string_content')) { - const strContent = arg.text.replace(/^['"]|['"]$/g, ''); - imports.push({ - source: strContent, - names: [strContent.split('/').pop()], - line: node.startPosition.row + 1, - rubyRequire: true, - }); - break; - } - // Look inside string for string_content - if (arg && arg.type === 'string') { - const content = findChild(arg, 'string_content'); - if (content) { - imports.push({ - source: content.text, - names: [content.text.split('/').pop()], - line: node.startPosition.row + 1, - rubyRequire: true, - }); - break; - } - } - } - } - } else if ( - methodNode.text === 'include' || - methodNode.text === 'extend' || - methodNode.text === 'prepend' - ) { - // Module inclusion — treated like implements - const parentClass = findRubyParentClass(node); - if (parentClass) { - const args = node.childForFieldName('arguments'); - if (args) { - for (let i = 0; i < args.childCount; i++) { - const arg = args.child(i); - if (arg && (arg.type === 'constant' || arg.type === 'scope_resolution')) { 
- classes.push({ - name: parentClass, - implements: arg.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - } else { - calls.push({ name: methodNode.text, line: node.startPosition.row + 1 }); - } - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - -/** - * Extract symbols from PHP files. - */ -export function extractPHPSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function findPHPParentClass(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'trait_declaration' || - current.type === 'enum_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; - } - - function walk(node) { - switch (node.type) { - case 'function_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - - // Check base clause (extends) - const baseClause = - node.childForFieldName('base_clause') || findChild(node, 'base_clause'); - if (baseClause) { - for (let i = 0; i < baseClause.childCount; i++) { - const child = baseClause.child(i); - if (child && (child.type === 'name' || child.type === 'qualified_name')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - } - - // Check class interface clause 
(implements) - const interfaceClause = findChild(node, 'class_interface_clause'); - if (interfaceClause) { - for (let i = 0; i < interfaceClause.childCount; i++) { - const child = interfaceClause.child(i); - if (child && (child.type === 'name' || child.type === 'qualified_name')) { - classes.push({ - name: nameNode.text, - implements: child.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - break; - } - - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } - - case 'trait_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'trait', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findPHPParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } - - case 'namespace_use_declaration': { - // use App\Models\User; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && child.type === 'namespace_use_clause') { - const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); - if (nameNode) { - const fullPath = nameNode.text; - const lastName = fullPath.split('\\').pop(); - const alias = child.childForFieldName('alias'); - imports.push({ - source: fullPath, - names: [alias ? alias.text : lastName], - line: node.startPosition.row + 1, - phpUse: true, - }); - } - } - // Single use clause without wrapper - if (child && (child.type === 'qualified_name' || child.type === 'name')) { - const fullPath = child.text; - const lastName = fullPath.split('\\').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - phpUse: true, - }); - } - } - break; - } - - case 'function_call_expression': { - const fn = node.childForFieldName('function') || node.child(0); - if (fn) { - if (fn.type === 'name' || fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'qualified_name') { - const parts = fn.text.split('\\'); - calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); - } - } - break; - } - - case 'member_call_expression': { - const name = node.childForFieldName('name'); - if (name) { - calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } - break; - } - - case 'scoped_call_expression': { - const name = node.childForFieldName('name'); - if (name) { - calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } - break; - } - - case 'object_creation_expression': { - const classNode = node.child(1); // skip 'new' keyword - 
if (classNode && (classNode.type === 'name' || classNode.type === 'qualified_name')) { - const parts = classNode.text.split('\\'); - calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); - } - break; - } - } - - for (let i = 0; i < node.childCount; i++) walk(node.child(i)); - } - - walk(tree.rootNode); - return { definitions, calls, imports, classes, exports }; -} - // ── Unified API ────────────────────────────────────────────────────────────── function resolveEngine(opts = {}) { @@ -1941,7 +78,7 @@ function resolveEngine(opts = {}) { const native = loadNative(); if (native) return { name: 'native', native }; if (pref === 'native') { - warn('Native engine requested but unavailable — falling back to WASM'); + getNative(); // throws with detailed error + install instructions } } return { name: 'wasm', native: null }; diff --git a/src/queries.js b/src/queries.js index 208f390a..c0f24042 100644 --- a/src/queries.js +++ b/src/queries.js @@ -1,6 +1,9 @@ import { execFileSync } from 'node:child_process'; +import fs from 'node:fs'; import path from 'node:path'; +import { findCycles } from './cycles.js'; import { findDbPath, openReadonlyOrFail } from './db.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./; function isTestFile(filePath) { @@ -190,14 +193,14 @@ export function moduleMapData(customDbPath, limit = 20) { const nodes = db .prepare(` SELECT n.*, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as out_edges, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as in_edges + (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind != 'contains') as out_edges, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') as in_edges FROM nodes n WHERE n.kind = 'file' AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) DESC + ORDER BY 
(SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') DESC LIMIT ? `) .all(limit); @@ -451,9 +454,25 @@ export function diffImpactData(customDbPath, opts = {}) { const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); + // Verify we're in a git repository before running git diff + let checkDir = repoRoot; + let isGitRepo = false; + while (checkDir) { + if (fs.existsSync(path.join(checkDir, '.git'))) { + isGitRepo = true; + break; + } + const parent = path.dirname(checkDir); + if (parent === checkDir) break; + checkDir = parent; + } + if (!isGitRepo) { + db.close(); + return { error: `Not a git repository: ${repoRoot}` }; + } + let diffOutput; try { - // FIX: Use execFileSync with array args to prevent shell injection const args = opts.staged ? ['diff', '--cached', '--unified=0', '--no-color'] : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; @@ -461,6 +480,7 @@ export function diffImpactData(customDbPath, opts = {}) { cwd: repoRoot, encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'], }); } catch (e) { db.close(); @@ -596,6 +616,172 @@ export function listFunctionsData(customDbPath, opts = {}) { return { count: rows.length, functions: rows }; } +export function statsData(customDbPath) { + const db = openReadonlyOrFail(customDbPath); + + // Node breakdown by kind + const nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); + const nodesByKind = {}; + let totalNodes = 0; + for (const r of nodeRows) { + nodesByKind[r.kind] = r.c; + totalNodes += r.c; + } + + // Edge breakdown by kind + const edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); + const edgesByKind = {}; + let totalEdges = 0; + for (const r of edgeRows) { + edgesByKind[r.kind] = r.c; + totalEdges += r.c; + } + + // File/language distribution — map extensions via LANGUAGE_REGISTRY + const extToLang = new Map(); + for (const entry of 
LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + const fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); + const byLanguage = {}; + for (const row of fileNodes) { + const ext = path.extname(row.file).toLowerCase(); + const lang = extToLang.get(ext) || 'other'; + byLanguage[lang] = (byLanguage[lang] || 0) + 1; + } + const langCount = Object.keys(byLanguage).length; + + // Cycles + const fileCycles = findCycles(db, { fileLevel: true }); + const fnCycles = findCycles(db, { fileLevel: false }); + + // Top 5 coupling hotspots (fan-in + fan-out, file nodes) + const hotspotRows = db + .prepare(` + SELECT n.file, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out + FROM nodes n + WHERE n.kind = 'file' + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) + + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC + LIMIT 5 + `) + .all(); + const hotspots = hotspotRows.map((r) => ({ + file: r.file, + fanIn: r.fan_in, + fanOut: r.fan_out, + })); + + // Embeddings metadata + let embeddings = null; + try { + const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); + if (count && count.c > 0) { + const meta = {}; + const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); + for (const r of metaRows) meta[r.key] = r.value; + embeddings = { + count: count.c, + model: meta.model || null, + dim: meta.dim ? 
parseInt(meta.dim, 10) : null, + builtAt: meta.built_at || null, + }; + } + } catch { + /* embeddings table may not exist */ + } + + db.close(); + return { + nodes: { total: totalNodes, byKind: nodesByKind }, + edges: { total: totalEdges, byKind: edgesByKind }, + files: { total: fileNodes.length, languages: langCount, byLanguage }, + cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length }, + hotspots, + embeddings, + }; +} + +export function stats(customDbPath, opts = {}) { + const data = statsData(customDbPath); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + // Human-readable output + console.log('\n# Codegraph Stats\n'); + + // Nodes + console.log(`Nodes: ${data.nodes.total} total`); + const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); + const kindParts = kindEntries.map(([k, v]) => `${k} ${v}`); + // Print in rows of 3 + for (let i = 0; i < kindParts.length; i += 3) { + const row = kindParts + .slice(i, i + 3) + .map((p) => p.padEnd(18)) + .join(''); + console.log(` ${row}`); + } + + // Edges + console.log(`\nEdges: ${data.edges.total} total`); + const edgeEntries = Object.entries(data.edges.byKind).sort((a, b) => b[1] - a[1]); + const edgeParts = edgeEntries.map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < edgeParts.length; i += 3) { + const row = edgeParts + .slice(i, i + 3) + .map((p) => p.padEnd(18)) + .join(''); + console.log(` ${row}`); + } + + // Files + console.log(`\nFiles: ${data.files.total} (${data.files.languages} languages)`); + const langEntries = Object.entries(data.files.byLanguage).sort((a, b) => b[1] - a[1]); + const langParts = langEntries.map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < langParts.length; i += 3) { + const row = langParts + .slice(i, i + 3) + .map((p) => p.padEnd(18)) + .join(''); + console.log(` ${row}`); + } + + // Cycles + console.log( + `\nCycles: ${data.cycles.fileLevel} file-level, ${data.cycles.functionLevel} function-level`, + 
); + + // Hotspots + if (data.hotspots.length > 0) { + console.log(`\nTop ${data.hotspots.length} coupling hotspots:`); + for (let i = 0; i < data.hotspots.length; i++) { + const h = data.hotspots[i]; + console.log( + ` ${String(i + 1).padStart(2)}. ${h.file.padEnd(35)} fan-in: ${String(h.fanIn).padStart(3)} fan-out: ${String(h.fanOut).padStart(3)}`, + ); + } + } + + // Embeddings + if (data.embeddings) { + const e = data.embeddings; + console.log( + `\nEmbeddings: ${e.count} vectors (${e.model || 'unknown'}, ${e.dim || '?'}d) built ${e.builtAt || 'unknown'}`, + ); + } else { + console.log('\nEmbeddings: not built'); + } + + console.log(); +} + // ─── Human-readable output (original formatting) ─────────────────────── export function queryName(name, customDbPath, opts = {}) { diff --git a/src/registry.js b/src/registry.js index 96bab195..caa970e8 100644 --- a/src/registry.js +++ b/src/registry.js @@ -3,7 +3,11 @@ import os from 'node:os'; import path from 'node:path'; import { debug, warn } from './logger.js'; -export const REGISTRY_PATH = path.join(os.homedir(), '.codegraph', 'registry.json'); +export const REGISTRY_PATH = + process.env.CODEGRAPH_REGISTRY_PATH || path.join(os.homedir(), '.codegraph', 'registry.json'); + +/** Default TTL: entries not accessed within 30 days are pruned. */ +export const DEFAULT_TTL_DAYS = 30; /** * Load the registry from disk. 
@@ -69,10 +73,12 @@ export function registerRepo(rootDir, name, registryPath = REGISTRY_PATH) { } } + const now = new Date().toISOString(); registry.repos[repoName] = { path: absRoot, dbPath: path.join(absRoot, '.codegraph', 'graph.db'), - addedAt: new Date().toISOString(), + addedAt: registry.repos[repoName]?.addedAt || now, + lastAccessedAt: now, }; saveRegistry(registry, registryPath); @@ -102,6 +108,7 @@ export function listRepos(registryPath = REGISTRY_PATH) { path: entry.path, dbPath: entry.dbPath, addedAt: entry.addedAt, + lastAccessedAt: entry.lastAccessedAt || entry.addedAt, })) .sort((a, b) => a.name.localeCompare(b.name)); } @@ -118,21 +125,31 @@ export function resolveRepoDbPath(name, registryPath = REGISTRY_PATH) { warn(`Registry: database missing for "${name}" at ${entry.dbPath}`); return undefined; } + // Touch lastAccessedAt on successful resolution + entry.lastAccessedAt = new Date().toISOString(); + saveRegistry(registry, registryPath); return entry.dbPath; } /** - * Remove registry entries whose repo directory no longer exists on disk. - * Only checks the repo directory (not the DB file — a missing DB is normal pre-build state). - * Returns an array of `{ name, path }` for each pruned entry. + * Remove registry entries whose repo directory no longer exists on disk, + * or that haven't been accessed within `ttlDays` days. + * Returns an array of `{ name, path, reason }` for each pruned entry. 
*/ -export function pruneRegistry(registryPath = REGISTRY_PATH) { +export function pruneRegistry(registryPath = REGISTRY_PATH, ttlDays = DEFAULT_TTL_DAYS) { const registry = loadRegistry(registryPath); const pruned = []; + const cutoff = Date.now() - ttlDays * 24 * 60 * 60 * 1000; for (const [name, entry] of Object.entries(registry.repos)) { if (!fs.existsSync(entry.path)) { - pruned.push({ name, path: entry.path }); + pruned.push({ name, path: entry.path, reason: 'missing' }); + delete registry.repos[name]; + continue; + } + const lastAccess = Date.parse(entry.lastAccessedAt || entry.addedAt); + if (lastAccess < cutoff) { + pruned.push({ name, path: entry.path, reason: 'expired' }); delete registry.repos[name]; } } diff --git a/src/resolve.js b/src/resolve.js index c3f0d8df..15d9d207 100644 --- a/src/resolve.js +++ b/src/resolve.js @@ -31,7 +31,7 @@ function resolveViaAlias(importSource, aliases, _rootDir) { } } - for (const [pattern, targets] of Object.entries(aliases.paths)) { + for (const [pattern, targets] of Object.entries(aliases.paths || {})) { const prefix = pattern.replace(/\*$/, ''); if (!importSource.startsWith(prefix)) continue; const rest = importSource.slice(prefix.length); @@ -113,12 +113,13 @@ export function resolveImportPath(fromFile, importSource, rootDir, aliases) { const native = loadNative(); if (native) { try { - return native.resolveImport( + const result = native.resolveImport( fromFile, importSource, rootDir, convertAliasesForNative(aliases), ); + return normalizePath(path.normalize(result)); } catch { // fall through to JS } @@ -158,7 +159,7 @@ export function resolveImportsBatch(inputs, rootDir, aliases) { const results = native.resolveImports(nativeInputs, rootDir, convertAliasesForNative(aliases)); const map = new Map(); for (const r of results) { - map.set(`${r.fromFile}|${r.importSource}`, r.resolvedPath); + map.set(`${r.fromFile}|${r.importSource}`, normalizePath(path.normalize(r.resolvedPath))); } return map; } catch { diff --git 
a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index e44a099f..94097e7f 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -61,9 +61,9 @@ describeOrSkip('Build parity: native vs WASM', () => { copyDirSync(FIXTURE_DIR, nativeDir); // Build with WASM - await buildGraph(wasmDir, { engine: 'wasm', incremental: false }); + await buildGraph(wasmDir, { engine: 'wasm', incremental: false, skipRegistry: true }); // Build with native - await buildGraph(nativeDir, { engine: 'native', incremental: false }); + await buildGraph(nativeDir, { engine: 'native', incremental: false, skipRegistry: true }); }, 60_000); afterAll(() => { diff --git a/tests/integration/build.test.js b/tests/integration/build.test.js index 04ababf0..10177124 100644 --- a/tests/integration/build.test.js +++ b/tests/integration/build.test.js @@ -44,7 +44,7 @@ beforeAll(async () => { for (const [name, content] of Object.entries(FIXTURE_FILES)) { fs.writeFileSync(path.join(tmpDir, name), content); } - await buildGraph(tmpDir); + await buildGraph(tmpDir, { skipRegistry: true }); dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); }); diff --git a/tests/integration/cli.test.js b/tests/integration/cli.test.js index d1950636..40de2144 100644 --- a/tests/integration/cli.test.js +++ b/tests/integration/cli.test.js @@ -36,7 +36,7 @@ export function main() { `.trimStart(), }; -let tmpDir, dbPath; +let tmpDir, tmpHome, dbPath; /** Run the CLI and return stdout as a string. Throws on non-zero exit. 
*/ function run(...args) { @@ -44,11 +44,13 @@ function run(...args) { cwd: tmpDir, encoding: 'utf-8', timeout: 30_000, + env: { ...process.env, HOME: tmpHome, USERPROFILE: tmpHome }, }); } beforeAll(async () => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cli-')); + tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-clihome-')); for (const [name, content] of Object.entries(FIXTURE_FILES)) { fs.writeFileSync(path.join(tmpDir, name), content); } @@ -60,6 +62,7 @@ beforeAll(async () => { afterAll(() => { if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + if (tmpHome) fs.rmSync(tmpHome, { recursive: true, force: true }); }); describe('CLI smoke tests', () => { diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index 090a467e..927e852a 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -160,6 +160,50 @@ describe('moduleMapData', () => { const data = moduleMapData(dbPath, 2); expect(data.topNodes).toHaveLength(2); }); + + test('excludes contains edges from ranking and counts', () => { + // Build a separate DB with contains + imports edges + const tmpDir2 = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-contains-')); + fs.mkdirSync(path.join(tmpDir2, '.codegraph')); + const dbPath2 = path.join(tmpDir2, '.codegraph', 'graph.db'); + + const db2 = new Database(dbPath2); + db2.pragma('journal_mode = WAL'); + initSchema(db2); + + // Two file nodes + const fA = insertNode(db2, 'a.js', 'file', 'a.js', 0); + const fB = insertNode(db2, 'b.js', 'file', 'b.js', 0); + const fC = insertNode(db2, 'c.js', 'file', 'c.js', 0); + + // a.js gets only a contains edge (structural) + insertEdge(db2, fC, fA, 'contains'); + // b.js gets an imports edge (real dependency) + insertEdge(db2, fC, fB, 'imports'); + + db2.close(); + + try { + const data = moduleMapData(dbPath2); + const nodeA = data.topNodes.find((n) => n.file === 'a.js'); + const nodeB = data.topNodes.find((n) => n.file === 
'b.js'); + + // b.js (imports edge) should have inEdges=1, a.js (contains edge) should have inEdges=0 + expect(nodeB.inEdges).toBe(1); + expect(nodeA.inEdges).toBe(0); + + // b.js should rank above a.js + const indexA = data.topNodes.indexOf(nodeA); + const indexB = data.topNodes.indexOf(nodeB); + expect(indexB).toBeLessThan(indexA); + + // c.js outEdges should only count the imports edge, not contains + const nodeC = data.topNodes.find((n) => n.file === 'c.js'); + expect(nodeC.outEdges).toBe(1); + } finally { + fs.rmSync(tmpDir2, { recursive: true, force: true }); + } + }); }); // ─── fileDepsData ────────────────────────────────────────────────────── @@ -231,6 +275,6 @@ describe('diffImpactData', () => { test('returns error when run outside a git repo', () => { const data = diffImpactData(dbPath); expect(data).toHaveProperty('error'); - expect(data.error).toMatch(/git diff/i); + expect(data.error).toMatch(/not a git repository/i); }); }); diff --git a/tests/integration/structure.test.js b/tests/integration/structure.test.js index 9bd4607b..81a38529 100644 --- a/tests/integration/structure.test.js +++ b/tests/integration/structure.test.js @@ -49,7 +49,7 @@ beforeAll(async () => { fs.writeFileSync(path.join(tmpDir, relPath), content); } - await buildGraph(tmpDir, { engine: 'wasm' }); + await buildGraph(tmpDir, { engine: 'wasm', skipRegistry: true }); dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); }); diff --git a/tests/parsers/unified.test.js b/tests/parsers/unified.test.js index 69c20b5a..d5c59d66 100644 --- a/tests/parsers/unified.test.js +++ b/tests/parsers/unified.test.js @@ -22,6 +22,12 @@ describe('Unified parser API', () => { expect(engine.name).toBe('wasm'); expect(engine.version).toBeNull(); }); + + it('throws when engine=native is explicitly requested but unavailable', () => { + const engine = getActiveEngine(); + if (engine.name === 'native') return; // skip — native is available + expect(() => getActiveEngine({ engine: 'native' 
})).toThrow(/[Nn]ative/); + }); }); describe('parseFileAuto', () => { diff --git a/tests/search/embedding-regression.test.js b/tests/search/embedding-regression.test.js new file mode 100644 index 00000000..f1004bf3 --- /dev/null +++ b/tests/search/embedding-regression.test.js @@ -0,0 +1,134 @@ +/** + * Embedding regression test — validates the embed+search pipeline + * produces semantically meaningful results with a real ML model. + * + * Skips automatically when @huggingface/transformers is not installed. + * Run explicitly: npx vitest run tests/search/embedding-regression.test.js + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; + +// Detect whether transformers is available (optional dep) +let hasTransformers = false; +try { + await import('@huggingface/transformers'); + hasTransformers = true; +} catch { + // not installed — tests will skip +} + +// Lazy-import to avoid top-level errors when transformers is missing +const { buildGraph } = await import('../../src/builder.js'); +const { buildEmbeddings, searchData } = await import('../../src/embedder.js'); + +// Same ES-module fixture files used by build.test.js +const FIXTURE_FILES = { + 'math.js': ` +export function add(a, b) { return a + b; } +export function multiply(a, b) { return a * b; } +export function square(x) { return multiply(x, x); } +`.trimStart(), + 'utils.js': ` +import { add, square } from './math.js'; +export function sumOfSquares(a, b) { return add(square(a), square(b)); } +export class Calculator { + compute(x, y) { return sumOfSquares(x, y); } +} +`.trimStart(), + 'index.js': ` +import { sumOfSquares, Calculator } from './utils.js'; +import { add } from './math.js'; +export function main() { + console.log(add(1, 2)); + console.log(sumOfSquares(3, 4)); + const calc = new Calculator(); + console.log(calc.compute(5, 6)); +} +`.trimStart(), +}; 
+ +let tmpDir, dbPath; + +describe.skipIf(!hasTransformers)('embedding regression (real model)', () => { + beforeAll(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed-regression-')); + for (const [name, content] of Object.entries(FIXTURE_FILES)) { + fs.writeFileSync(path.join(tmpDir, name), content); + } + + // Build the dependency graph + await buildGraph(tmpDir, { skipRegistry: true }); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + // Build embeddings with the smallest/fastest model + await buildEmbeddings(tmpDir, 'minilm', dbPath); + }, 120_000); + + afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + describe('smoke tests', () => { + test('stored at least 6 embeddings', () => { + const db = new Database(dbPath, { readonly: true }); + const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c; + db.close(); + expect(count).toBeGreaterThanOrEqual(6); + }); + + test('metadata records correct model and dimension', () => { + const db = new Database(dbPath, { readonly: true }); + const model = db.prepare("SELECT value FROM embedding_meta WHERE key = 'model'").get().value; + const dim = db.prepare("SELECT value FROM embedding_meta WHERE key = 'dim'").get().value; + db.close(); + expect(model).toBe('Xenova/all-MiniLM-L6-v2'); + expect(Number(dim)).toBe(384); + }); + + test('search returns results with positive similarity', async () => { + const data = await searchData('add numbers', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + for (const r of data.results) { + expect(r.similarity).toBeGreaterThan(0); + } + }); + }); + + describe('regression queries', () => { + /** + * Helper: search for a query and assert that a given function name + * appears within the top N results. 
+ */ + async function expectInTopN(query, expectedName, topN) { + const data = await searchData(query, dbPath, { minScore: 0.01, limit: topN }); + expect(data).not.toBeNull(); + const names = data.results.map((r) => r.name); + expect(names).toContain(expectedName); + } + + test('"add two numbers together" finds add in top 3', async () => { + await expectInTopN('add two numbers together', 'add', 3); + }); + + test('"multiply values" finds multiply in top 3', async () => { + await expectInTopN('multiply values', 'multiply', 3); + }); + + test('"compute the square of a number" finds square in top 3', async () => { + await expectInTopN('compute the square of a number', 'square', 3); + }); + + test('"sum of squares calculation" finds sumOfSquares in top 3', async () => { + await expectInTopN('sum of squares calculation', 'sumOfSquares', 3); + }); + + test('"main entry point function" finds main in top 5', async () => { + await expectInTopN('main entry point function', 'main', 5); + }); + }); +}); diff --git a/tests/unit/config.test.js b/tests/unit/config.test.js index c005e6cb..1f32695d 100644 --- a/tests/unit/config.test.js +++ b/tests/unit/config.test.js @@ -55,7 +55,7 @@ describe('DEFAULTS', () => { }); it('has embeddings defaults', () => { - expect(DEFAULTS.embeddings).toEqual({ model: 'minilm', llmProvider: null }); + expect(DEFAULTS.embeddings).toEqual({ model: 'jina-code', llmProvider: null }); }); it('has llm defaults', () => { diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index bb54d51c..ccc539c6 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -617,6 +617,7 @@ describe('startMCPServer handler dispatch', () => { { name: 'beta', path: '/beta' }, { name: 'gamma', path: '/gamma' }, ]), + pruneRegistry: vi.fn(), })); vi.doMock('../../src/queries.js', () => ({ queryNameData: vi.fn(), @@ -665,6 +666,7 @@ describe('startMCPServer handler dispatch', () => { { name: 'alpha', path: '/alpha' }, { name: 'beta', path: '/beta' }, ]), + 
pruneRegistry: vi.fn(), })); vi.doMock('../../src/queries.js', () => ({ queryNameData: vi.fn(), diff --git a/tests/unit/queries-unit.test.js b/tests/unit/queries-unit.test.js index 18c5e6fe..31a300a9 100644 --- a/tests/unit/queries-unit.test.js +++ b/tests/unit/queries-unit.test.js @@ -62,6 +62,7 @@ let tmpDir, dbPath; beforeAll(() => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-queries-unit-')); + fs.mkdirSync(path.join(tmpDir, '.git')); fs.mkdirSync(path.join(tmpDir, '.codegraph')); dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); @@ -424,7 +425,7 @@ describe('diffImpact (display)', () => { const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); diffImpact(dbPath); const allOutput = spy.mock.calls.map((c) => c[0]).join('\n'); - expect(allOutput).toContain('git diff'); + expect(allOutput).toMatch(/git diff|git/i); spy.mockRestore(); mockExecFile.mockRestore(); }); diff --git a/tests/unit/registry.test.js b/tests/unit/registry.test.js index a594ea12..7623ba8f 100644 --- a/tests/unit/registry.test.js +++ b/tests/unit/registry.test.js @@ -1,8 +1,10 @@ +import { execFileSync } from 'node:child_process'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { + DEFAULT_TTL_DAYS, listRepos, loadRegistry, pruneRegistry, @@ -28,9 +30,27 @@ afterEach(() => { // ─── REGISTRY_PATH ────────────────────────────────────────────────── describe('REGISTRY_PATH', () => { - it('points to ~/.codegraph/registry.json', () => { + it('points to ~/.codegraph/registry.json by default', () => { expect(REGISTRY_PATH).toBe(path.join(os.homedir(), '.codegraph', 'registry.json')); }); + + it('respects CODEGRAPH_REGISTRY_PATH env var', () => { + const customPath = path.join(tmpDir, 'custom', 'registry.json'); + const result = execFileSync( + 'node', + [ + '--input-type=module', + '-e', + `import { REGISTRY_PATH } from './src/registry.js'; 
process.stdout.write(REGISTRY_PATH);`, + ], + { + cwd: path.resolve(import.meta.dirname, '..', '..'), + encoding: 'utf-8', + env: { ...process.env, CODEGRAPH_REGISTRY_PATH: customPath }, + }, + ); + expect(result).toBe(customPath); + }); }); // ─── loadRegistry ─────────────────────────────────────────────────── @@ -142,12 +162,24 @@ describe('registerRepo', () => { expect(Object.keys(reg.repos)).toHaveLength(1); }); - it('sets addedAt as ISO string', () => { + it('sets addedAt and lastAccessedAt as ISO strings', () => { const dir = path.join(tmpDir, 'proj'); fs.mkdirSync(dir, { recursive: true }); const { entry } = registerRepo(dir, 'proj', registryPath); expect(entry.addedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/); + expect(entry.lastAccessedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/); + }); + + it('preserves original addedAt on re-registration', () => { + const dir = path.join(tmpDir, 'proj'); + fs.mkdirSync(dir, { recursive: true }); + + const { entry: first } = registerRepo(dir, 'proj', registryPath); + const originalAddedAt = first.addedAt; + const { entry: second } = registerRepo(dir, 'proj', registryPath); + + expect(second.addedAt).toBe(originalAddedAt); }); it('auto-suffixes when basename collides with different path', () => { @@ -239,7 +271,7 @@ describe('listRepos', () => { expect(repos).toEqual([]); }); - it('returns repos sorted by name', () => { + it('returns repos sorted by name with lastAccessedAt', () => { const dirA = path.join(tmpDir, 'aaa'); const dirZ = path.join(tmpDir, 'zzz'); const dirM = path.join(tmpDir, 'mmm'); @@ -253,6 +285,9 @@ describe('listRepos', () => { const repos = listRepos(registryPath); expect(repos.map((r) => r.name)).toEqual(['aaa', 'mmm', 'zzz']); + for (const r of repos) { + expect(r.lastAccessedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/); + } }); }); @@ -289,7 +324,7 @@ describe('resolveRepoDbPath', () => { // ─── pruneRegistry ───────────────────────────────────────────────── describe('pruneRegistry', () => { - it('removes entries whose 
directories no longer exist', () => { + it('removes entries whose directories no longer exist (reason: missing)', () => { const dir1 = path.join(tmpDir, 'exists'); const dir2 = path.join(tmpDir, 'gone'); fs.mkdirSync(dir1, { recursive: true }); @@ -305,12 +340,96 @@ describe('pruneRegistry', () => { expect(pruned).toHaveLength(1); expect(pruned[0].name).toBe('gone'); expect(pruned[0].path).toBe(dir2); + expect(pruned[0].reason).toBe('missing'); const reg = loadRegistry(registryPath); expect(reg.repos.exists).toBeDefined(); expect(reg.repos.gone).toBeUndefined(); }); + it('removes entries idle beyond TTL (reason: expired)', () => { + const dir = path.join(tmpDir, 'old-project'); + fs.mkdirSync(dir, { recursive: true }); + + // Manually write a registry entry with an old lastAccessedAt + const oldDate = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000).toISOString(); // 60 days ago + const registry = { + repos: { + 'old-project': { + path: dir, + dbPath: path.join(dir, '.codegraph', 'graph.db'), + addedAt: oldDate, + lastAccessedAt: oldDate, + }, + }, + }; + saveRegistry(registry, registryPath); + + const pruned = pruneRegistry(registryPath, 30); + expect(pruned).toHaveLength(1); + expect(pruned[0].name).toBe('old-project'); + expect(pruned[0].reason).toBe('expired'); + }); + + it('keeps entries within TTL window', () => { + const dir = path.join(tmpDir, 'fresh'); + fs.mkdirSync(dir, { recursive: true }); + registerRepo(dir, 'fresh', registryPath); + + const pruned = pruneRegistry(registryPath, 30); + expect(pruned).toEqual([]); + + const reg = loadRegistry(registryPath); + expect(reg.repos.fresh).toBeDefined(); + }); + + it('falls back to addedAt when lastAccessedAt is missing', () => { + const dir = path.join(tmpDir, 'legacy'); + fs.mkdirSync(dir, { recursive: true }); + + const oldDate = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000).toISOString(); + const registry = { + repos: { + legacy: { + path: dir, + dbPath: path.join(dir, '.codegraph', 'graph.db'), + 
addedAt: oldDate, + }, + }, + }; + saveRegistry(registry, registryPath); + + const pruned = pruneRegistry(registryPath, 30); + expect(pruned).toHaveLength(1); + expect(pruned[0].reason).toBe('expired'); + }); + + it('respects custom TTL', () => { + const dir = path.join(tmpDir, 'project'); + fs.mkdirSync(dir, { recursive: true }); + + // 10 days ago + const recentDate = new Date(Date.now() - 10 * 24 * 60 * 60 * 1000).toISOString(); + const registry = { + repos: { + project: { + path: dir, + dbPath: path.join(dir, '.codegraph', 'graph.db'), + addedAt: recentDate, + lastAccessedAt: recentDate, + }, + }, + }; + saveRegistry(registry, registryPath); + + // 30-day TTL: should keep + expect(pruneRegistry(registryPath, 30)).toEqual([]); + // 7-day TTL: should prune + const pruned = pruneRegistry(registryPath, 7); + expect(pruned).toHaveLength(1); + expect(pruned[0].reason).toBe('expired'); + }); + it('returns empty array when nothing to prune', () => { const dir = path.join(tmpDir, 'healthy'); fs.mkdirSync(dir, { recursive: true }); @@ -336,3 +455,36 @@ describe('pruneRegistry', () => { expect(pruned).toEqual([]); }); }); + +// ─── DEFAULT_TTL_DAYS ────────────────────────────────────────────── + +describe('DEFAULT_TTL_DAYS', () => { + it('is 30 days', () => { + expect(DEFAULT_TTL_DAYS).toBe(30); + }); +}); + +// ─── resolveRepoDbPath lastAccessedAt ────────────────────────────── + +describe('resolveRepoDbPath updates lastAccessedAt', () => { + it('touches lastAccessedAt on successful resolve', () => { + const dir = path.join(tmpDir, 'proj'); + const dbDir = path.join(dir, '.codegraph'); + const dbFile = path.join(dbDir, 'graph.db'); + fs.mkdirSync(dbDir, { recursive: true }); + fs.writeFileSync(dbFile, ''); + + registerRepo(dir, 'proj', registryPath); + + // Manually backdate lastAccessedAt + const reg = loadRegistry(registryPath); + reg.repos.proj.lastAccessedAt = '2025-01-01T00:00:00.000Z'; + saveRegistry(reg, registryPath); + + resolveRepoDbPath('proj', registryPath); 
+
+    const updated = loadRegistry(registryPath);
+    expect(updated.repos.proj.lastAccessedAt).not.toBe('2025-01-01T00:00:00.000Z');
+    expect(new Date(updated.repos.proj.lastAccessedAt).getTime()).toBeGreaterThan(Date.parse('2025-01-01T00:00:00.000Z'));
+  });
+});