diff --git a/README.md b/README.md
index 01371097..0bb6948e 100644
--- a/README.md
+++ b/README.md
@@ -76,39 +76,24 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a
### Feature comparison
-Comparison last verified: March 2026. Full analysis: COMPETITIVE_ANALYSIS.md
-
-| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [code-graph-rag](https://github.com/vitali87/code-graph-rag) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | [axon](https://github.com/harshkedia177/axon) |
-|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
-| MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
-| Batch querying | **Yes** | — | — | — | — | — | — | — |
-| Composite audit command | **Yes** | — | — | — | — | — | — | — |
-| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
-| Multi-language | **11** | **14** | **32** | **11** | **~10** | **12** | **12** | **3** |
-| Semantic search | **Yes** | — | **Yes** | **Yes** | — | **Yes** | — | **Yes** |
-| Hybrid BM25 + semantic | **Yes** | — | — | — | — | **Yes** | — | **Yes** |
-| CODEOWNERS integration | **Yes** | — | — | — | — | — | — | — |
-| Architecture boundary rules | **Yes** | — | — | — | — | — | — | — |
-| CI validation predicates | **Yes** | — | — | — | — | — | — | — |
-| Graph snapshots | **Yes** | — | — | — | — | — | — | — |
-| Git diff impact | **Yes** | — | — | — | — | **Yes** | **Yes** | **Yes** |
-| Branch structural diff | **Yes** | — | — | — | — | — | — | **Yes** |
-| Git co-change analysis | **Yes** | — | — | — | — | — | — | **Yes** |
-| Watch mode | **Yes** | — | **Yes** | **Yes** | — | — | **Yes** | **Yes** |
-| Dead code / role classification | **Yes** | — | **Yes** | — | — | — | **Yes** | **Yes** |
-| Cycle detection | **Yes** | — | — | — | — | — | — | — |
-| Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | Go only | **Yes** |
-| Zero config | **Yes** | — | **Yes** | — | — | **Yes** | — | **Yes** |
-| Embeddable JS library (`npm install`) | **Yes** | — | — | — | — | — | — | — |
-| LLM-optional (works without API keys) | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
-| Dataflow analysis | **Yes** | **Yes** | — | — | **Yes** | — | — | — |
-| Control flow graph (CFG) | **Yes** | **Yes** | — | — | **Yes** | — | — | — |
-| AST node querying | **Yes** | **Yes** | — | — | **Yes** | — | — | — |
-| Expanded node/edge types | **Yes** | **Yes** | — | — | **Yes** | — | — | — |
-| GraphML / Neo4j export | **Yes** | **Yes** | — | — | — | — | — | — |
-| Interactive graph viewer | **Yes** | — | — | — | — | — | — | — |
-| Commercial use allowed | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | No | Paid | **Yes** |
-| Open source | **Yes** | Yes | Yes | Yes | Yes | No | No | Yes |
+Comparison last verified: March 2026. Claims verified against each repo's README/docs. Full analysis: COMPETITIVE_ANALYSIS.md
+
+| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) |
+|---|:---:|:---:|:---:|:---:|:---:|:---:|
+| Languages | **11** | ~12 | **32** | ~10 | 3 | 13 |
+| MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** |
+| Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | — |
+| Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | **Yes** |
+| Git-aware (diff impact, co-change, branch diff) | **All 3** | — | — | — | **All 3** | — |
+| Dead code / role classification | **Yes** | — | **Yes** | — | **Yes** | — |
+| Incremental rebuilds | **O(changed)** | — | O(n) | — | **Yes** | Commit-level⁴ |
+| Architecture rules + CI gate | **Yes** | — | — | — | — | — |
+| Security scanning (SAST / vuln detection) | Intentionally out of scope² | **Yes** | **Yes** | **Yes** | — | — |
+| Zero config, `npm install` | **Yes** | — | **Yes** | — | **Yes** | **Yes** |
+| Graph export (GraphML / Neo4j / DOT) | **Yes** | **Yes** | — | — | — | — |
+| Open source + commercial use | **Yes** (Apache-2.0) | **Yes** (Apache-2.0) | **Yes** (MIT/Apache-2.0) | **Yes** (Apache-2.0) | Source-available³ | Non-commercial⁵ |
+
+¹ narsil-mcp added CFG and dataflow in recent versions. ² Codegraph focuses on structural understanding, not vulnerability detection — use dedicated SAST tools (Semgrep, CodeQL, Snyk) for that. ³ axon claims MIT in pyproject.toml but has no LICENSE file in the repo. ⁴ GitNexus skips re-index if the git commit hasn't changed, but re-processes the entire repo when it does — no per-file incremental parsing. ⁵ GitNexus uses the PolyForm Noncommercial 1.0.0 license.
### What makes codegraph different
@@ -490,16 +475,16 @@ codegraph registry remove # Unregister
|---|---|---|:---:|:---:|
|  | `.js`, `.jsx`, `.mjs`, `.cjs` | functions, classes, methods, imports, exports, call sites, constants, dataflow | ✅ | ✅ |
|  | `.ts`, `.tsx` | functions, classes, interfaces, type aliases, methods, imports, exports, call sites, dataflow | ✅ | ✅ |
-|  | `.py` | functions, classes, methods, imports, decorators, constants, call sites, dataflow | ✅ | ✅ |
+|  | `.py`, `.pyi` | functions, classes, methods, imports, decorators, constants, call sites, dataflow | ✅ | ✅ |
|  | `.go` | functions, methods, structs, interfaces, constants, imports, call sites, dataflow | ✅ | ✅ |
|  | `.rs` | functions, methods, structs, enums, traits, constants, `use` imports, call sites, dataflow | ✅ | ✅ |
|  | `.java` | classes, methods, constructors, interfaces, enums, imports, call sites, dataflow | ✅ | ✅ |
|  | `.cs` | classes, structs, records, interfaces, enums, methods, constructors, properties, using directives, call sites, dataflow | ✅ | ✅ |
-|  | `.php` | functions, classes, interfaces, traits, enums, methods, namespace use, call sites, dataflow | ✅ | ✅ |
-|  | `.rb` | classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow | — | ✅ |
-|  | `.tf`, `.hcl` | resource, data, variable, module, output blocks | — | ✅ |
+|  | `.php`, `.phtml` | functions, classes, interfaces, traits, enums, methods, namespace use, call sites, dataflow | ✅ | ✅ |
+|  | `.rb`, `.rake`, `.gemspec` | classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow | N/A⁴ | ✅ |
+|  | `.tf`, `.hcl` | resource, data, variable, module, output blocks | N/A⁴ | ✅ |
-> **Type Inference** extracts a per-file type map from annotations (`const x: Router`, `MyType x`, `x: MyType`) and `new` expressions, enabling the edge resolver to connect `x.method()` → `Type.method()`. **Parity** = WASM and native Rust engines produce identical output.
+> **Type Inference** extracts a per-file type map from annotations (`const x: Router`, `MyType x`, `x: MyType`) and `new` expressions, enabling the edge resolver to connect `x.method()` → `Type.method()`. **Parity** = WASM and native Rust engines produce identical output. ⁴ Type inference does not apply: Ruby is dynamically typed and HCL is declarative.
## ⚙️ How It Works
@@ -768,7 +753,7 @@ const { results: fused } = await multiSearchData(
## ⚠️ Limitations
-- **No full type inference** — parses `.d.ts` interfaces but doesn't use TypeScript's type checker for overload resolution
+- **No TypeScript type-checker integration** — type inference resolves annotations, `new` expressions, and assignment chains, but does not invoke `tsc` for overload resolution or complex generics
- **Dynamic calls are best-effort** — complex computed property access and `eval` patterns are not resolved
- **Python imports** — resolves relative imports but doesn't follow `sys.path` or virtual environment packages
- **Dataflow analysis** — intraprocedural (single-function scope), not interprocedural
@@ -778,13 +763,18 @@ const { results: fused } = await multiSearchData(
See **[ROADMAP.md](docs/roadmap/ROADMAP.md)** for the full development roadmap and **[STABILITY.md](STABILITY.md)** for the stability policy and versioning guarantees. Current plan:
1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
-2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
-3. ~~**Deep Analysis**~~ — **Complete** (v3.0.0) — dataflow analysis (flows_to, returns, mutates), intraprocedural CFG for all 11 languages, stored AST nodes, expanded node/edge types (parameter, property, constant, contains, parameter_of, receiver), GraphML/GraphSON/Neo4j CSV export, interactive HTML viewer, CLI consolidation, stable JSON schema
-4. ~~**Architectural Refactoring**~~ — **Complete** (v3.1.5) — unified AST analysis, composable MCP, domain errors, builder pipeline, embedder subsystem, graph model, qualified names, presentation layer, InMemoryRepository, domain directory grouping, CLI composability
-5. **Natural Language Queries** — `codegraph ask` command, conversational sessions
-6. **Expanded Language Support** — 8 new languages (12 → 20)
-7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
-8. **TypeScript Migration** — gradual migration from JS to TypeScript
+2. ~~**Foundation Hardening**~~ — **Complete** (v1.5.0) — parser registry, complete MCP, test coverage, enhanced config, multi-repo MCP
+3. ~~**Analysis Expansion**~~ — **Complete** (v2.7.0) — complexity metrics, community detection, flow tracing, co-change, manifesto, boundary rules, check, triage, audit, batch, hybrid search
+4. ~~**Deep Analysis & Graph Enrichment**~~ — **Complete** (v3.0.0) — dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, interactive viewer, exports command
+5. ~~**Architectural Refactoring**~~ — **Complete** (v3.1.5) — unified AST analysis, composable MCP, domain errors, builder pipeline, graph model, qualified names, presentation layer, CLI composability
+6. **Native Analysis Acceleration** — move JS-only build phases to Rust, sub-100ms 1-file rebuilds
+7. **TypeScript Migration** — project setup, core type definitions, leaf → core → orchestration migration
+8. **Runtime & Extensibility** — event-driven pipeline, plugin system, query caching, pagination
+9. **Intelligent Embeddings** — LLM-generated descriptions, enhanced embeddings, module summaries
+10. **Natural Language Queries** — `codegraph ask` command, conversational sessions
+11. **Expanded Language Support** — 8 new languages (11 → 19)
+12. **GitHub Integration & CI** — reusable GitHub Action, LLM-enhanced PR review, SARIF output
+13. **Visualization & Advanced** — web UI, dead code detection, monorepo, agentic search
## 🤝 Contributing
diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs
index d9f0c0d6..f860dbac 100644
--- a/crates/codegraph-core/src/extractors/go.rs
+++ b/crates/codegraph-core/src/extractors/go.rs
@@ -344,11 +344,15 @@ fn extract_go_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbo
"var_spec" => {
if let Some(type_node) = node.child_by_field_name("type") {
if let Some(type_name) = extract_go_type_name(&type_node, source) {
- if let Some(name_node) = node.child_by_field_name("name") {
- symbols.type_map.push(TypeMapEntry {
- name: node_text(&name_node, source).to_string(),
- type_name: type_name.to_string(),
- });
+ for i in 0..node.child_count() {
+ if let Some(child) = node.child(i) {
+ if child.kind() == "identifier" {
+ symbols.type_map.push(TypeMapEntry {
+ name: node_text(&child, source).to_string(),
+ type_name: type_name.to_string(),
+ });
+ }
+ }
}
}
}
diff --git a/crates/codegraph-core/src/import_resolution.rs b/crates/codegraph-core/src/import_resolution.rs
index f0071502..69480151 100644
--- a/crates/codegraph-core/src/import_resolution.rs
+++ b/crates/codegraph-core/src/import_resolution.rs
@@ -140,6 +140,7 @@ fn resolve_import_path_inner(
".jsx",
".mjs",
".py",
+ ".pyi",
"/index.ts",
"/index.tsx",
"/index.js",
diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs
index f800b275..0dde0bd6 100644
--- a/crates/codegraph-core/src/parser_registry.rs
+++ b/crates/codegraph-core/src/parser_registry.rs
@@ -50,14 +50,14 @@ impl LanguageKind {
}
match ext {
"js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
- "py" => Some(Self::Python),
+ "py" | "pyi" => Some(Self::Python),
"tf" | "hcl" => Some(Self::Hcl),
"go" => Some(Self::Go),
"rs" => Some(Self::Rust),
"java" => Some(Self::Java),
"cs" => Some(Self::CSharp),
- "rb" => Some(Self::Ruby),
- "php" => Some(Self::Php),
+ "rb" | "rake" | "gemspec" => Some(Self::Ruby),
+ "php" | "phtml" => Some(Self::Php),
_ => None,
}
}
diff --git a/docs/roadmap/BACKLOG.md b/docs/roadmap/BACKLOG.md
index f017a518..23f84128 100644
--- a/docs/roadmap/BACKLOG.md
+++ b/docs/roadmap/BACKLOG.md
@@ -23,14 +23,14 @@ Each item has a short title, description, category, expected benefit, and four a
### Tier 0 — Promote before Phase 4-5 (highest immediate impact)
-These two items directly improve agent experience and graph accuracy today, without requiring Rust porting or TypeScript migration. They should be implemented before any Phase 4+ roadmap work begins.
+Both items are now **DONE**. These directly improved agent experience and graph accuracy without requiring Rust porting or TypeScript migration.
-**Rationale:** Item #83 enriches the *passively-injected* context that agents actually see via hooks — the single highest-leverage surface for reducing blind edits. Item #71 closes the biggest accuracy gap in the graph for TypeScript and Java, where missing type-aware resolution causes hallucinated "no callers" results.
+**Rationale:** Item #83 enriches the *passively-injected* context that agents actually see via hooks — the single highest-leverage surface for reducing blind edits. Item #71 closed the biggest accuracy gap in the graph for TypeScript and Java, where missing type-aware resolution caused hallucinated "no callers" results.
| ID | Title | Description | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | Depends on |
|----|-------|-------------|----------|---------|----------|-------------------|-------------------|----------|------------|
| 83 | ~~Hook-optimized `codegraph brief` command~~ | New `codegraph brief ` command designed for Claude Code hook context injection. Returns a compact, token-efficient summary per file: each symbol with its role and caller count (e.g. `buildGraph [core, 12 callers]`), blast radius count on importers (`Imported by: src/cli.js (+8 transitive)`), and overall file risk tier. Current `deps --json` output used by `enrich-context.sh` is shallow — just file-level imports/importedBy and symbol names with no role or blast radius info. The `brief` command would include: **(a)** symbol roles in the output — knowing a file defines `core` vs `leaf` symbols changes editing caution; **(b)** per-symbol transitive caller counts — makes blast radius visible without a separate `fn-impact` call; **(c)** file-level risk tier (high/medium/low based on max fan-in and role composition). Output optimized for `additionalContext` injection — single compact block, not verbose JSON. Also add `--brief` flag to `deps` as an alias. | Embeddability | The `enrich-context.sh` hook is the only codegraph context agents actually see (they ignore CLAUDE.md instructions to run commands manually). Making that passively-injected context richer — with roles, caller counts, and risk tiers — directly reduces blind edits to high-impact code. Currently the hook shows `Defines: function buildGraph` but not that it's a core symbol with 12 transitive callers | ✓ | ✓ | 4 | No | — | **DONE** — `codegraph brief ` command with symbol roles, caller counts, and risk tiers. CLI command, MCP tool, and presentation layer. ([#480](https://github.com/optave/codegraph/pull/480)) |
-| 71 | Basic type inference for typed languages | Extract type annotations from TypeScript and Java AST nodes (variable declarations, function parameters, return types, generics) to resolve method calls through typed references. Currently `const x: Router = express.Router(); x.get(...)` produces no edge because `x.get` can't be resolved without knowing `x` is a `Router`. Tree-sitter already parses type annotations — we just don't use them for resolution. Start with declared types (no flow inference), which covers the majority of TS/Java code. | Resolution | Dramatically improves call graph completeness for TypeScript and Java — the two languages where developers annotate types explicitly and expect tooling to use them. Directly prevents hallucinated "no callers" results for methods called through typed variables | ✓ | ✓ | 5 | No | — |
+| 71 | ~~Basic type inference for typed languages~~ | Extract type annotations from TypeScript and Java AST nodes (variable declarations, function parameters, return types, generics) to resolve method calls through typed references. Currently `const x: Router = express.Router(); x.get(...)` produces no edge because `x.get` can't be resolved without knowing `x` is a `Router`. Tree-sitter already parses type annotations — we just don't use them for resolution. Start with declared types (no flow inference), which covers the majority of TS/Java code. | Resolution | Dramatically improves call graph completeness for TypeScript and Java — the two languages where developers annotate types explicitly and expect tooling to use them. Directly prevents hallucinated "no callers" results for methods called through typed variables | ✓ | ✓ | 5 | No | — | **DONE** — Type inference for all typed languages (TS, Java, Go, Rust, C#, PHP, Python). WASM + native engines. ([#501](https://github.com/optave/codegraph/pull/501)) |
### Tier 1 — Zero-dep + Foundation-aligned (build these first)
diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js
index 085717fa..47d75320 100644
--- a/src/domain/graph/builder/stages/build-edges.js
+++ b/src/domain/graph/builder/stages/build-edges.js
@@ -128,6 +128,14 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native)
for (const e of nativeEdges) {
allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]);
}
+
+ // Older native binaries (< 3.2.0) don't emit receiver or type-resolved method-call
+ // edges. Supplement them on the JS side if the native binary missed them.
+ // TODO: Remove once all published native binaries handle receivers (>= 3.2.0)
+ const hasReceiver = nativeEdges.some((e) => e.kind === 'receiver');
+ if (!hasReceiver) {
+ supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows);
+ }
}
function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) {
@@ -147,6 +155,50 @@ function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) {
return importedNames;
}
+// ── Receiver edge supplement for older native binaries ──────────────────
+// Delegates receiver edges to buildReceiverEdge and pushes type-resolved `calls` rows onto allEdgeRows.
+function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) {
+ const seenCallEdges = new Set();
+ // Seed the dedupe set with every row already queued, keyed as "row[0]|row[1]|row[2]" (source|target|kind)
+ for (const row of allEdgeRows) {
+ seenCallEdges.add(`${row[0]}|${row[1]}|${row[2]}`);
+ }
+
+ for (const nf of nativeFiles) {
+ const relPath = nf.file;
+ const typeMap = new Map(nf.typeMap.map((t) => [t.name, t.typeName])); // name → typeName lookup for this file
+ const fileNodeRow = { id: nf.fileNodeId };
+
+ for (const call of nf.calls) {
+ if (!call.receiver || BUILTIN_RECEIVERS.has(call.receiver)) continue; // no receiver, or a built-in one
+ if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super')
+ continue; // this/self/super receivers are skipped by this supplement
+
+ const caller = findCaller(call, nf.definitions, relPath, getNodeIdStmt, fileNodeRow); // NOTE(review): caller.id is dereferenced below — confirm findCaller never returns null
+
+ // Receiver edge: caller → receiver type node (shares seenCallEdges and allEdgeRows with this function)
+ buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap);
+
+ // Type-resolved method call: caller → Type.method, looked up by the qualified name "Type.method"
+ const typeName = typeMap.get(call.receiver);
+ if (typeName) {
+ const qualifiedName = `${typeName}.${call.name}`;
+ const targets = (ctx.nodesByName.get(qualifiedName) || []).filter(
+ (n) => n.kind === 'method',
+ );
+ for (const t of targets) {
+ const key = `${caller.id}|${t.id}|calls`;
+ if (t.id !== caller.id && !seenCallEdges.has(key)) { // skip self-edges and rows already queued
+ seenCallEdges.add(key);
+ const confidence = computeConfidence(relPath, t.file, null);
+ allEdgeRows.push([caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0]); // [source, target, kind, confidence, dynamic as 0/1]
+ }
+ }
+ }
+ }
+ }
+}
+
// ── Call edges (JS fallback) ────────────────────────────────────────────
function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) {
@@ -244,11 +296,6 @@ function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) {
}
function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) {
- const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter(
- (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method',
- );
- if (methodCandidates.length > 0) return methodCandidates;
-
// Type-aware resolution: translate variable receiver to its declared type
if (call.receiver && typeMap) {
const typeName = typeMap.get(call.receiver);
diff --git a/src/domain/graph/resolve.js b/src/domain/graph/resolve.js
index 5e0ab1d3..5a82a5c6 100644
--- a/src/domain/graph/resolve.js
+++ b/src/domain/graph/resolve.js
@@ -78,6 +78,7 @@ function resolveImportPathJS(fromFile, importSource, rootDir, aliases) {
'.jsx',
'.mjs',
'.py',
+ '.pyi',
'/index.ts',
'/index.tsx',
'/index.js',
diff --git a/src/domain/parser.js b/src/domain/parser.js
index 476e6184..8ccbcd3b 100644
--- a/src/domain/parser.js
+++ b/src/domain/parser.js
@@ -320,7 +320,7 @@ export const LANGUAGE_REGISTRY = [
},
{
id: 'python',
- extensions: ['.py'],
+ extensions: ['.py', '.pyi'],
grammarFile: 'tree-sitter-python.wasm',
extractor: extractPythonSymbols,
required: false,
@@ -355,14 +355,14 @@ export const LANGUAGE_REGISTRY = [
},
{
id: 'ruby',
- extensions: ['.rb'],
+ extensions: ['.rb', '.rake', '.gemspec'],
grammarFile: 'tree-sitter-ruby.wasm',
extractor: extractRubySymbols,
required: false,
},
{
id: 'php',
- extensions: ['.php'],
+ extensions: ['.php', '.phtml'],
grammarFile: 'tree-sitter-php.wasm',
extractor: extractPHPSymbols,
required: false,
@@ -378,6 +378,31 @@ for (const entry of LANGUAGE_REGISTRY) {
export const SUPPORTED_EXTENSIONS = new Set(_extToLang.keys());
+/**
+ * WASM-based typeMap backfill for older native binaries that don't emit typeMap. Uses tree-sitter
+ * AST extraction instead of regex to avoid false positives from comments and string literals.
+ * Resolves to `{ typeMap: Map, backfilled: true }`, or `{ typeMap: [], backfilled: false }` on failure.
+ * TODO: Remove once all published native binaries include typeMap extraction (>= 3.2.0)
+ */
+async function backfillTypeMap(filePath, source) {
+ let code = source;
+ if (!code) { // no source supplied (an empty string also lands here): read the file from disk
+ try {
+ code = fs.readFileSync(filePath, 'utf-8');
+ } catch {
+ return { typeMap: [], backfilled: false }; // unreadable file: report "not backfilled" instead of throwing
+ }
+ }
+ const parsers = await createParsers();
+ const extracted = wasmExtractSymbols(parsers, filePath, code);
+ if (!extracted?.symbols?.typeMap) return { typeMap: [], backfilled: false }; // extraction produced no typeMap
+ const tm = extracted.symbols.typeMap;
+ return {
+ typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])), // pass a Map through; convert {name, typeName} entries otherwise
+ backfilled: true,
+ };
+}
+
/**
* WASM extraction helper: picks the right extractor based on file extension.
*/
@@ -414,7 +439,14 @@ export async function parseFileAuto(filePath, source, opts = {}) {
if (native) {
const result = native.parseFile(filePath, source, !!opts.dataflow, opts.ast !== false);
- return result ? patchNativeResult(result) : null;
+ if (!result) return null;
+ const patched = patchNativeResult(result);
+ if (!patched.typeMap || patched.typeMap.length === 0) {
+ const { typeMap, backfilled } = await backfillTypeMap(filePath, source);
+ patched.typeMap = typeMap;
+ if (backfilled) patched._typeMapBackfilled = true;
+ }
+ return patched;
}
// WASM path
@@ -442,10 +474,35 @@ export async function parseFilesAuto(filePaths, rootDir, opts = {}) {
!!opts.dataflow,
opts.ast !== false,
);
+ const needsTypeMap = [];
for (const r of nativeResults) {
if (!r) continue;
+ const patched = patchNativeResult(r);
const relPath = path.relative(rootDir, r.file).split(path.sep).join('/');
- result.set(relPath, patchNativeResult(r));
+ result.set(relPath, patched);
+ if (!patched.typeMap || patched.typeMap.length === 0) {
+ needsTypeMap.push({ filePath: r.file, relPath });
+ }
+ }
+ // Backfill typeMap via WASM for native binaries that predate the type-map feature
+ if (needsTypeMap.length > 0) {
+ const parsers = await createParsers();
+ for (const { filePath, relPath } of needsTypeMap) {
+ try {
+ const code = fs.readFileSync(filePath, 'utf-8');
+ const extracted = wasmExtractSymbols(parsers, filePath, code);
+ if (extracted?.symbols?.typeMap) {
+ const symbols = result.get(relPath);
+ symbols.typeMap =
+ extracted.symbols.typeMap instanceof Map
+ ? extracted.symbols.typeMap
+ : new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName]));
+ symbols._typeMapBackfilled = true;
+ }
+ } catch {
+ /* skip — typeMap is a best-effort backfill */
+ }
+ }
}
return result;
}
@@ -519,7 +576,14 @@ export function createParseTreeCache() {
export async function parseFileIncremental(cache, filePath, source, opts = {}) {
if (cache) {
const result = cache.parseFile(filePath, source);
- return result ? patchNativeResult(result) : null;
+ if (!result) return null;
+ const patched = patchNativeResult(result);
+ if (!patched.typeMap || patched.typeMap.length === 0) {
+ const { typeMap, backfilled } = await backfillTypeMap(filePath, source);
+ patched.typeMap = typeMap;
+ if (backfilled) patched._typeMapBackfilled = true;
+ }
+ return patched;
}
return parseFileAuto(filePath, source, opts);
}
diff --git a/src/extractors/go.js b/src/extractors/go.js
index 33cf44e6..23b5f1b0 100644
--- a/src/extractors/go.js
+++ b/src/extractors/go.js
@@ -211,13 +211,19 @@ function extractGoTypeMap(node, ctx) {
function extractGoTypeMapDepth(node, ctx, depth) {
if (depth >= 200) return;
- // var x MyType = ... → var_declaration > var_spec
+ // var x MyType = ... or var x, y MyType → var_declaration > var_spec
if (node.type === 'var_spec') {
- const nameNode = node.childForFieldName('name');
const typeNode = node.childForFieldName('type');
- if (nameNode && typeNode) {
+ if (typeNode) {
const typeName = extractGoTypeName(typeNode);
- if (typeName) ctx.typeMap.set(nameNode.text, typeName);
+ if (typeName) {
+ for (let i = 0; i < node.childCount; i++) {
+ const child = node.child(i);
+ if (child && child.type === 'identifier') {
+ ctx.typeMap.set(child.text, typeName);
+ }
+ }
+ }
}
}
diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js
index fc52d117..7762959c 100644
--- a/src/extractors/javascript.js
+++ b/src/extractors/javascript.js
@@ -825,7 +825,8 @@ function extractNewExprTypeName(newExprNode) {
}
function extractTypeMapWalk(rootNode, typeMap) {
- function walk(node) {
+ function walk(node, depth) {
+ if (depth >= 200) return;
const t = node.type;
if (t === 'variable_declarator') {
const nameN = node.childForFieldName('name');
@@ -854,10 +855,10 @@ function extractTypeMapWalk(rootNode, typeMap) {
}
}
for (let i = 0; i < node.childCount; i++) {
- walk(node.child(i));
+ walk(node.child(i), depth + 1);
}
}
- walk(rootNode);
+ walk(rootNode, 0);
}
function extractReceiverName(objNode) {
diff --git a/tests/integration/build.test.js b/tests/integration/build.test.js
index 0d0b3d64..a4148642 100644
--- a/tests/integration/build.test.js
+++ b/tests/integration/build.test.js
@@ -497,7 +497,7 @@ describe('typed method call resolution', () => {
'',
].join('\n'),
);
- // Force WASM engine — typeMap resolution is JS-only (native deferred)
+ // Force WASM engine — native binary may not be present in all test environments
await buildGraph(typedDir, { skipRegistry: true, engine: 'wasm' });
typedDbPath = path.join(typedDir, '.codegraph', 'graph.db');
});