Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions docs/roadmap/ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -1297,17 +1297,22 @@ Structure building is unchanged — at 22ms it's already fast.

**Why after Phase 6:** The native analysis acceleration work (Phase 6) establishes the dual-engine pipeline that new language grammars plug into. Adding languages before the engine is complete would mean porting extractors twice. With Phase 6 done, each new language needs only a `LANGUAGE_REGISTRY` entry + extractor function, and both engines support it automatically.

### 7.1 -- Parser Abstraction Layer
### 7.1 -- Parser Abstraction Layer

Extract shared patterns from existing extractors into reusable helpers to reduce per-language boilerplate from ~200 lines to ~80 lines.

| Helper | Purpose |
|--------|---------|
| `findParentNode(node, typeNames)` | Walk parent chain to find enclosing class/struct |
| `extractBodyMethods(bodyNode, parentName)` | Extract method definitions from a body block |
| `normalizeImportPath(importText)` | Cross-language import path normalization |
| ✅ `findParentNode(node, typeNames, nameField?)` | Walk parent chain to find enclosing class/struct |
| ✅ `extractBodyMembers(node, bodyFields, memberType, kind, nameField?, visibility?)` | Extract child declarations from a body block |
| ✅ `stripQuotes(text)` | Strip leading/trailing quotes from string literals |
| ✅ `lastPathSegment(path, separator?)` | Extract last segment of a delimited import path |

**New file:** `src/parser-utils.js`
**File:** `src/extractors/helpers.ts` (extended existing helper module)

- `findParentNode` replaces 6 per-language `findParent*` functions (JS, Python, Java, C#, Ruby, Rust)
- `extractBodyMembers` replaces 5 body-iteration patterns (Rust struct/enum, Java enum, C# enum, PHP enum)
- `stripQuotes` + `lastPathSegment` replace inline `.replace(/"/g, '')` and `.split('.').pop()` patterns across 7 extractors

### 7.2 -- Batch 1: High Demand

Expand Down
54 changes: 24 additions & 30 deletions src/extractors/csharp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@ import type {
TreeSitterNode,
TreeSitterTree,
} from '../types.js';
import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
import {
extractBodyMembers,
extractModifierVisibility,
findChild,
findParentNode,
lastPathSegment,
MAX_WALK_DEPTH,
nodeEndLine,
} from './helpers.js';

/**
* Extract symbols from C# files.
Expand Down Expand Up @@ -208,7 +216,7 @@ function handleCsUsingDirective(node: TreeSitterNode, ctx: ExtractorOutput): voi
findChild(node, 'identifier');
if (!nameNode) return;
const fullPath = nameNode.text;
const lastName = fullPath.split('.').pop() ?? fullPath;
const lastName = lastPathSegment(fullPath, '.');
ctx.imports.push({
source: fullPath,
names: [lastName],
Expand Down Expand Up @@ -246,22 +254,15 @@ function handleCsObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): voi
if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 });
}

const CS_PARENT_TYPES = [
'class_declaration',
'struct_declaration',
'interface_declaration',
'enum_declaration',
'record_declaration',
] as const;
function findCSharpParentType(node: TreeSitterNode): string | null {
let current = node.parent;
while (current) {
if (
current.type === 'class_declaration' ||
current.type === 'struct_declaration' ||
current.type === 'interface_declaration' ||
current.type === 'enum_declaration' ||
current.type === 'record_declaration'
) {
const nameNode = current.childForFieldName('name');
return nameNode ? nameNode.text : null;
}
current = current.parent;
}
return null;
return findParentNode(node, CS_PARENT_TYPES);
}

// ── Child extraction helpers ────────────────────────────────────────────────
Expand Down Expand Up @@ -307,19 +308,12 @@ function extractCSharpClassFields(classNode: TreeSitterNode): SubDeclaration[] {
}

function extractCSharpEnumMembers(enumNode: TreeSitterNode): SubDeclaration[] {
const constants: SubDeclaration[] = [];
const body =
enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list');
if (!body) return constants;
for (let i = 0; i < body.childCount; i++) {
const member = body.child(i);
if (!member || member.type !== 'enum_member_declaration') continue;
const nameNode = member.childForFieldName('name');
if (nameNode) {
constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 });
}
}
return constants;
return extractBodyMembers(
enumNode,
['body', 'enum_member_declaration_list'],
'enum_member_declaration',
'constant',
);
}

// ── Type map extraction ──────────────────────────────────────────────────────
Expand Down
13 changes: 10 additions & 3 deletions src/extractors/go.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ import type {
TreeSitterTree,
TypeMapEntry,
} from '../types.js';
import { findChild, goVisibility, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
import {
findChild,
goVisibility,
lastPathSegment,
MAX_WALK_DEPTH,
nodeEndLine,
stripQuotes,
} from './helpers.js';

/**
* Extract symbols from Go files.
Expand Down Expand Up @@ -170,9 +177,9 @@ function handleGoImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
function extractGoImportSpec(spec: TreeSitterNode, ctx: ExtractorOutput): void {
const pathNode = spec.childForFieldName('path');
if (pathNode) {
const importPath = pathNode.text.replace(/"/g, '');
const importPath = stripQuotes(pathNode.text);
const nameNode = spec.childForFieldName('name');
const alias = nameNode ? nameNode.text : (importPath.split('/').pop() ?? importPath);
const alias = nameNode ? nameNode.text : lastPathSegment(importPath);
ctx.imports.push({
source: importPath,
names: [alias],
Expand Down
12 changes: 6 additions & 6 deletions src/extractors/hcl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import type {
TreeSitterNode,
TreeSitterTree,
} from '../types.js';
import { nodeEndLine } from './helpers.js';
import { nodeEndLine, stripQuotes } from './helpers.js';

/**
* Extract symbols from HCL (Terraform) files.
Expand Down Expand Up @@ -80,18 +80,18 @@ function resolveHclBlockName(blockType: string, strings: TreeSitterNode[]): stri
const s0 = strings[0];
const s1 = strings[1];
if (blockType === 'resource' && s0 && s1) {
return `${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`;
return `${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`;
}
if (blockType === 'data' && s0 && s1) {
return `data.${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`;
return `data.${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`;
}
if ((blockType === 'variable' || blockType === 'output' || blockType === 'module') && s0) {
return `${blockType}.${s0.text.replace(/"/g, '')}`;
return `${blockType}.${stripQuotes(s0.text)}`;
}
if (blockType === 'locals') return 'locals';
if (blockType === 'terraform' || blockType === 'provider') {
let name = blockType;
if (s0) name += `.${s0.text.replace(/"/g, '')}`;
if (s0) name += `.${stripQuotes(s0.text)}`;
return name;
}
return '';
Expand Down Expand Up @@ -126,7 +126,7 @@ function extractHclModuleSource(
const key = attr.childForFieldName('key') || attr.child(0);
const val = attr.childForFieldName('val') || attr.child(2);
if (key && key.text === 'source' && val) {
const src = val.text.replace(/"/g, '');
const src = stripQuotes(val.text);
if (src.startsWith('./') || src.startsWith('../')) {
ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 });
}
Expand Down
78 changes: 77 additions & 1 deletion src/extractors/helpers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { TreeSitterNode } from '../types.js';
import type { SubDeclaration, TreeSitterNode } from '../types.js';

/**
* Maximum recursion depth for tree-sitter AST walkers.
Expand Down Expand Up @@ -70,6 +70,82 @@ export function rustVisibility(node: TreeSitterNode): 'public' | 'private' {
return 'private';
}

// ── Parser abstraction helpers ─────────────────────────────────────────────

/**
 * Resolve the name of the nearest enclosing declaration of a given kind.
 *
 * Starts at `node.parent` (the node itself is never considered) and climbs
 * until an ancestor whose `type` is listed in `typeNames` is found. The search
 * stops at that first match: if it has no `nameField` child, the result is
 * `null` rather than continuing to outer ancestors.
 *
 * @param node - Node whose ancestors are inspected.
 * @param typeNames - Ancestor node types that count as a match.
 * @param nameField - Field on the matching ancestor holding its name (default `'name'`).
 * @returns Text of the name field on the first matching ancestor, or `null`.
 */
export function findParentNode(
  node: TreeSitterNode,
  typeNames: readonly string[],
  nameField: string = 'name',
): string | null {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (!typeNames.includes(ancestor.type)) continue;
    // First matching ancestor decides the outcome, named or not.
    const label = ancestor.childForFieldName(nameField);
    if (label) return label.text;
    return null;
  }
  return null;
}

/**
 * Collect named child declarations from the body of a container node.
 *
 * For each entry in `bodyFields` (in order) the body is looked up first as a
 * named field via `childForFieldName`, then as a child node via `findChild`;
 * the first hit wins. Direct children of that body whose `type` equals
 * `memberType` and that expose a `nameField` child become `SubDeclaration`s.
 * Members without a name node are skipped.
 *
 * @param containerNode - Declaration node that owns the body (enum, struct, ...).
 * @param bodyFields - Candidate field names / node types identifying the body.
 * @param memberType - Node type of the members to collect.
 * @param kind - `kind` assigned to every emitted declaration.
 * @param nameField - Field on each member holding its name (default `'name'`).
 * @param visibility - Optional callback computing a member's visibility; when
 *   omitted, no `visibility` property is set on the results.
 * @returns Declarations in source order, with 1-based `line` numbers; empty
 *   when no body is found.
 */
export function extractBodyMembers(
  containerNode: TreeSitterNode,
  bodyFields: readonly string[],
  memberType: string,
  kind: SubDeclaration['kind'],
  nameField: string = 'name',
  visibility?: (member: TreeSitterNode) => SubDeclaration['visibility'],
): SubDeclaration[] {
  const found: SubDeclaration[] = [];

  // Locate the body: field lookup first, child-type lookup as fallback.
  let bodyNode: TreeSitterNode | null = null;
  for (const candidate of bodyFields) {
    const viaField = containerNode.childForFieldName(candidate);
    bodyNode = viaField ? viaField : findChild(containerNode, candidate);
    if (bodyNode) break;
  }
  if (!bodyNode) return found;

  for (let idx = 0; idx < bodyNode.childCount; idx += 1) {
    const child = bodyNode.child(idx);
    if (!(child && child.type === memberType)) continue;

    const label = child.childForFieldName(nameField);
    if (!label) continue;

    // Tree-sitter rows are 0-based; reported lines are 1-based.
    const decl: SubDeclaration = { name: label.text, kind, line: child.startPosition.row + 1 };
    if (visibility) decl.visibility = visibility(child);
    found.push(decl);
  }
  return found;
}

/**
 * Remove one quote character from each end of `text`.
 *
 * Recognised delimiters are `'`, `"` and the backtick. The two ends are handled
 * independently (they need not match), and quote characters in the interior of
 * the string are left in place — this is deliberately not a global strip.
 *
 * @param text - Raw text, typically a string-literal node's source text.
 * @returns `text` without its leading and/or trailing quote character.
 */
export function stripQuotes(text: string): string {
  // Anchored alternation: at most one match at the start, one at the end.
  const edgeQuote = /^['"`]|['"`]$/g;
  return text.replace(edgeQuote, '');
}
Comment on lines +137 to +139
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 stripQuotes semantics differ subtly from replaced HCL regex

The HCL extractor previously used .replace(/"/g, '') — a global replace that removes every double-quote character in the text. stripQuotes uses anchored replacements (^ / $) that strip only the leading and trailing character. For well-formed Tree-sitter string literal nodes (always delimited with a single pair of outer quotes) the result is identical, so this is not a bug in practice.

Worth noting as a potential surprise if someone later passes a node whose .text value contains interior quote characters. A short inline comment on the "anchored, not global" choice would clarify the intentional scope:

Suggested change
export function stripQuotes(text: string): string {
return text.replace(/^['"`]|['"`]$/g, '');
}
export function stripQuotes(text: string): string {
// Strips only the leading/trailing delimiter; interior quotes are untouched.
return text.replace(/^['"`]|['"`]$/g, '');
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — added clarifying JSDoc line: Strips only the leading/trailing delimiter; interior quotes are untouched.


/**
 * Return the final component of a separator-delimited path.
 *
 * Examples: `lastPathSegment('java.util.List', '.')` → `'List'`,
 * `lastPathSegment('github.com/org/repo')` → `'repo'`. A path that ends with
 * the separator yields an empty string.
 *
 * @param path - Delimited path such as an import specifier.
 * @param separator - Delimiter between segments (default `'/'`).
 * @returns The last segment, or `path` itself when splitting yields no segments.
 */
export function lastPathSegment(path: string, separator: string = '/'): string {
  const segments = path.split(separator);
  const tail = segments[segments.length - 1];
  // Only an empty split result (e.g. '' split by '') has no last element.
  return tail === undefined ? path : tail;
}

export function extractModifierVisibility(
node: TreeSitterNode,
modifierTypes: Set<string> = DEFAULT_MODIFIER_TYPES,
Expand Down
43 changes: 16 additions & 27 deletions src/extractors/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ import type {
TreeSitterTree,
TypeMapEntry,
} from '../types.js';
import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js';
import {
extractBodyMembers,
extractModifierVisibility,
findChild,
findParentNode,
lastPathSegment,
nodeEndLine,
} from './helpers.js';

/**
* Extract symbols from Java files.
Expand Down Expand Up @@ -218,7 +225,7 @@ function handleJavaImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void
const child = node.child(i);
if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) {
const fullPath = child.text;
const lastName = fullPath.split('.').pop() ?? fullPath;
const lastName = lastPathSegment(fullPath, '.');
ctx.imports.push({
source: fullPath,
names: [lastName],
Expand Down Expand Up @@ -263,20 +270,13 @@ function handleJavaObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): v
if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 });
}

const JAVA_PARENT_TYPES = [
'class_declaration',
'enum_declaration',
'interface_declaration',
] as const;
function findJavaParentClass(node: TreeSitterNode): string | null {
let current = node.parent;
while (current) {
if (
current.type === 'class_declaration' ||
current.type === 'enum_declaration' ||
current.type === 'interface_declaration'
) {
const nameNode = current.childForFieldName('name');
return nameNode ? nameNode.text : null;
}
current = current.parent;
}
return null;
return findParentNode(node, JAVA_PARENT_TYPES);
}

// ── Child extraction helpers ────────────────────────────────────────────────
Expand Down Expand Up @@ -333,16 +333,5 @@ function extractClassFields(classNode: TreeSitterNode): SubDeclaration[] {
}

function extractEnumConstants(enumNode: TreeSitterNode): SubDeclaration[] {
const constants: SubDeclaration[] = [];
const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body');
if (!body) return constants;
for (let i = 0; i < body.childCount; i++) {
const member = body.child(i);
if (!member || member.type !== 'enum_constant') continue;
const nameNode = member.childForFieldName('name');
if (nameNode) {
constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 });
}
}
return constants;
return extractBodyMembers(enumNode, ['body', 'enum_body'], 'enum_constant', 'constant');
}
14 changes: 3 additions & 11 deletions src/extractors/javascript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import type {
TreeSitterTree,
TypeMapEntry,
} from '../types.js';
import { findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
import { findChild, findParentNode, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';

/** Built-in globals that start with uppercase but are not user-defined types. */
const BUILTIN_GLOBALS: Set<string> = new Set([
Expand Down Expand Up @@ -1191,17 +1191,9 @@ function extractSuperclass(heritage: TreeSitterNode): string | null {
return null;
}

const JS_CLASS_TYPES = ['class_declaration', 'class'] as const;
function findParentClass(node: TreeSitterNode): string | null {
let current = node.parent;
while (current) {
const t = current.type;
if (t === 'class_declaration' || t === 'class') {
const nameNode = current.childForFieldName('name');
return nameNode ? nameNode.text : null;
}
current = current.parent;
}
return null;
return findParentNode(node, JS_CLASS_TYPES);
}

function extractImportNames(node: TreeSitterNode): string[] {
Expand Down
Loading
Loading