diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 88966106..442e1023 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1297,17 +1297,22 @@ Structure building is unchanged — at 22ms it's already fast. **Why after Phase 6:** The native analysis acceleration work (Phase 6) establishes the dual-engine pipeline that new language grammars plug into. Adding languages before the engine is complete would mean porting extractors twice. With Phase 6 done, each new language needs only a `LANGUAGE_REGISTRY` entry + extractor function, and both engines support it automatically. -### 7.1 -- Parser Abstraction Layer +### 7.1 -- Parser Abstraction Layer ✅ Extract shared patterns from existing extractors into reusable helpers to reduce per-language boilerplate from ~200 lines to ~80 lines. | Helper | Purpose | |--------|---------| -| `findParentNode(node, typeNames)` | Walk parent chain to find enclosing class/struct | -| `extractBodyMethods(bodyNode, parentName)` | Extract method definitions from a body block | -| `normalizeImportPath(importText)` | Cross-language import path normalization | +| ✅ `findParentNode(node, typeNames, nameField?)` | Walk parent chain to find enclosing class/struct | +| ✅ `extractBodyMembers(node, bodyFields, memberType, kind, nameField?, visibility?)` | Extract child declarations from a body block | +| ✅ `stripQuotes(text)` | Strip leading/trailing quotes from string literals | +| ✅ `lastPathSegment(path, separator?)` | Extract last segment of a delimited import path | -**New file:** `src/parser-utils.js` +**File:** `src/extractors/helpers.ts` (extended existing helper module) + +- `findParentNode` replaces 6 per-language `findParent*` functions (JS, Python, Java, C#, Ruby, Rust) +- `extractBodyMembers` replaces 5 body-iteration patterns (Rust struct/enum, Java enum, C# enum, PHP enum) +- `stripQuotes` + `lastPathSegment` replace inline `.replace(/"/g, '')` and `.split('.').pop()` patterns across 7 extractors ### 7.2 -- Batch 1: High Demand diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 3a79bb28..16ed0b90 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -6,7 +6,15 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + extractBodyMembers, + extractModifierVisibility, + findChild, + findParentNode, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, +} from './helpers.js'; /** * Extract symbols from C# files. @@ -208,7 +216,7 @@ function handleCsUsingDirective(node: TreeSitterNode, ctx: ExtractorOutput): voi findChild(node, 'identifier'); if (!nameNode) return; const fullPath = nameNode.text; - const lastName = fullPath.split('.').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '.'); ctx.imports.push({ source: fullPath, names: [lastName], @@ -246,22 +254,15 @@ function handleCsObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): voi if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); } +const CS_PARENT_TYPES = [ + 'class_declaration', + 'struct_declaration', + 'interface_declaration', + 'enum_declaration', + 'record_declaration', +] as const; function findCSharpParentType(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'struct_declaration' || - current.type === 'interface_declaration' || - current.type === 'enum_declaration' || - current.type === 'record_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, CS_PARENT_TYPES); } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -307,19 +308,12 @@ function extractCSharpClassFields(classNode: TreeSitterNode): SubDeclaration[] { } function extractCSharpEnumMembers(enumNode: TreeSitterNode): SubDeclaration[] { - const constants: SubDeclaration[] = []; - const body = - enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list'); - if (!body) return constants; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== 'enum_member_declaration') continue; - const nameNode = member.childForFieldName('name'); - if (nameNode) { - constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); - } - } - return constants; + return extractBodyMembers( + enumNode, + ['body', 'enum_member_declaration_list'], + 'enum_member_declaration', + 'constant', + ); } // ── Type map extraction ────────────────────────────────────────────────────── diff --git a/src/extractors/go.ts b/src/extractors/go.ts index 3e832b37..3e857b28 100644 --- a/src/extractors/go.ts +++ b/src/extractors/go.ts @@ -6,7 +6,14 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, goVisibility, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + findChild, + goVisibility, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, + stripQuotes, +} from './helpers.js'; /** * Extract symbols from Go files. @@ -170,9 +177,9 @@ function handleGoImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { function extractGoImportSpec(spec: TreeSitterNode, ctx: ExtractorOutput): void { const pathNode = spec.childForFieldName('path'); if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); + const importPath = stripQuotes(pathNode.text); const nameNode = spec.childForFieldName('name'); - const alias = nameNode ? nameNode.text : (importPath.split('/').pop() ?? importPath); + const alias = nameNode ? nameNode.text : lastPathSegment(importPath); ctx.imports.push({ source: importPath, names: [alias], diff --git a/src/extractors/hcl.ts b/src/extractors/hcl.ts index a37792f9..cf69687a 100644 --- a/src/extractors/hcl.ts +++ b/src/extractors/hcl.ts @@ -6,7 +6,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { nodeEndLine } from './helpers.js'; +import { nodeEndLine, stripQuotes } from './helpers.js'; /** * Extract symbols from HCL (Terraform) files. @@ -80,18 +80,18 @@ function resolveHclBlockName(blockType: string, strings: TreeSitterNode[]): stri const s0 = strings[0]; const s1 = strings[1]; if (blockType === 'resource' && s0 && s1) { - return `${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`; + return `${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`; } if (blockType === 'data' && s0 && s1) { - return `data.${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`; + return `data.${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`; } if ((blockType === 'variable' || blockType === 'output' || blockType === 'module') && s0) { - return `${blockType}.${s0.text.replace(/"/g, '')}`; + return `${blockType}.${stripQuotes(s0.text)}`; } if (blockType === 'locals') return 'locals'; if (blockType === 'terraform' || blockType === 'provider') { let name = blockType; - if (s0) name += `.${s0.text.replace(/"/g, '')}`; + if (s0) name += `.${stripQuotes(s0.text)}`; return name; } return ''; @@ -126,7 +126,7 @@ function extractHclModuleSource( const key = attr.childForFieldName('key') || attr.child(0); const val = attr.childForFieldName('val') || attr.child(2); if (key && key.text === 'source' && val) { - const src = val.text.replace(/"/g, ''); + const src = stripQuotes(val.text); if (src.startsWith('./') || src.startsWith('../')) { ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); } diff --git a/src/extractors/helpers.ts b/src/extractors/helpers.ts index 56b05543..589cb2da 100644 --- a/src/extractors/helpers.ts +++ b/src/extractors/helpers.ts @@ -1,4 +1,4 @@ -import type { TreeSitterNode } from '../types.js'; +import type { SubDeclaration, TreeSitterNode } from '../types.js'; /** * Maximum recursion depth for tree-sitter AST walkers. @@ -70,6 +70,82 @@ export function rustVisibility(node: TreeSitterNode): 'public' | 'private' { return 'private'; } +// ── Parser abstraction helpers ───────────────────────────────────────────── + +/** + * Walk up the parent chain to find an enclosing node whose type is in `typeNames`. + * Returns the text of `nameField` (default `'name'`) on the matching ancestor, or null. + * + * Replaces per-language `findParentClass` / `findParentType` / `findCurrentImpl` helpers. + */ +export function findParentNode( + node: TreeSitterNode, + typeNames: readonly string[], + nameField: string = 'name', +): string | null { + let current = node.parent; + while (current) { + if (typeNames.includes(current.type)) { + const nameNode = current.childForFieldName(nameField); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; +} + +/** + * Extract child declarations from a container node's body. + * Finds the body via `bodyFields` (tries childForFieldName then findChild for each), + * iterates its children, filters by `memberType`, extracts `nameField`, and returns SubDeclarations. + * + * Replaces per-language extractStructFields / extractEnumVariants / extractEnumConstants helpers + * for the common case where each member has a direct name field. + */ +export function extractBodyMembers( + containerNode: TreeSitterNode, + bodyFields: readonly string[], + memberType: string, + kind: SubDeclaration['kind'], + nameField: string = 'name', + visibility?: (member: TreeSitterNode) => SubDeclaration['visibility'], +): SubDeclaration[] { + const members: SubDeclaration[] = []; + let body: TreeSitterNode | null = null; + for (const field of bodyFields) { + body = containerNode.childForFieldName(field) || findChild(containerNode, field); + if (body) break; + } + if (!body) return members; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== memberType) continue; + const nn = member.childForFieldName(nameField); + if (nn) { + const entry: SubDeclaration = { name: nn.text, kind, line: member.startPosition.row + 1 }; + if (visibility) entry.visibility = visibility(member); + members.push(entry); + } + } + return members; +} + +/** + * Strip leading/trailing quotes (single, double, or backtick) from a string. + * Strips only the leading/trailing delimiter; interior quotes are untouched. + */ +export function stripQuotes(text: string): string { + return text.replace(/^['"`]|['"`]$/g, ''); +} + +/** + * Extract the last segment of a delimited path. + * e.g. `lastPathSegment('java.util.List', '.')` → `'List'` + */ +export function lastPathSegment(path: string, separator: string = '/'): string { + return path.split(separator).pop() ?? path; +} + export function extractModifierVisibility( node: TreeSitterNode, modifierTypes: Set = DEFAULT_MODIFIER_TYPES, diff --git a/src/extractors/java.ts b/src/extractors/java.ts index 6277ff02..b29d053c 100644 --- a/src/extractors/java.ts +++ b/src/extractors/java.ts @@ -6,7 +6,14 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; +import { + extractBodyMembers, + extractModifierVisibility, + findChild, + findParentNode, + lastPathSegment, + nodeEndLine, +} from './helpers.js'; /** * Extract symbols from Java files. @@ -218,7 +225,7 @@ function handleJavaImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void const child = node.child(i); if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { const fullPath = child.text; - const lastName = fullPath.split('.').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '.'); ctx.imports.push({ source: fullPath, names: [lastName], @@ -263,20 +270,13 @@ function handleJavaObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): v if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); } +const JAVA_PARENT_TYPES = [ + 'class_declaration', + 'enum_declaration', + 'interface_declaration', +] as const; function findJavaParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'enum_declaration' || - current.type === 'interface_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, JAVA_PARENT_TYPES); } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -333,16 +333,5 @@ function extractClassFields(classNode: TreeSitterNode): SubDeclaration[] { } function extractEnumConstants(enumNode: TreeSitterNode): SubDeclaration[] { - const constants: SubDeclaration[] = []; - const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body'); - if (!body) return constants; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== 'enum_constant') continue; - const nameNode = member.childForFieldName('name'); - if (nameNode) { - constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); - } - } - return constants; + return extractBodyMembers(enumNode, ['body', 'enum_body'], 'enum_constant', 'constant'); } diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index fc32576c..3b083ed7 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -12,7 +12,7 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { findChild, findParentNode, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS: Set = new Set([ @@ -1191,17 +1191,9 @@ function extractSuperclass(heritage: TreeSitterNode): string | null { return null; } +const JS_CLASS_TYPES = ['class_declaration', 'class'] as const; function findParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - const t = current.type; - if (t === 'class_declaration' || t === 'class') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, JS_CLASS_TYPES); } function extractImportNames(node: TreeSitterNode): string[] { diff --git a/src/extractors/php.ts b/src/extractors/php.ts index 653971ee..dc2820fd 100644 --- a/src/extractors/php.ts +++ b/src/extractors/php.ts @@ -5,7 +5,14 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + extractBodyMembers, + extractModifierVisibility, + findChild, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, +} from './helpers.js'; function extractPhpParameters(fnNode: TreeSitterNode): SubDeclaration[] { const params: SubDeclaration[] = []; @@ -65,18 +72,7 @@ function extractPhpClassChildren(classNode: TreeSitterNode): SubDeclaration[] { } function extractPhpEnumCases(enumNode: TreeSitterNode): SubDeclaration[] { - const children: SubDeclaration[] = []; - const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_declaration_list'); - if (!body) return children; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== 'enum_case') continue; - const nameNode = member.childForFieldName('name'); - if (nameNode) { - children.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); - } - } - return children; + return extractBodyMembers(enumNode, ['body', 'enum_declaration_list'], 'enum_case', 'constant'); } /** @@ -272,7 +268,7 @@ function handlePhpNamespaceUse(node: TreeSitterNode, ctx: ExtractorOutput): void const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); if (nameNode) { const fullPath = nameNode.text; - const lastName = fullPath.split('\\').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '\\'); const alias = child.childForFieldName('alias'); ctx.imports.push({ source: fullPath, @@ -284,7 +280,7 @@ function handlePhpNamespaceUse(node: TreeSitterNode, ctx: ExtractorOutput): void } if (child && (child.type === 'qualified_name' || child.type === 'name')) { const fullPath = child.text; - const lastName = fullPath.split('\\').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '\\'); ctx.imports.push({ source: fullPath, names: [lastName], diff --git a/src/extractors/python.ts b/src/extractors/python.ts index b1d8804a..8f98ca34 100644 --- a/src/extractors/python.ts +++ b/src/extractors/python.ts @@ -6,7 +6,13 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, MAX_WALK_DEPTH, nodeEndLine, pythonVisibility } from './helpers.js'; +import { + findChild, + findParentNode, + MAX_WALK_DEPTH, + nodeEndLine, + pythonVisibility, +} from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS_PY: Set = new Set([ @@ -441,14 +447,7 @@ function extractPythonTypeName(typeNode: TreeSitterNode): string | null { return null; } +const PY_CLASS_TYPES = ['class_definition'] as const; function findPythonParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if (current.type === 'class_definition') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, PY_CLASS_TYPES); } diff --git a/src/extractors/ruby.ts b/src/extractors/ruby.ts index 6b7ba20a..2c9bb2d5 100644 --- a/src/extractors/ruby.ts +++ b/src/extractors/ruby.ts @@ -5,7 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { findChild, nodeEndLine } from './helpers.js'; +import { findChild, findParentNode, lastPathSegment, nodeEndLine, stripQuotes } from './helpers.js'; /** * Extract symbols from Ruby files. @@ -176,10 +176,10 @@ function handleRubyRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { for (let i = 0; i < args.childCount; i++) { const arg = args.child(i); if (arg && (arg.type === 'string' || arg.type === 'string_content')) { - const strContent = arg.text.replace(/^['"]|['"]$/g, ''); + const strContent = stripQuotes(arg.text); ctx.imports.push({ source: strContent, - names: [strContent.split('/').pop() ?? strContent], + names: [lastPathSegment(strContent)], line: node.startPosition.row + 1, rubyRequire: true, }); @@ -190,7 +190,7 @@ function handleRubyRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { if (content) { ctx.imports.push({ source: content.text, - names: [content.text.split('/').pop() ?? content.text], + names: [lastPathSegment(content.text)], line: node.startPosition.row + 1, rubyRequire: true, }); @@ -221,16 +221,9 @@ function handleRubyModuleInclusion( } } +const RUBY_PARENT_TYPES = ['class', 'module'] as const; function findRubyParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if (current.type === 'class' || current.type === 'module') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, RUBY_PARENT_TYPES); } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index 3f40737e..169ef1e5 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -5,7 +5,14 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { findChild, MAX_WALK_DEPTH, nodeEndLine, rustVisibility } from './helpers.js'; +import { + extractBodyMembers, + findParentNode, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, + rustVisibility, +} from './helpers.js'; /** * Extract symbols from Rust files. @@ -206,16 +213,9 @@ function handleRustMacroInvocation(node: TreeSitterNode, ctx: ExtractorOutput): } } +const RUST_IMPL_TYPES = ['impl_item'] as const; function findCurrentImpl(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if (current.type === 'impl_item') { - const typeNode = current.childForFieldName('type'); - return typeNode ? typeNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, RUST_IMPL_TYPES, 'type'); } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -227,8 +227,7 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara const param = paramListNode.child(i); if (!param) continue; if (param.type === 'self_parameter') { - // Skip self parameters — matches native engine behaviour - continue; + // Skip self — matches native engine behaviour } else if (param.type === 'parameter') { const pattern = param.childForFieldName('pattern'); if (pattern) { @@ -240,34 +239,16 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara } function extractStructFields(structNode: TreeSitterNode): SubDeclaration[] { - const fields: SubDeclaration[] = []; - const fieldList = - structNode.childForFieldName('body') || findChild(structNode, 'field_declaration_list'); - if (!fieldList) return fields; - for (let i = 0; i < fieldList.childCount; i++) { - const field = fieldList.child(i); - if (!field || field.type !== 'field_declaration') continue; - const nameNode = field.childForFieldName('name'); - if (nameNode) { - fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); - } - } - return fields; + return extractBodyMembers( + structNode, + ['body', 'field_declaration_list'], + 'field_declaration', + 'property', + ); } function extractEnumVariants(enumNode: TreeSitterNode): SubDeclaration[] { - const variants: SubDeclaration[] = []; - const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_variant_list'); - if (!body) return variants; - for (let i = 0; i < body.childCount; i++) { - const variant = body.child(i); - if (!variant || variant.type !== 'enum_variant') continue; - const nameNode = variant.childForFieldName('name'); - if (nameNode) { - variants.push({ name: nameNode.text, kind: 'constant', line: variant.startPosition.row + 1 }); - } - } - return variants; + return extractBodyMembers(enumNode, ['body', 'enum_variant_list'], 'enum_variant', 'constant'); } function extractRustTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void { @@ -375,7 +356,7 @@ function extractRustUsePath(node: TreeSitterNode | null): { source: string; name if (node.type === 'scoped_identifier' || node.type === 'identifier') { const text = node.text; - const lastName = text.split('::').pop() ?? text; + const lastName = lastPathSegment(text, '::'); return [{ source: text, names: [lastName] }]; }