diff --git a/crates/codegraph-core/src/cfg.rs b/crates/codegraph-core/src/cfg.rs index 08df6956..c60a8876 100644 --- a/crates/codegraph-core/src/cfg.rs +++ b/crates/codegraph-core/src/cfg.rs @@ -452,168 +452,209 @@ impl<'a> CfgBuilder<'a> { cur } - /// Process a single statement. + /// Process a single statement — thin dispatcher delegating to focused handlers. fn process_statement(&mut self, stmt: &Node, current: u32) -> Option { let kind = stmt.kind(); // Unwrap expression_statement (Rust uses expressions for control flow) - if kind == "expression_statement" && stmt.named_child_count() == 1 { - if let Some(inner) = stmt.named_child(0) { - let t = inner.kind(); - if matches_opt(t, self.rules.if_node) - || matches_slice(t, self.rules.if_nodes) - || matches_slice(t, self.rules.for_nodes) - || matches_opt(t, self.rules.while_node) - || matches_slice(t, self.rules.while_nodes) - || matches_opt(t, self.rules.do_node) - || matches_opt(t, self.rules.infinite_loop_node) - || matches_opt(t, self.rules.switch_node) - || matches_slice(t, self.rules.switch_nodes) - || matches_opt(t, self.rules.return_node) - || matches_opt(t, self.rules.throw_node) - || matches_opt(t, self.rules.break_node) - || matches_opt(t, self.rules.continue_node) - || matches_opt(t, self.rules.unless_node) - || matches_opt(t, self.rules.until_node) - { - return self.process_statement(&inner, current); - } - } + if let Some(result) = self.try_unwrap_expr_stmt(stmt, kind, current) { + return result; } // Labeled statement - if matches_opt(kind, self.rules.labeled_node) { - let label_node = stmt.child_by_field_name("label"); - let body = stmt.child_by_field_name("body"); - if let (Some(label_node), Some(body)) = (label_node, body) { - let label_name = label_node.utf8_text(self.source).unwrap_or("").to_string(); - // We can't know the loop blocks yet — push a placeholder - self.label_map.push((label_name.clone(), LabelCtx { header_idx: None, exit_idx: None })); - let result = self.process_statement(&body, 
current); - self.label_map.retain(|(n, _)| n != &label_name); - return result; - } - return Some(current); + if let Some(result) = self.try_process_labeled(stmt, kind, current) { + return result; } - // If statement - if matches_opt(kind, self.rules.if_node) || matches_slice(kind, self.rules.if_nodes) { - return self.process_if(stmt, current); + // Compound control flow + if let Some(result) = self.try_process_control_flow(stmt, kind, current) { + return result; } - // Unless (Ruby) - if matches_opt(kind, self.rules.unless_node) { - return self.process_if(stmt, current); + // Terminal statements (return, throw, break, continue) + if let Some(result) = self.try_process_terminal(stmt, kind, current) { + return result; } - // For loops - if matches_slice(kind, self.rules.for_nodes) { - return self.process_for_loop(stmt, current); + // Regular statement — extend current block + self.set_start_line_if_empty(current, node_line(stmt)); + self.set_end_line(current, node_end_line(stmt)); + Some(current) + } + + /// Unwrap expression_statement wrappers (Rust uses expressions for control flow). + /// Returns `Some(result)` if unwrapped and processed, `None` if not applicable. 
+ fn try_unwrap_expr_stmt(&mut self, stmt: &Node, kind: &str, current: u32) -> Option> { + if kind != "expression_statement" || stmt.named_child_count() != 1 { + return None; } + let inner = stmt.named_child(0)?; + let t = inner.kind(); + let is_control = matches_opt(t, self.rules.if_node) + || matches_slice(t, self.rules.if_nodes) + || matches_slice(t, self.rules.for_nodes) + || matches_opt(t, self.rules.while_node) + || matches_slice(t, self.rules.while_nodes) + || matches_opt(t, self.rules.do_node) + || matches_opt(t, self.rules.infinite_loop_node) + || matches_opt(t, self.rules.switch_node) + || matches_slice(t, self.rules.switch_nodes) + || matches_opt(t, self.rules.return_node) + || matches_opt(t, self.rules.throw_node) + || matches_opt(t, self.rules.break_node) + || matches_opt(t, self.rules.continue_node) + || matches_opt(t, self.rules.unless_node) + || matches_opt(t, self.rules.until_node); + if is_control { + Some(self.process_statement(&inner, current)) + } else { + None + } + } - // While loop - if matches_opt(kind, self.rules.while_node) || matches_slice(kind, self.rules.while_nodes) { - return self.process_while_loop(stmt, current); + /// Process labeled statements. Returns `Some(result)` if this was a labeled + /// statement, `None` otherwise. 
+ fn try_process_labeled(&mut self, stmt: &Node, kind: &str, current: u32) -> Option> { + if !matches_opt(kind, self.rules.labeled_node) { + return None; + } + let label_node = stmt.child_by_field_name("label"); + let body = stmt.child_by_field_name("body"); + if let (Some(label_node), Some(body)) = (label_node, body) { + let label_name = label_node.utf8_text(self.source).unwrap_or("").to_string(); + self.label_map.push((label_name.clone(), LabelCtx { header_idx: None, exit_idx: None })); + let result = self.process_statement(&body, current); + self.label_map.retain(|(n, _)| n != &label_name); + Some(result) + } else { + Some(Some(current)) + } + } + + /// Dispatch compound control flow (if, for, while, switch, try, etc.). + /// Returns `Some(result)` if handled, `None` if not a control flow node. + fn try_process_control_flow(&mut self, stmt: &Node, kind: &str, current: u32) -> Option> { + // If / unless + if matches_opt(kind, self.rules.if_node) || matches_slice(kind, self.rules.if_nodes) + || matches_opt(kind, self.rules.unless_node) + { + return Some(self.process_if(stmt, current)); + } + + // For loops + if matches_slice(kind, self.rules.for_nodes) { + return Some(self.process_for_loop(stmt, current)); } - // Until (Ruby) - if matches_opt(kind, self.rules.until_node) { - return self.process_while_loop(stmt, current); + // While / until + if matches_opt(kind, self.rules.while_node) || matches_slice(kind, self.rules.while_nodes) + || matches_opt(kind, self.rules.until_node) + { + return Some(self.process_while_loop(stmt, current)); } // Do-while if matches_opt(kind, self.rules.do_node) { - return self.process_do_while_loop(stmt, current); + return Some(self.process_do_while_loop(stmt, current)); } // Infinite loop (Rust loop {}) if matches_opt(kind, self.rules.infinite_loop_node) { - return self.process_infinite_loop(stmt, current); + return Some(self.process_infinite_loop(stmt, current)); } // Switch/match if matches_opt(kind, self.rules.switch_node) || 
matches_slice(kind, self.rules.switch_nodes) { - return self.process_switch(stmt, current); + return Some(self.process_switch(stmt, current)); } // Try/catch/finally if matches_opt(kind, self.rules.try_node) { - return self.process_try_catch(stmt, current); + return Some(self.process_try_catch(stmt, current)); } // Additional try nodes (e.g. Ruby body_statement with rescue) if matches_slice(kind, self.rules.try_nodes) { - // Only treat as try if it actually contains a catch/rescue child let cursor = &mut stmt.walk(); let has_rescue = stmt.named_children(cursor) .any(|c| matches_opt(c.kind(), self.rules.catch_node)); if has_rescue { - return self.process_try_catch(stmt, current); + return Some(self.process_try_catch(stmt, current)); } } - // Return + None + } + + /// Handle terminal statements: return, throw, break, continue. + /// Returns `Some(result)` if handled, `None` if not a terminal node. + fn try_process_terminal(&mut self, stmt: &Node, kind: &str, current: u32) -> Option> { if matches_opt(kind, self.rules.return_node) { self.set_end_line(current, node_line(stmt)); self.add_edge(current, self.exit_idx, "return"); - return None; + return Some(None); } - // Throw if matches_opt(kind, self.rules.throw_node) { self.set_end_line(current, node_line(stmt)); self.add_edge(current, self.exit_idx, "exception"); - return None; + return Some(None); } - // Break if matches_opt(kind, self.rules.break_node) { - let label_name = stmt.child_by_field_name("label") - .map(|n| n.utf8_text(self.source).unwrap_or("").to_string()); - - let target = if let Some(ref name) = label_name { - self.label_map.iter().rev() - .find(|(n, _)| n == name) - .and_then(|(_, ctx)| ctx.exit_idx) - } else { - self.loop_stack.last().map(|ctx| ctx.exit_idx) - }; - - if let Some(target) = target { - self.set_end_line(current, node_line(stmt)); - self.add_edge(current, target, "break"); - return None; - } - return Some(current); + return Some(self.process_break(stmt, current)); } - // Continue if 
matches_opt(kind, self.rules.continue_node) { - let label_name = stmt.child_by_field_name("label") - .map(|n| n.utf8_text(self.source).unwrap_or("").to_string()); + return Some(self.process_continue(stmt, current)); + } - let target = if let Some(ref name) = label_name { - self.label_map.iter().rev() - .find(|(n, _)| n == name) - .and_then(|(_, ctx)| ctx.header_idx) - } else { - // Walk back to find the nearest actual loop (skip switch entries) - self.loop_stack.iter().rev() - .find(|ctx| ctx.is_loop) - .map(|ctx| ctx.header_idx) - }; + None + } - if let Some(target) = target { - self.set_end_line(current, node_line(stmt)); - self.add_edge(current, target, "continue"); - return None; - } - return Some(current); + /// Process a break statement: resolve label or loop target. + fn process_break(&mut self, stmt: &Node, current: u32) -> Option { + let label_name = stmt.child_by_field_name("label") + .map(|n| n.utf8_text(self.source).unwrap_or("").to_string()); + + let target = if let Some(ref name) = label_name { + self.label_map.iter().rev() + .find(|(n, _)| n == name) + .and_then(|(_, ctx)| ctx.exit_idx) + } else { + self.loop_stack.last().map(|ctx| ctx.exit_idx) + }; + + if let Some(target) = target { + self.set_end_line(current, node_line(stmt)); + self.add_edge(current, target, "break"); + None + } else { + Some(current) } + } - // Regular statement — extend current block - self.set_start_line_if_empty(current, node_line(stmt)); - self.set_end_line(current, node_end_line(stmt)); - Some(current) + /// Process a continue statement: resolve label or nearest loop header. 
+ fn process_continue(&mut self, stmt: &Node, current: u32) -> Option { + let label_name = stmt.child_by_field_name("label") + .map(|n| n.utf8_text(self.source).unwrap_or("").to_string()); + + let target = if let Some(ref name) = label_name { + self.label_map.iter().rev() + .find(|(n, _)| n == name) + .and_then(|(_, ctx)| ctx.header_idx) + } else { + self.loop_stack.iter().rev() + .find(|ctx| ctx.is_loop) + .map(|ctx| ctx.header_idx) + }; + + if let Some(target) = target { + self.set_end_line(current, node_line(stmt)); + self.add_edge(current, target, "continue"); + None + } else { + Some(current) + } } /// Process if/else-if/else chain (handles patterns A, B, C). diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/complexity.rs index 9b8f4f49..ce81e2b7 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/complexity.rs @@ -409,6 +409,142 @@ fn walk_children( } } +// ─── Shared complexity classification helpers ──────────────────────────── + +/// Detect whether this node is an else-if via Pattern A (JS/C#/Rust: if inside +/// else_clause), Pattern B (Python/Ruby/PHP: explicit elif node), or Pattern C +/// (Go/Java: if_statement as `alternative` of parent if). +/// +/// Returns a `BranchAction` telling the caller what cognitive/cyclomatic +/// adjustments to make and what nesting delta to apply to children. +enum BranchAction { + /// Node handled — walk children at the given nesting delta, then return. + Handled { cognitive_delta: u32, cyclomatic_delta: u32, nesting_delta: u32 }, + /// Not a special branch pattern — fall through to normal processing. + NotHandled, +} + +/// Classify a branch node (one where `rules.is_branch(kind)` is true). 
+fn classify_branch(node: &Node, kind: &str, rules: &LangRules, nesting_level: u32) -> BranchAction { + // Pattern A: else clause wraps if (JS/C#/Rust) + if let Some(else_type) = rules.else_node_type { + if kind == else_type { + let is_else_if = node.named_child(0).map_or(false, |c| { + rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) + }); + if is_else_if { + // else-if: the if_statement child handles its own increment + return BranchAction::Handled { cognitive_delta: 0, cyclomatic_delta: 0, nesting_delta: 0 }; + } + // Plain else + return BranchAction::Handled { cognitive_delta: 1, cyclomatic_delta: 0, nesting_delta: 0 }; + } + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if let Some(elif_type) = rules.elif_node_type { + if kind == elif_type { + return BranchAction::Handled { cognitive_delta: 1, cyclomatic_delta: 1, nesting_delta: 0 }; + } + } + + // Detect else-if via Pattern A or C + if detect_else_if(node, kind, rules) { + return BranchAction::Handled { cognitive_delta: 1, cyclomatic_delta: 1, nesting_delta: 0 }; + } + + // Regular branch node + let mut cyc = 1u32; + if rules.is_switch_like(kind) { + cyc = 0; // Cases handle cyclomatic, not the switch itself + } + let nest = if rules.is_nesting(kind) { 1u32 } else { 0u32 }; + BranchAction::Handled { + cognitive_delta: 1 + nesting_level, + cyclomatic_delta: cyc, + nesting_delta: nest, + } +} + +/// Detect whether an if-node is actually an else-if (Pattern A or C). 
+fn detect_else_if(node: &Node, kind: &str, rules: &LangRules) -> bool { + if !rules.if_node_type.map_or(false, |if_t| kind == if_t) { + return false; + } + if rules.else_via_alternative { + // Pattern C (Go/Java): if_statement is the alternative of parent if_statement + if let Some(parent) = node.parent() { + if rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + return true; + } + } + } + } + } else if rules.else_node_type.is_some() { + // Pattern A (JS/C#/Rust): if_statement inside else_clause + if let Some(parent) = node.parent() { + if rules.else_node_type.map_or(false, |else_t| parent.kind() == else_t) { + return true; + } + } + } + false +} + +/// Detect Pattern C plain else: a non-if block that is the `alternative` of an +/// if_statement (Go/Java). +fn is_pattern_c_else(node: &Node, kind: &str, rules: &LangRules) -> bool { + if !rules.else_via_alternative { + return false; + } + if rules.if_node_type.map_or(false, |if_t| kind == if_t) { + return false; // This is an if, not a plain else block + } + if let Some(parent) = node.parent() { + if rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { + if let Some(alt) = parent.child_by_field_name("alternative") { + return alt.id() == node.id(); + } + } + } + false +} + +/// Handle logical operator nodes: returns true if the node was a logical op +/// (caller should walk children and return). 
+fn handle_logical_op( + node: &Node, + kind: &str, + rules: &LangRules, + cognitive: &mut u32, + cyclomatic: &mut u32, +) -> bool { + if kind != rules.logical_node_type { + return false; + } + let Some(op_node) = node.child(1) else { return false }; + let op = op_node.kind(); + if !rules.is_logical_op(op) { + return false; + } + + *cyclomatic += 1; + + // Cognitive: +1 only when operator changes from the previous sibling sequence + let same_sequence = node.parent().map_or(false, |parent| { + parent.kind() == rules.logical_node_type + && parent.child(1).map_or(false, |pop| pop.kind() == op) + }); + if !same_sequence { + *cognitive += 1; + } + true +} + +// ─── walk (complexity-only DFS) ───────────────────────────────────────── + fn walk( node: &Node, nesting_level: u32, @@ -424,244 +560,54 @@ fn walk( } let kind = node.kind(); - // Track nesting depth if nesting_level > *max_nesting { *max_nesting = nesting_level; } - // Handle logical operators in binary expressions - if kind == rules.logical_node_type { - if let Some(op_node) = node.child(1) { - let op = op_node.kind(); - if rules.is_logical_op(op) { - // Cyclomatic: +1 for every logical operator - *cyclomatic += 1; - - // Cognitive: +1 only when operator changes from the previous sibling sequence - let mut same_sequence = false; - if let Some(parent) = node.parent() { - if parent.kind() == rules.logical_node_type { - if let Some(parent_op) = parent.child(1) { - if parent_op.kind() == op { - same_sequence = true; - } - } - } - } - if !same_sequence { - *cognitive += 1; - } - - // Walk children manually to avoid double-counting - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); - return; - } - } + // Logical operators + if handle_logical_op(node, kind, rules, cognitive, cyclomatic) { + walk_children(node, nesting_level, false, rules, cognitive, cyclomatic, max_nesting, depth); + return; } - // Handle optional chaining (cyclomatic only) + // Optional 
chaining (cyclomatic only) if let Some(opt_type) = rules.optional_chain_type { if kind == opt_type { *cyclomatic += 1; } } - // Handle branch/control flow nodes (skip keyword leaf tokens — childCount > 0 guard) + // Branch/control flow nodes (skip keyword leaf tokens) if rules.is_branch(kind) && node.child_count() > 0 { - // Pattern A: else clause wraps if (JS/C#/Rust) - if let Some(else_type) = rules.else_node_type { - if kind == else_type { - let first_child = node.named_child(0); - if first_child.map_or(false, |c| { - rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) - }) { - // else-if: the if_statement child handles its own increment - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); - return; - } - // Plain else - *cognitive += 1; - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); - return; - } - } - - // Pattern B: explicit elif node (Python/Ruby/PHP) - if let Some(elif_type) = rules.elif_node_type { - if kind == elif_type { - *cognitive += 1; - *cyclomatic += 1; - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); - return; - } - } - - // Detect else-if via Pattern A or C - let mut is_else_if = false; - if rules.if_node_type.map_or(false, |if_t| kind == if_t) { - if rules.else_via_alternative { - // Pattern C (Go/Java): if_statement is the alternative of parent if_statement - if let Some(parent) = node.parent() { - if rules - .if_node_type - .map_or(false, |if_t| parent.kind() == if_t) - { - if let Some(alt) = parent.child_by_field_name("alternative") { - if alt.id() == node.id() { - is_else_if = true; - } - } - } - } - } else if rules.else_node_type.is_some() { - // Pattern A (JS/C#/Rust): if_statement inside else_clause - if let Some(parent) = node.parent() { - if rules - .else_node_type - .map_or(false, |else_t| parent.kind() == else_t) - { - is_else_if = 
true; - } - } - } - } - - if is_else_if { - *cognitive += 1; - *cyclomatic += 1; - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); - return; - } - - // Regular branch node - *cognitive += 1 + nesting_level; // structural + nesting - *cyclomatic += 1; - - // Switch-like nodes don't add cyclomatic themselves (cases do) - if rules.is_switch_like(kind) { - *cyclomatic -= 1; // Undo the ++ above; cases handle cyclomatic - } - - if rules.is_nesting(kind) { - walk_children( - node, - nesting_level + 1, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); + if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, rules, nesting_level) + { + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); return; } } - // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) - if rules.else_via_alternative { - if rules.if_node_type.map_or(false, |if_t| kind != if_t) { - if let Some(parent) = node.parent() { - if rules - .if_node_type - .map_or(false, |if_t| parent.kind() == if_t) - { - if let Some(alt) = parent.child_by_field_name("alternative") { - if alt.id() == node.id() { - *cognitive += 1; - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); - return; - } - } - } - } - } + // Pattern C plain else (Go/Java) + if is_pattern_c_else(node, kind, rules) { + *cognitive += 1; + walk_children(node, nesting_level, false, rules, cognitive, cyclomatic, max_nesting, depth); + return; } - // Handle case nodes (cyclomatic only, skip keyword leaves) + // Case nodes (cyclomatic only, skip keyword leaves) if rules.is_case(kind) && node.child_count() > 0 { *cyclomatic += 1; } - // Handle nested function definitions (increase nesting) + // Nested 
function definitions (increase nesting) if !is_top_function && rules.is_function(kind) { - walk_children( - node, - nesting_level + 1, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); + walk_children(node, nesting_level + 1, false, rules, cognitive, cyclomatic, max_nesting, depth); return; } - // Walk children - walk_children( - node, - nesting_level, - false, - rules, - cognitive, - cyclomatic, - max_nesting, - depth, - ); + walk_children(node, nesting_level, false, rules, cognitive, cyclomatic, max_nesting, depth); } // ─── Halstead Operator/Operand Classification ───────────────────────────── @@ -1070,6 +1016,34 @@ fn walk_all_children( } } +/// Classify a single node for Halstead operator/operand counting. +fn classify_halstead( + node: &Node, + kind: &str, + source: &[u8], + hr: &HalsteadRules, + operators: &mut HashMap, + operands: &mut HashMap, +) { + // Compound operators (non-leaf): count node type as operator + if hr.compound_operators.contains(&kind) { + *operators.entry(kind.to_string()).or_insert(0) += 1; + } + // Leaf nodes: classify as operator or operand + if node.child_count() == 0 { + if hr.operator_leaf_types.contains(&kind) { + *operators.entry(kind.to_string()).or_insert(0) += 1; + } else if hr.operand_leaf_types.contains(&kind) { + let start = node.start_byte(); + let end = node.end_byte().min(source.len()); + let text = String::from_utf8_lossy(&source[start..end]).to_string(); + *operands.entry(text).or_insert(0) += 1; + } + } +} + +// ─── walk_all (merged complexity + Halstead DFS) ──────────────────────── + #[allow(clippy::too_many_arguments)] fn walk_all( node: &Node, @@ -1093,21 +1067,7 @@ fn walk_all( if let Some(hr) = h_rules { if !skip_h { - // Compound operators (non-leaf): count node type as operator - if hr.compound_operators.contains(&kind) { - *operators.entry(kind.to_string()).or_insert(0) += 1; - } - // Leaf nodes: classify as operator or operand - if node.child_count() == 0 { - if 
hr.operator_leaf_types.contains(&kind) { - *operators.entry(kind.to_string()).or_insert(0) += 1; - } else if hr.operand_leaf_types.contains(&kind) { - let start = node.start_byte(); - let end = node.end_byte().min(source.len()); - let text = String::from_utf8_lossy(&source[start..end]).to_string(); - *operands.entry(text).or_insert(0) += 1; - } - } + classify_halstead(node, kind, source, hr, operators, operands); } } @@ -1116,155 +1076,53 @@ fn walk_all( *max_nesting = nesting_level; } - // Handle logical operators in binary expressions - if kind == c_rules.logical_node_type { - if let Some(op_node) = node.child(1) { - let op = op_node.kind(); - if c_rules.is_logical_op(op) { - *cyclomatic += 1; - - let mut same_sequence = false; - if let Some(parent) = node.parent() { - if parent.kind() == c_rules.logical_node_type { - if let Some(parent_op) = parent.child(1) { - if parent_op.kind() == op { - same_sequence = true; - } - } - } - } - if !same_sequence { - *cognitive += 1; - } - - walk_all_children( - node, source, nesting_level, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } - } + // Logical operators + if handle_logical_op(node, kind, c_rules, cognitive, cyclomatic) { + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; } - // Handle optional chaining (cyclomatic only) + // Optional chaining (cyclomatic only) if let Some(opt_type) = c_rules.optional_chain_type { if kind == opt_type { *cyclomatic += 1; } } - // Handle branch/control flow nodes (skip keyword leaf tokens — childCount > 0 guard) + // Branch/control flow nodes (skip keyword leaf tokens) if c_rules.is_branch(kind) && node.child_count() > 0 { - // Pattern A: else clause wraps if (JS/C#/Rust) - if let Some(else_type) = c_rules.else_node_type { - if kind == else_type { - let first_child = node.named_child(0); - if first_child.map_or(false, 
|c| { - c_rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) - }) { - walk_all_children( - node, source, nesting_level, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } - *cognitive += 1; - walk_all_children( - node, source, nesting_level, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } - } - - // Pattern B: explicit elif node (Python/Ruby/PHP) - if let Some(elif_type) = c_rules.elif_node_type { - if kind == elif_type { - *cognitive += 1; - *cyclomatic += 1; - walk_all_children( - node, source, nesting_level, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } - } - - // Detect else-if via Pattern A or C - let mut is_else_if = false; - if c_rules.if_node_type.map_or(false, |if_t| kind == if_t) { - if c_rules.else_via_alternative { - if let Some(parent) = node.parent() { - if c_rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { - if let Some(alt) = parent.child_by_field_name("alternative") { - if alt.id() == node.id() { - is_else_if = true; - } - } - } - } - } else if c_rules.else_node_type.is_some() { - if let Some(parent) = node.parent() { - if c_rules.else_node_type.map_or(false, |else_t| parent.kind() == else_t) { - is_else_if = true; - } - } - } - } - - if is_else_if { - *cognitive += 1; - *cyclomatic += 1; + if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, c_rules, nesting_level) + { + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; walk_all_children( - node, source, nesting_level, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } - - // Regular branch node - *cognitive += 1 + nesting_level; - *cyclomatic += 1; - - if c_rules.is_switch_like(kind) { - *cyclomatic -= 1; - } - - if c_rules.is_nesting(kind) { - 
walk_all_children( - node, source, nesting_level + 1, false, skip_h, + node, source, nesting_level + nesting_delta, false, skip_h, c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, ); return; } } - // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) - if c_rules.else_via_alternative { - if c_rules.if_node_type.map_or(false, |if_t| kind != if_t) { - if let Some(parent) = node.parent() { - if c_rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { - if let Some(alt) = parent.child_by_field_name("alternative") { - if alt.id() == node.id() { - *cognitive += 1; - walk_all_children( - node, source, nesting_level, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } - } - } - } - } + // Pattern C plain else (Go/Java) + if is_pattern_c_else(node, kind, c_rules) { + *cognitive += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; } - // Handle case nodes (cyclomatic only, skip keyword leaves) + // Case nodes (cyclomatic only, skip keyword leaves) if c_rules.is_case(kind) && node.child_count() > 0 { *cyclomatic += 1; } - // Handle nested function definitions (increase nesting) + // Nested function definitions (increase nesting) if !is_top_function && c_rules.is_function(kind) { walk_all_children( node, source, nesting_level + 1, false, skip_h, @@ -1273,7 +1131,6 @@ fn walk_all( return; } - // Walk children walk_all_children( node, source, nesting_level, false, skip_h, c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, diff --git a/crates/codegraph-core/src/dataflow.rs b/crates/codegraph-core/src/dataflow.rs index 35bff96a..af736be0 100644 --- a/crates/codegraph-core/src/dataflow.rs +++ b/crates/codegraph-core/src/dataflow.rs @@ -918,11 +918,13 @@ fn binding_confidence(binding: &Option) -> f64 { pub fn 
extract_dataflow(tree: &Tree, source: &[u8], lang_id: &str) -> Option { let rules = get_dataflow_rules(lang_id)?; - let mut parameters = Vec::new(); - let mut returns = Vec::new(); - let mut assignments = Vec::new(); - let mut arg_flows = Vec::new(); - let mut mutations = Vec::new(); + let mut out = DataflowOutput { + parameters: Vec::new(), + returns: Vec::new(), + assignments: Vec::new(), + arg_flows: Vec::new(), + mutations: Vec::new(), + }; let mut scope_stack: Vec = Vec::new(); @@ -931,34 +933,35 @@ pub fn extract_dataflow(tree: &Tree, source: &[u8], lang_id: &str) -> Option, + returns: Vec, + assignments: Vec, + arg_flows: Vec, + mutations: Vec, +} + #[allow(clippy::too_many_arguments)] fn visit( node: &Node, rules: &DataflowRules, source: &[u8], scope_stack: &mut Vec, - parameters: &mut Vec, - returns: &mut Vec, - assignments: &mut Vec, - arg_flows: &mut Vec, - mutations: &mut Vec, + out: &mut DataflowOutput, depth: usize, ) { if depth >= MAX_WALK_DEPTH { @@ -969,94 +972,93 @@ fn visit( // Enter function scope if is_function_node(rules, t) { - enter_scope(node, rules, source, scope_stack, parameters); - let cursor = &mut node.walk(); - for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); - } + enter_scope(node, rules, source, scope_stack, &mut out.parameters); + visit_children(node, rules, source, scope_stack, out, depth); scope_stack.pop(); return; } // Return statements if rules.return_node.is_some_and(|r| r == t) { - if let Some(scope) = scope_stack.last() { - if let Some(ref func_name) = scope.func_name { - let expr = node.named_child(0); - let mut referenced_names = Vec::new(); - if let Some(ref e) = expr { - collect_identifiers(e, &mut referenced_names, rules, source, depth + 1); - } - returns.push(DataflowReturn { - func_name: func_name.clone(), - expression: truncate( - expr.map(|e| node_text(&e, source)).unwrap_or(""), - 120, - ), - 
referenced_names, - line: node_line(node), - }); - } - } - let cursor = &mut node.walk(); - for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); - } + handle_return_stmt(node, rules, source, scope_stack, &mut out.returns, depth); + visit_children(node, rules, source, scope_stack, out, depth); return; } - // Variable declarations (single type) - if rules.var_declarator_node.is_some_and(|v| v == t) { - handle_var_declarator(node, rules, source, scope_stack, assignments); - let cursor = &mut node.walk(); - for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); - } - return; - } - - // Variable declarations (multi-type, e.g., Go) - if !rules.var_declarator_nodes.is_empty() && rules.var_declarator_nodes.contains(&t) { - handle_var_declarator(node, rules, source, scope_stack, assignments); - let cursor = &mut node.walk(); - for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); - } + // Variable declarations (single or multi-type) + if rules.var_declarator_node.is_some_and(|v| v == t) + || (!rules.var_declarator_nodes.is_empty() && rules.var_declarator_nodes.contains(&t)) + { + handle_var_declarator(node, rules, source, scope_stack, &mut out.assignments); + visit_children(node, rules, source, scope_stack, out, depth); return; } // Call expressions if is_call_node(rules, t) { - handle_call_expr(node, rules, source, scope_stack, arg_flows); - let cursor = &mut node.walk(); - for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); - } + handle_call_expr(node, rules, source, scope_stack, &mut out.arg_flows); + visit_children(node, rules, source, scope_stack, out, depth); return; } 
// Assignment expressions if rules.assignment_node.is_some_and(|a| a == t) { - handle_assignment(node, rules, source, scope_stack, assignments, mutations); - let cursor = &mut node.walk(); - for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); - } + handle_assignment(node, rules, source, scope_stack, &mut out.assignments, &mut out.mutations); + visit_children(node, rules, source, scope_stack, out, depth); return; } // Mutation detection via expression_statement if t == rules.expression_stmt_node { - handle_expr_stmt_mutation(node, rules, source, scope_stack, mutations); + handle_expr_stmt_mutation(node, rules, source, scope_stack, &mut out.mutations); } - // Default: visit children + visit_children(node, rules, source, scope_stack, out, depth); +} + +/// Visit all named children of a node (shared DFS recursion helper). +fn visit_children( + node: &Node, + rules: &DataflowRules, + source: &[u8], + scope_stack: &mut Vec, + out: &mut DataflowOutput, + depth: usize, +) { let cursor = &mut node.walk(); for child in node.named_children(cursor) { - visit(&child, rules, source, scope_stack, parameters, returns, assignments, arg_flows, mutations, depth + 1); + visit(&child, rules, source, scope_stack, out, depth + 1); } } +/// Handle a return statement: extract expression and referenced names. 
+fn handle_return_stmt( + node: &Node, + rules: &DataflowRules, + source: &[u8], + scope_stack: &[ScopeFrame], + returns: &mut Vec, + depth: usize, +) { + let Some(scope) = scope_stack.last() else { return }; + let Some(ref func_name) = scope.func_name else { return }; + + let expr = node.named_child(0); + let mut referenced_names = Vec::new(); + if let Some(ref e) = expr { + collect_identifiers(e, &mut referenced_names, rules, source, depth + 1); + } + returns.push(DataflowReturn { + func_name: func_name.clone(), + expression: truncate( + expr.map(|e| node_text(&e, source)).unwrap_or(""), + 120, + ), + referenced_names, + line: node_line(node), + }); +} + fn enter_scope( fn_node: &Node, rules: &DataflowRules, diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index 68faba21..ccf1fdc0 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -83,6 +83,40 @@ pub struct ComputedEdge { pub dynamic: u32, } +/// Internal struct for caller resolution (def line range → node ID). +struct DefWithId<'a> { + _name: &'a str, + line: u32, + end_line: u32, + node_id: Option, +} + +/// Shared lookup context built once per `build_call_edges` invocation. 
+struct EdgeContext<'a> { + nodes_by_name: HashMap<&'a str, Vec<&'a NodeInfo>>, + nodes_by_name_and_file: HashMap<(&'a str, &'a str), Vec<&'a NodeInfo>>, + builtin_set: HashSet<&'a str>, + receiver_kinds: HashSet<&'a str>, +} + +impl<'a> EdgeContext<'a> { + fn new(all_nodes: &'a [NodeInfo], builtin_receivers: &'a [String]) -> Self { + let mut nodes_by_name: HashMap<&str, Vec<&NodeInfo>> = HashMap::new(); + let mut nodes_by_name_and_file: HashMap<(&str, &str), Vec<&NodeInfo>> = HashMap::new(); + for node in all_nodes { + nodes_by_name.entry(&node.name).or_default().push(node); + nodes_by_name_and_file + .entry((&node.name, &node.file)) + .or_default() + .push(node); + } + let builtin_set: HashSet<&str> = builtin_receivers.iter().map(|s| s.as_str()).collect(); + let receiver_kinds: HashSet<&str> = ["class", "struct", "interface", "type", "module"] + .iter().copied().collect(); + Self { nodes_by_name, nodes_by_name_and_file, builtin_set, receiver_kinds } + } +} + /// Build call, receiver, extends, and implements edges in Rust. 
/// /// Mirrors the algorithm in builder.js `buildEdges` transaction (call edges @@ -93,298 +127,241 @@ pub fn build_call_edges( all_nodes: Vec, builtin_receivers: Vec, ) -> Vec { - let builtin_set: HashSet<&str> = builtin_receivers.iter().map(|s| s.as_str()).collect(); - - // Build lookup maps (mirrors nodesByName / nodesByNameAndFile in JS) - let mut nodes_by_name: HashMap<&str, Vec<&NodeInfo>> = HashMap::new(); - let mut nodes_by_name_and_file: HashMap<(&str, &str), Vec<&NodeInfo>> = HashMap::new(); - - for node in &all_nodes { - nodes_by_name.entry(&node.name).or_default().push(node); - nodes_by_name_and_file - .entry((&node.name, &node.file)) - .or_default() - .push(node); + let ctx = EdgeContext::new(&all_nodes, &builtin_receivers); + let mut edges = Vec::new(); + + for file_input in &files { + process_file(&ctx, file_input, &all_nodes, &mut edges); } - let receiver_kinds: HashSet<&str> = ["class", "struct", "interface", "type", "module"] - .iter() - .copied() + edges +} + +/// Process a single file: build per-file maps and emit call/receiver/hierarchy edges. 
+fn process_file<'a>( + ctx: &EdgeContext<'a>, + file_input: &'a FileEdgeInput, + all_nodes: &'a [NodeInfo], + edges: &mut Vec, +) { + let rel_path = &file_input.file; + let file_node_id = file_input.file_node_id; + + let imported_names: HashMap<&str, &str> = file_input + .imported_names.iter() + .map(|im| (im.name.as_str(), im.file.as_str())) .collect(); - let mut edges = Vec::new(); + let type_map: HashMap<&str, &str> = file_input + .type_map.iter() + .map(|tm| (tm.name.as_str(), tm.type_name.as_str())) + .collect(); - for file_input in &files { - let rel_path = &file_input.file; - let file_node_id = file_input.file_node_id; - - // Build imported names map (pre-resolved including barrels by JS) - let imported_names: HashMap<&str, &str> = file_input - .imported_names - .iter() - .map(|im| (im.name.as_str(), im.file.as_str())) - .collect(); - - // Build type map (variable name → declared type name) - let type_map: HashMap<&str, &str> = file_input - .type_map - .iter() - .map(|tm| (tm.name.as_str(), tm.type_name.as_str())) - .collect(); - - // Build def → node ID map for caller resolution (match by name+kind+file+line) - let file_nodes: Vec<&NodeInfo> = all_nodes.iter().filter(|n| n.file == *rel_path).collect(); - - struct DefWithId<'a> { - _name: &'a str, - line: u32, - end_line: u32, - node_id: Option, + let file_nodes: Vec<&NodeInfo> = all_nodes.iter().filter(|n| n.file == *rel_path).collect(); + let defs_with_ids: Vec = file_input.definitions.iter().map(|d| { + let node_id = file_nodes.iter() + .find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line) + .map(|n| n.id); + DefWithId { _name: &d.name, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } + }).collect(); + + let mut seen_edges: HashSet = HashSet::new(); + + for call in &file_input.calls { + if let Some(ref receiver) = call.receiver { + if ctx.builtin_set.contains(receiver.as_str()) { continue; } } - let defs_with_ids: Vec = file_input - .definitions - .iter() - .map(|d| { - let 
node_id = file_nodes - .iter() - .find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line) - .map(|n| n.id); - DefWithId { - _name: &d.name, - line: d.line, - end_line: d.end_line.unwrap_or(u32::MAX), - node_id, - } - }) - .collect(); + let caller_id = find_enclosing_caller(&defs_with_ids, call.line, file_node_id); + let is_dynamic = if call.dynamic.unwrap_or(false) { 1u32 } else { 0u32 }; + let imported_from = imported_names.get(call.name.as_str()).copied(); - // Call + receiver edge dedup (single set, matching JS seenCallEdges with recv| prefix) - let mut seen_edges: HashSet = HashSet::new(); + let mut targets = resolve_call_targets(ctx, call, rel_path, imported_from, &type_map); + sort_targets_by_confidence(&mut targets, rel_path, imported_from); + emit_call_edges(&targets, caller_id, is_dynamic, rel_path, imported_from, &mut seen_edges, edges); + emit_receiver_edge(ctx, call, caller_id, rel_path, &type_map, &mut seen_edges, edges); + } - for call in &file_input.calls { - if let Some(ref receiver) = call.receiver { - if builtin_set.contains(receiver.as_str()) { - continue; - } - } + emit_hierarchy_edges(ctx, file_input, rel_path, edges); +} - // Find enclosing caller (narrowest span) - let mut caller_id = file_node_id; - let mut caller_span = u32::MAX; - - for def in &defs_with_ids { - if def.line <= call.line && call.line <= def.end_line { - let span = def.end_line - def.line; - if span < caller_span { - if let Some(id) = def.node_id { - caller_id = id; - caller_span = span; - } - } +/// Find the narrowest enclosing definition for a call at the given line. 
+fn find_enclosing_caller(defs: &[DefWithId], call_line: u32, file_node_id: u32) -> u32 { + let mut caller_id = file_node_id; + let mut caller_span = u32::MAX; + for def in defs { + if def.line <= call_line && call_line <= def.end_line { + let span = def.end_line - def.line; + if span < caller_span { + if let Some(id) = def.node_id { + caller_id = id; + caller_span = span; } } + } + } + caller_id +} - let is_dynamic = if call.dynamic.unwrap_or(false) { - 1u32 - } else { - 0u32 - }; - let imported_from = imported_names.get(call.name.as_str()).copied(); - - // Resolve targets - let mut targets: Vec<&NodeInfo> = Vec::new(); - - if let Some(imp_file) = imported_from { - targets = nodes_by_name_and_file - .get(&(call.name.as_str(), imp_file)) - .cloned() - .unwrap_or_default(); - } +/// Multi-strategy call target resolution: import-aware → same-file → method → type-aware → scoped. +fn resolve_call_targets<'a>( + ctx: &EdgeContext<'a>, + call: &CallInfo, + rel_path: &str, + imported_from: Option<&str>, + type_map: &HashMap<&str, &str>, +) -> Vec<&'a NodeInfo> { + // 1. 
Import-aware resolution + if let Some(imp_file) = imported_from { + let targets = ctx.nodes_by_name_and_file + .get(&(call.name.as_str(), imp_file)) + .cloned().unwrap_or_default(); + if !targets.is_empty() { return targets; } + } - if targets.is_empty() { - // Same file - targets = nodes_by_name_and_file - .get(&(call.name.as_str(), rel_path.as_str())) - .cloned() - .unwrap_or_default(); - - if targets.is_empty() { - // Method name match - let suffix = format!(".{}", call.name); - let method_candidates: Vec<&NodeInfo> = nodes_by_name - .get(call.name.as_str()) - .map(|v| { - v.iter() - .filter(|n| n.kind == "method" && n.name.ends_with(&suffix)) - .copied() - .collect() - }) - .unwrap_or_default(); - - if !method_candidates.is_empty() { - targets = method_candidates; - } else if let Some(ref receiver) = call.receiver { - // Type-aware resolution: translate variable receiver to declared type - if let Some(type_name) = type_map.get(receiver.as_str()) { - let qualified = format!("{}.{}", type_name, call.name); - let typed: Vec<&NodeInfo> = nodes_by_name - .get(qualified.as_str()) - .map(|v| v.iter().filter(|n| n.kind == "method").copied().collect()) - .unwrap_or_default(); - if !typed.is_empty() { - targets = typed; - } - } - } - - if targets.is_empty() - && (call.receiver.is_none() - || call.receiver.as_deref() == Some("this") - || call.receiver.as_deref() == Some("self") - || call.receiver.as_deref() == Some("super")) - { - // Scoped fallback — same-dir or parent-dir only - targets = nodes_by_name - .get(call.name.as_str()) - .map(|v| { - v.iter() - .filter(|n| { - import_resolution::compute_confidence( - rel_path, &n.file, None, - ) >= 0.5 - }) - .copied() - .collect() - }) - .unwrap_or_default(); - } - } - } + // 2. Same-file resolution + let targets = ctx.nodes_by_name_and_file + .get(&(call.name.as_str(), rel_path)) + .cloned().unwrap_or_default(); + if !targets.is_empty() { return targets; } + + // 3. 
Method name match + let suffix = format!(".{}", call.name); + let method_candidates: Vec<&NodeInfo> = ctx.nodes_by_name + .get(call.name.as_str()) + .map(|v| v.iter().filter(|n| n.kind == "method" && n.name.ends_with(&suffix)).copied().collect()) + .unwrap_or_default(); + if !method_candidates.is_empty() { return method_candidates; } + + // 4. Type-aware resolution via receiver → type map + if let Some(ref receiver) = call.receiver { + if let Some(type_name) = type_map.get(receiver.as_str()) { + let qualified = format!("{}.{}", type_name, call.name); + let typed: Vec<&NodeInfo> = ctx.nodes_by_name + .get(qualified.as_str()) + .map(|v| v.iter().filter(|n| n.kind == "method").copied().collect()) + .unwrap_or_default(); + if !typed.is_empty() { return typed; } + } + } - // Sort by confidence (descending) - if targets.len() > 1 { - targets.sort_by(|a, b| { - let conf_a = - import_resolution::compute_confidence(rel_path, &a.file, imported_from); - let conf_b = - import_resolution::compute_confidence(rel_path, &b.file, imported_from); - conf_b - .partial_cmp(&conf_a) - .unwrap_or(std::cmp::Ordering::Equal) - }); - } + // 5. 
Scoped fallback (this/self/super or no receiver) + if call.receiver.is_none() + || call.receiver.as_deref() == Some("this") + || call.receiver.as_deref() == Some("self") + || call.receiver.as_deref() == Some("super") + { + return ctx.nodes_by_name + .get(call.name.as_str()) + .map(|v| v.iter() + .filter(|n| import_resolution::compute_confidence(rel_path, &n.file, None) >= 0.5) + .copied().collect()) + .unwrap_or_default(); + } - for t in &targets { - let edge_key = ((caller_id as u64) << 32) | (t.id as u64); - if t.id != caller_id && !seen_edges.contains(&edge_key) { - seen_edges.insert(edge_key); - let confidence = - import_resolution::compute_confidence(rel_path, &t.file, imported_from); - edges.push(ComputedEdge { - source_id: caller_id, - target_id: t.id, - kind: "calls".to_string(), - confidence, - dynamic: is_dynamic, - }); - } - } + Vec::new() +} - // Receiver edge: caller → receiver type node - if let Some(ref receiver) = call.receiver { - if !builtin_set.contains(receiver.as_str()) - && receiver != "this" - && receiver != "self" - && receiver != "super" - { - // Resolve variable to its declared type via typeMap - let effective_receiver = type_map.get(receiver.as_str()).copied().unwrap_or(receiver.as_str()); - let type_resolved = effective_receiver != receiver.as_str(); - - let samefile = nodes_by_name_and_file - .get(&(effective_receiver, rel_path.as_str())) - .cloned() - .unwrap_or_default(); - let candidates = if !samefile.is_empty() { - samefile - } else { - nodes_by_name - .get(effective_receiver) - .cloned() - .unwrap_or_default() - }; - let receiver_nodes: Vec<&NodeInfo> = candidates - .into_iter() - .filter(|n| receiver_kinds.contains(n.kind.as_str())) - .collect(); - - if let Some(recv_target) = receiver_nodes.first() { - // Use high bit to separate receiver keys from call keys (matches JS recv| prefix) - let recv_key = - (1u64 << 63) | ((caller_id as u64) << 32) | (recv_target.id as u64); - if !seen_edges.contains(&recv_key) { - 
seen_edges.insert(recv_key); - let confidence = if type_resolved { 0.9 } else { 0.7 }; - edges.push(ComputedEdge { - source_id: caller_id, - target_id: recv_target.id, - kind: "receiver".to_string(), - confidence, - dynamic: 0, - }); - } - } - } - } +/// Sort targets by confidence descending. +fn sort_targets_by_confidence(targets: &mut Vec<&NodeInfo>, rel_path: &str, imported_from: Option<&str>) { + if targets.len() > 1 { + targets.sort_by(|a, b| { + let conf_a = import_resolution::compute_confidence(rel_path, &a.file, imported_from); + let conf_b = import_resolution::compute_confidence(rel_path, &b.file, imported_from); + conf_b.partial_cmp(&conf_a).unwrap_or(std::cmp::Ordering::Equal) + }); + } +} + +/// Emit call edges from caller to resolved targets (deduped). +fn emit_call_edges( + targets: &[&NodeInfo], caller_id: u32, is_dynamic: u32, + rel_path: &str, imported_from: Option<&str>, + seen_edges: &mut HashSet, edges: &mut Vec, +) { + for t in targets { + let edge_key = ((caller_id as u64) << 32) | (t.id as u64); + if t.id != caller_id && !seen_edges.contains(&edge_key) { + seen_edges.insert(edge_key); + let confidence = import_resolution::compute_confidence(rel_path, &t.file, imported_from); + edges.push(ComputedEdge { + source_id: caller_id, target_id: t.id, + kind: "calls".to_string(), confidence, dynamic: is_dynamic, + }); } + } +} - // Class extends/implements edges - for cls in &file_input.classes { - let source_row = nodes_by_name_and_file - .get(&(cls.name.as_str(), rel_path.as_str())) - .and_then(|v| v.iter().find(|n| HIERARCHY_SOURCE_KINDS.contains(&n.kind.as_str()))); - - if let Some(source) = source_row { - if let Some(ref extends_name) = cls.extends { - let targets = nodes_by_name - .get(extends_name.as_str()) - .map(|v| v.iter().filter(|n| { - EXTENDS_TARGET_KINDS.contains(&n.kind.as_str()) - }).collect::>()) - .unwrap_or_default(); - for t in targets { - edges.push(ComputedEdge { - source_id: source.id, - target_id: t.id, - kind: 
"extends".to_string(), - confidence: 1.0, - dynamic: 0, - }); - } - } - if let Some(ref implements_name) = cls.implements { - let targets = nodes_by_name - .get(implements_name.as_str()) - .map(|v| { - v.iter() - .filter(|n| IMPLEMENTS_TARGET_KINDS.contains(&n.kind.as_str())) - .collect::>() - }) - .unwrap_or_default(); - for t in targets { - edges.push(ComputedEdge { - source_id: source.id, - target_id: t.id, - kind: "implements".to_string(), - confidence: 1.0, - dynamic: 0, - }); - } - } - } +/// Emit a receiver edge from caller to the receiver's type node (if applicable). +fn emit_receiver_edge( + ctx: &EdgeContext, call: &CallInfo, caller_id: u32, rel_path: &str, + type_map: &HashMap<&str, &str>, + seen_edges: &mut HashSet, edges: &mut Vec, +) { + let Some(ref receiver) = call.receiver else { return }; + if ctx.builtin_set.contains(receiver.as_str()) + || receiver == "this" || receiver == "self" || receiver == "super" + { return; } + + let effective_receiver = type_map.get(receiver.as_str()).copied().unwrap_or(receiver.as_str()); + let type_resolved = effective_receiver != receiver.as_str(); + + let samefile = ctx.nodes_by_name_and_file + .get(&(effective_receiver, rel_path)) + .cloned().unwrap_or_default(); + let candidates = if !samefile.is_empty() { samefile } else { + ctx.nodes_by_name.get(effective_receiver).cloned().unwrap_or_default() + }; + let receiver_nodes: Vec<&NodeInfo> = candidates.into_iter() + .filter(|n| ctx.receiver_kinds.contains(n.kind.as_str())).collect(); + + if let Some(recv_target) = receiver_nodes.first() { + // High bit separates receiver keys from call keys (matches JS recv| prefix) + let recv_key = (1u64 << 63) | ((caller_id as u64) << 32) | (recv_target.id as u64); + if !seen_edges.contains(&recv_key) { + seen_edges.insert(recv_key); + let confidence = if type_resolved { 0.9 } else { 0.7 }; + edges.push(ComputedEdge { + source_id: caller_id, target_id: recv_target.id, + kind: "receiver".to_string(), confidence, dynamic: 0, + }); } } 
+} - edges +/// Emit extends and implements edges for class hierarchy declarations. +fn emit_hierarchy_edges( + ctx: &EdgeContext, file_input: &FileEdgeInput, rel_path: &str, + edges: &mut Vec, +) { + for cls in &file_input.classes { + let source_row = ctx.nodes_by_name_and_file + .get(&(cls.name.as_str(), rel_path)) + .and_then(|v| v.iter().find(|n| HIERARCHY_SOURCE_KINDS.contains(&n.kind.as_str()))); + + let Some(source) = source_row else { continue }; + + if let Some(ref extends_name) = cls.extends { + let targets = ctx.nodes_by_name.get(extends_name.as_str()) + .map(|v| v.iter().filter(|n| EXTENDS_TARGET_KINDS.contains(&n.kind.as_str())).collect::>()) + .unwrap_or_default(); + for t in targets { + edges.push(ComputedEdge { + source_id: source.id, target_id: t.id, + kind: "extends".to_string(), confidence: 1.0, dynamic: 0, + }); + } + } + if let Some(ref implements_name) = cls.implements { + let targets = ctx.nodes_by_name.get(implements_name.as_str()) + .map(|v| v.iter().filter(|n| IMPLEMENTS_TARGET_KINDS.contains(&n.kind.as_str())).collect::>()) + .unwrap_or_default(); + for t in targets { + edges.push(ComputedEdge { + source_id: source.id, target_id: t.id, + kind: "implements".to_string(), confidence: 1.0, dynamic: 0, + }); + } + } + } } diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 68025699..5a33c460 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct CSharpExtractor; @@ -17,282 +17,253 @@ impl SymbolExtractor for CSharpExtractor { } } -fn find_csharp_parent_type<'a>(node: &Node<'a>, source: &[u8]) -> Option { - let mut current = 
node.parent(); - while let Some(parent) = current { - match parent.kind() { - "class_declaration" | "struct_declaration" | "interface_declaration" - | "enum_declaration" | "record_declaration" => { - return parent - .child_by_field_name("name") - .map(|n| node_text(&n, source).to_string()); - } - _ => {} - } - current = parent.parent(); - } - None +const CSHARP_TYPE_KINDS: &[&str] = &[ + "class_declaration", "struct_declaration", "interface_declaration", + "enum_declaration", "record_declaration", +]; + +fn find_csharp_parent_type(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, CSHARP_TYPE_KINDS, source) } fn match_csharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "class_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let children = extract_csharp_class_fields(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - extract_csharp_base_types(node, &class_name, source, symbols); - } - } + "class_declaration" => handle_class_decl(node, source, symbols), + "struct_declaration" => handle_struct_decl(node, source, symbols), + "record_declaration" => handle_record_decl(node, source, symbols), + "interface_declaration" => handle_interface_decl(node, source, symbols), + "enum_declaration" => handle_enum_decl(node, source, symbols), + "method_declaration" => handle_method_decl(node, source, symbols), + "constructor_declaration" => handle_constructor_decl(node, source, symbols), + "property_declaration" => handle_property_decl(node, source, symbols), + "using_directive" => handle_using_directive(node, source, symbols), + "invocation_expression" => handle_invocation_expr(node, source, symbols), 
+ "object_creation_expression" => handle_object_creation(node, source, symbols), + _ => {} + } +} - "struct_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: name.clone(), - kind: "struct".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - extract_csharp_base_types(node, &name, source, symbols); - } - } +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - "record_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: name.clone(), - kind: "record".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - extract_csharp_base_types(node, &name, source, symbols); - } - } +fn handle_class_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_csharp_class_fields(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + extract_csharp_base_types(node, &class_name, source, symbols); +} - "interface_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let iface_name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: iface_name.clone(), - kind: "interface".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - 
complexity: None, - cfg: None, - children: None, - }); - if let Some(body) = node.child_by_field_name("body") { - for i in 0..body.child_count() { - if let Some(child) = body.child(i) { - if child.kind() == "method_declaration" { - if let Some(meth_name) = child.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: format!( - "{}.{}", - iface_name, - node_text(&meth_name, source) - ), - kind: "method".to_string(), - line: start_line(&child), - end_line: Some(end_line(&child)), - decorators: None, - complexity: compute_all_metrics(&child, source, "csharp"), - cfg: build_function_cfg(&child, "csharp", source), - children: None, - }); - } - } - } - } - } - } - } +fn handle_struct_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: name.clone(), + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + extract_csharp_base_types(node, &name, source, symbols); +} - "enum_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let enum_name = node_text(&name_node, source).to_string(); - let children = extract_csharp_enum_members(node, source); - symbols.definitions.push(Definition { - name: enum_name, - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - } +fn handle_record_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: name.clone(), + kind: "record".to_string(), + line: start_line(node), + end_line: 
Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + extract_csharp_base_types(node, &name, source, symbols); +} - "method_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let parent_type = find_csharp_parent_type(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_type { - Some(pt) => format!("{}.{}", pt, name), - None => name.to_string(), - }; - let children = extract_csharp_parameters(node, source); +fn handle_interface_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() != "method_declaration" { continue; } + if let Some(meth_name) = child.child_by_field_name("name") { symbols.definitions.push(Definition { - name: full_name, + name: format!("{}.{}", iface_name, node_text(&meth_name, source)), kind: "method".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), + line: start_line(&child), + end_line: Some(end_line(&child)), decorators: None, - complexity: compute_all_metrics(node, source, "csharp"), - cfg: build_function_cfg(node, "csharp", source), - children: opt_children(children), + complexity: compute_all_metrics(&child, source, "csharp"), + cfg: build_function_cfg(&child, "csharp", source), + children: None, }); } } + } +} - "constructor_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let parent_type = find_csharp_parent_type(node, source); - let 
name = node_text(&name_node, source); - let full_name = match &parent_type { - Some(pt) => format!("{}.{}", pt, name), - None => name.to_string(), - }; - let children = extract_csharp_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), +fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let enum_name = node_text(&name_node, source).to_string(); + let children = extract_csharp_enum_members(node, source); + symbols.definitions.push(Definition { + name: enum_name, + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} + +fn handle_method_or_ctor(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + let children = extract_csharp_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "csharp"), + cfg: build_function_cfg(node, "csharp", source), + children: opt_children(children), + }); +} + +fn handle_method_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + handle_method_or_ctor(node, source, symbols); +} + +fn handle_constructor_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + handle_method_or_ctor(node, source, symbols); +} + +fn handle_property_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + 
let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "csharp"), + cfg: build_function_cfg(node, "csharp", source), + children: None, + }); +} + +fn handle_using_directive(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = node + .child_by_field_name("name") + .or_else(|| find_child(node, "qualified_name")) + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let full_path = node_text(&name_node, source).to_string(); + let last_name = full_path.split('.').last().unwrap_or("").to_string(); + let mut imp = Import::new(full_path, vec![last_name], start_line(node)); + imp.csharp_using = Some(true); + symbols.imports.push(imp); + } +} + +fn handle_invocation_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let fn_node = node.child_by_field_name("function").or_else(|| node.child(0)); + let Some(fn_node) = fn_node else { return }; + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + "member_access_expression" => { + if let Some(name) = fn_node.child_by_field_name("name") { + let receiver = fn_node.child_by_field_name("expression") + .map(|expr| node_text(&expr, source).to_string()); + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "csharp"), - cfg: build_function_cfg(node, "csharp", source), - children: opt_children(children), + dynamic: 
None, + receiver, }); } } - - "property_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let parent_type = find_csharp_parent_type(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_type { - Some(pt) => format!("{}.{}", pt, name), - None => name.to_string(), - }; - symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), + "generic_name" | "member_binding_expression" => { + let name = fn_node.child_by_field_name("name").or_else(|| fn_node.child(0)); + if let Some(name) = name { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "csharp"), - cfg: build_function_cfg(node, "csharp", source), - children: None, + dynamic: None, + receiver: None, }); } } - - "using_directive" => { - let name_node = node - .child_by_field_name("name") - .or_else(|| find_child(node, "qualified_name")) - .or_else(|| find_child(node, "identifier")); - if let Some(name_node) = name_node { - let full_path = node_text(&name_node, source).to_string(); - let last_name = full_path.split('.').last().unwrap_or("").to_string(); - let mut imp = Import::new(full_path, vec![last_name], start_line(node)); - imp.csharp_using = Some(true); - symbols.imports.push(imp); - } - } - - "invocation_expression" => { - let fn_node = node - .child_by_field_name("function") - .or_else(|| node.child(0)); - if let Some(fn_node) = fn_node { - match fn_node.kind() { - "identifier" => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - "member_access_expression" => { - if let Some(name) = fn_node.child_by_field_name("name") { - let receiver = fn_node.child_by_field_name("expression") - .map(|expr| node_text(&expr, source).to_string()); - symbols.calls.push(Call { - name: 
node_text(&name, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } - } - "generic_name" | "member_binding_expression" => { - let name = fn_node - .child_by_field_name("name") - .or_else(|| fn_node.child(0)); - if let Some(name) = name { - symbols.calls.push(Call { - name: node_text(&name, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - } - _ => {} - } - } - } - - "object_creation_expression" => { - if let Some(type_node) = node.child_by_field_name("type") { - let type_name = if type_node.kind() == "generic_name" { - type_node - .child_by_field_name("name") - .or_else(|| type_node.child(0)) - .map(|n| node_text(&n, source).to_string()) - } else { - Some(node_text(&type_node, source).to_string()) - }; - if let Some(name) = type_name { - symbols.calls.push(Call { - name, - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - } - } - _ => {} } } +fn handle_object_creation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(type_node) = node.child_by_field_name("type") else { return }; + let type_name = if type_node.kind() == "generic_name" { + type_node.child_by_field_name("name").or_else(|| type_node.child(0)) + .map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver: None, + }); + } +} + // ── Extended kinds helpers ────────────────────────────────────────────────── fn extract_csharp_parameters(node: &Node, source: &[u8]) -> Vec { diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index 13c9866c..3676e47d 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use 
crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct GoExtractor; @@ -19,211 +19,204 @@ impl SymbolExtractor for GoExtractor { fn match_go_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "function_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let children = extract_go_parameters(node, source); + "function_declaration" => handle_function_decl(node, source, symbols), + "method_declaration" => handle_method_decl(node, source, symbols), + "type_declaration" => handle_type_decl(node, source, symbols), + "const_declaration" => handle_const_decl(node, source, symbols), + "import_declaration" => handle_import_decl(node, source, symbols), + "call_expression" => handle_call_expr(node, source, symbols), + _ => {} + } +} + +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── + +fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let children = extract_go_parameters(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "go"), + cfg: build_function_cfg(node, "go", source), + children: opt_children(children), + }); + } +} + +fn handle_method_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let receiver_type = extract_go_receiver_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &receiver_type { + Some(rt) => format!("{}.{}", rt, name), + None => name.to_string(), + }; + let children = 
extract_go_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "go"), + cfg: build_function_cfg(node, "go", source), + children: opt_children(children), + }); +} + +fn extract_go_receiver_type(node: &Node, source: &[u8]) -> Option { + let receiver = node.child_by_field_name("receiver")?; + for i in 0..receiver.child_count() { + if let Some(param) = receiver.child(i) { + if let Some(type_node) = param.child_by_field_name("type") { + return Some(if type_node.kind() == "pointer_type" { + node_text(&type_node, source).trim_start_matches('*').to_string() + } else { + node_text(&type_node, source).to_string() + }); + } + } + } + None +} + +fn handle_type_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..node.child_count() { + let Some(spec) = node.child(i) else { continue }; + if spec.kind() != "type_spec" { continue; } + let name_node = spec.child_by_field_name("name"); + let type_node = spec.child_by_field_name("type"); + let (Some(name_node), Some(type_node)) = (name_node, type_node) else { continue }; + let name = node_text(&name_node, source).to_string(); + match type_node.kind() { + "struct_type" => { + let children = extract_go_struct_fields(&type_node, source); symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "function".to_string(), + name, + kind: "struct".to_string(), line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: compute_all_metrics(node, source, "go"), - cfg: build_function_cfg(node, "go", source), + complexity: None, + cfg: None, children: opt_children(children), }); } - } - - "method_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let receiver = node.child_by_field_name("receiver"); - let mut receiver_type: Option = None; - 
if let Some(receiver) = receiver { - for i in 0..receiver.child_count() { - if let Some(param) = receiver.child(i) { - if let Some(type_node) = param.child_by_field_name("type") { - receiver_type = Some(if type_node.kind() == "pointer_type" { - node_text(&type_node, source) - .trim_start_matches('*') - .to_string() - } else { - node_text(&type_node, source).to_string() - }); - break; - } - } - } - } - let name = node_text(&name_node, source); - let full_name = match &receiver_type { - Some(rt) => format!("{}.{}", rt, name), - None => name.to_string(), - }; - let children = extract_go_parameters(node, source); + "interface_type" => { symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), + name: name.clone(), + kind: "interface".to_string(), line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: compute_all_metrics(node, source, "go"), - cfg: build_function_cfg(node, "go", source), - children: opt_children(children), + complexity: None, + cfg: None, + children: None, + }); + extract_go_interface_methods(&type_node, &name, source, symbols); + } + _ => { + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, }); } } + } +} - "type_declaration" => { - for i in 0..node.child_count() { - if let Some(spec) = node.child(i) { - if spec.kind() != "type_spec" { - continue; - } - let name_node = spec.child_by_field_name("name"); - let type_node = spec.child_by_field_name("type"); - if let (Some(name_node), Some(type_node)) = (name_node, type_node) { - let name = node_text(&name_node, source).to_string(); - match type_node.kind() { - "struct_type" => { - let children = extract_go_struct_fields(&type_node, source); - symbols.definitions.push(Definition { - name, - kind: "struct".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: 
None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - "interface_type" => { - symbols.definitions.push(Definition { - name: name.clone(), - kind: "interface".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - // Extract interface methods - for j in 0..type_node.child_count() { - if let Some(member) = type_node.child(j) { - if member.kind() == "method_elem" { - if let Some(meth_name) = - member.child_by_field_name("name") - { - symbols.definitions.push(Definition { - name: format!( - "{}.{}", - name, - node_text(&meth_name, source) - ), - kind: "method".to_string(), - line: start_line(&member), - end_line: Some(end_line(&member)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } - } - } - } - _ => { - symbols.definitions.push(Definition { - name, - kind: "type".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } - } - } - } +fn extract_go_interface_methods(type_node: &Node, iface_name: &str, source: &[u8], symbols: &mut FileSymbols) { + for j in 0..type_node.child_count() { + let Some(member) = type_node.child(j) else { continue }; + if member.kind() != "method_elem" { continue; } + if let Some(meth_name) = member.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!("{}.{}", iface_name, node_text(&meth_name, source)), + kind: "method".to_string(), + line: start_line(&member), + end_line: Some(end_line(&member)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); } + } +} - "const_declaration" => { - for i in 0..node.child_count() { - if let Some(spec) = node.child(i) { - if spec.kind() == "const_spec" { - if let Some(name_node) = spec.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: node_text(&name_node, 
source).to_string(), - kind: "constant".to_string(), - line: start_line(&spec), - end_line: Some(end_line(&spec)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } - } - } +fn handle_const_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..node.child_count() { + let Some(spec) = node.child(i) else { continue }; + if spec.kind() != "const_spec" { continue; } + if let Some(name_node) = spec.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "constant".to_string(), + line: start_line(&spec), + end_line: Some(end_line(&spec)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); } + } +} - "import_declaration" => { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - match child.kind() { - "import_spec" => { - extract_go_import_spec(&child, source, symbols); - } - "import_spec_list" => { - for j in 0..child.child_count() { - if let Some(spec) = child.child(j) { - if spec.kind() == "import_spec" { - extract_go_import_spec(&spec, source, symbols); - } - } - } +fn handle_import_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "import_spec" => { + extract_go_import_spec(&child, source, symbols); + } + "import_spec_list" => { + for j in 0..child.child_count() { + if let Some(spec) = child.child(j) { + if spec.kind() == "import_spec" { + extract_go_import_spec(&spec, source, symbols); } - _ => {} } } } + _ => {} } + } +} - "call_expression" => { - if let Some(fn_node) = node.child_by_field_name("function") { - match fn_node.kind() { - "identifier" => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - "selector_expression" => { - if let Some(field) = 
fn_node.child_by_field_name("field") { - let receiver = fn_node.child_by_field_name("operand") - .map(|op| node_text(&op, source).to_string()); - symbols.calls.push(Call { - name: node_text(&field, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } - } - _ => {} - } +fn handle_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(fn_node) = node.child_by_field_name("function") else { return }; + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + "selector_expression" => { + if let Some(field) = fn_node.child_by_field_name("field") { + let receiver = fn_node.child_by_field_name("operand") + .map(|op| node_text(&op, source).to_string()); + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); } } - _ => {} } } diff --git a/crates/codegraph-core/src/extractors/hcl.rs b/crates/codegraph-core/src/extractors/hcl.rs index a97e785b..98af5ed9 100644 --- a/crates/codegraph-core/src/extractors/hcl.rs +++ b/crates/codegraph-core/src/extractors/hcl.rs @@ -1,7 +1,7 @@ -use tree_sitter::{Node, Tree}; -use crate::types::*; use super::helpers::*; use super::SymbolExtractor; +use crate::types::*; +use tree_sitter::{Node, Tree}; pub struct HclExtractor; @@ -13,52 +13,72 @@ impl SymbolExtractor for HclExtractor { } } -fn match_hcl_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { - if node.kind() == "block" { - let mut identifiers = Vec::new(); - let mut strings = Vec::new(); - - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "identifier" { - identifiers.push(node_text(&child, source).to_string()); - } - if child.kind() == "string_lit" { - strings.push( - node_text(&child, source) - .replace('"', "") - .to_string(), - ); - } +/// Collect 
identifier and string children from a block node. +fn collect_block_tokens(node: &Node, source: &[u8]) -> (Vec, Vec) { + let mut identifiers = Vec::new(); + let mut strings = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "identifier" => identifiers.push(node_text(&child, source).to_string()), + "string_lit" => strings.push(node_text(&child, source).replace('"', "")), + _ => {} } } + } + (identifiers, strings) +} - if !identifiers.is_empty() { - let block_type = &identifiers[0]; - let mut name = String::new(); +/// Resolve the definition name from a block type and its string labels. +fn resolve_block_name(block_type: &str, strings: &[String]) -> String { + match block_type { + "resource" if strings.len() >= 2 => format!("{}.{}", strings[0], strings[1]), + "data" if strings.len() >= 2 => format!("data.{}.{}", strings[0], strings[1]), + "variable" | "output" | "module" if !strings.is_empty() => { + format!("{}.{}", block_type, strings[0]) + } + "locals" => "locals".to_string(), + "terraform" | "provider" if !strings.is_empty() => { + format!("{}.{}", block_type, strings[0]) + } + "terraform" | "provider" => block_type.to_string(), + _ => String::new(), + } +} - match block_type.as_str() { - "resource" if strings.len() >= 2 => { - name = format!("{}.{}", strings[0], strings[1]); - } - "data" if strings.len() >= 2 => { - name = format!("data.{}.{}", strings[0], strings[1]); - } - "variable" | "output" | "module" if !strings.is_empty() => { - name = format!("{}.{}", block_type, strings[0]); +/// Extract module source imports from a module block's body. 
+fn extract_module_source(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let body = node.children(&mut node.walk()).find(|c| c.kind() == "body"); + let body = match body { + Some(b) => b, + None => return, + }; + for i in 0..body.child_count() { + let attr = match body.child(i) { + Some(a) if a.kind() == "attribute" => a, + _ => continue, + }; + let key = attr.child_by_field_name("key").or_else(|| attr.child(0)); + let val = attr.child_by_field_name("val").or_else(|| attr.child(2)); + if let (Some(key), Some(val)) = (key, val) { + if node_text(&key, source) == "source" { + let src = node_text(&val, source).replace('"', ""); + if src.starts_with("./") || src.starts_with("../") { + symbols + .imports + .push(Import::new(src, vec![], start_line(&attr))); } - "locals" => { - name = "locals".to_string(); - } - "terraform" | "provider" => { - name = block_type.clone(); - if !strings.is_empty() { - name = format!("{}.{}", block_type, strings[0]); - } - } - _ => {} } + } + } +} +fn match_hcl_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + if node.kind() == "block" { + let (identifiers, strings) = collect_block_tokens(node, source); + if !identifiers.is_empty() { + let block_type = &identifiers[0]; + let name = resolve_block_name(block_type, &strings); if !name.is_empty() { symbols.definitions.push(Definition { name, @@ -70,40 +90,8 @@ fn match_hcl_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: cfg: None, children: None, }); - - // Module source imports if block_type == "module" { - let body = node - .children(&mut node.walk()) - .find(|c| c.kind() == "body"); - if let Some(body) = body { - for i in 0..body.child_count() { - if let Some(attr) = body.child(i) { - if attr.kind() == "attribute" { - let key = attr - .child_by_field_name("key") - .or_else(|| attr.child(0)); - let val = attr - .child_by_field_name("val") - .or_else(|| attr.child(2)); - if let (Some(key), Some(val)) = (key, val) { - if node_text(&key, 
source) == "source" { - let src = - node_text(&val, source).replace('"', ""); - if src.starts_with("./") || src.starts_with("../") - { - symbols.imports.push(Import::new( - src, - vec![], - start_line(&attr), - )); - } - } - } - } - } - } - } + extract_module_source(node, source, symbols); } } } diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 10342f80..1dccae00 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -1,5 +1,5 @@ -use tree_sitter::Node; use crate::types::{AstNode, Definition, FileSymbols}; +use tree_sitter::Node; // Re-export so extractors that `use super::helpers::*` still see it. pub use crate::constants::MAX_WALK_DEPTH; @@ -64,6 +64,26 @@ pub fn find_parent_of_types<'a>(node: &Node<'a>, kinds: &[&str]) -> Option Option { + let mut current = node.parent(); + while let Some(parent) = current { + if kinds.contains(&parent.kind()) { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + current = parent.parent(); + } + None +} + /// Get the name of a named field child, returning its text. 
pub fn named_child_text<'a>(node: &Node<'a>, field: &str, source: &'a [u8]) -> Option<&'a str> { node.child_by_field_name(field) diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 32ecc2ad..d41554db 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct JavaExtractor; @@ -63,238 +63,230 @@ fn match_java_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _d } } -fn find_java_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { - let mut current = node.parent(); - while let Some(parent) = current { - match parent.kind() { - "class_declaration" | "enum_declaration" | "interface_declaration" => { - return parent - .child_by_field_name("name") - .map(|n| node_text(&n, source).to_string()); - } - _ => {} - } - current = parent.parent(); - } - None +const JAVA_CLASS_KINDS: &[&str] = &["class_declaration", "enum_declaration", "interface_declaration"]; + +fn find_java_parent_class(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, JAVA_CLASS_KINDS, source) } fn match_java_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "class_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let children = extract_java_class_fields(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); + 
"class_declaration" => handle_class_decl(node, source, symbols), + "interface_declaration" => handle_interface_decl(node, source, symbols), + "enum_declaration" => handle_enum_decl(node, source, symbols), + "method_declaration" => handle_method_decl(node, source, symbols), + "constructor_declaration" => handle_constructor_decl(node, source, symbols), + "import_declaration" => handle_import_decl(node, source, symbols), + "method_invocation" => handle_method_invocation(node, source, symbols), + "object_creation_expression" => handle_object_creation(node, source, symbols), + _ => {} + } +} - // Superclass - if let Some(superclass) = node.child_by_field_name("superclass") { - for i in 0..superclass.child_count() { - if let Some(child) = superclass.child(i) { - match child.kind() { - "type_identifier" | "identifier" => { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: Some(node_text(&child, source).to_string()), - implements: None, - line: start_line(node), - }); - break; - } - "generic_type" => { - if let Some(first) = child.child(0) { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: Some( - node_text(&first, source).to_string(), - ), - implements: None, - line: start_line(node), - }); - } - break; - } - _ => {} - } - } - } - } +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - // Interfaces - if let Some(interfaces) = node.child_by_field_name("interfaces") { - extract_java_interfaces(&interfaces, &class_name, source, symbols); - } - } - } +fn handle_class_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_java_class_fields(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: 
None, + complexity: None, + cfg: None, + children: opt_children(children), + }); - "interface_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let iface_name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: iface_name.clone(), - kind: "interface".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - if let Some(body) = node.child_by_field_name("body") { - for i in 0..body.child_count() { - if let Some(child) = body.child(i) { - if child.kind() == "method_declaration" { - if let Some(meth_name) = child.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: format!( - "{}.{}", - iface_name, - node_text(&meth_name, source) - ), - kind: "method".to_string(), - line: start_line(&child), - end_line: Some(end_line(&child)), - decorators: None, - complexity: compute_all_metrics(&child, source, "java"), - cfg: build_function_cfg(&child, "java", source), - children: None, - }); - } - } - } - } - } - } - } + // Superclass + if let Some(superclass) = node.child_by_field_name("superclass") { + extract_java_superclass(&superclass, &class_name, node, source, symbols); + } - "enum_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let enum_name = node_text(&name_node, source).to_string(); - let children = extract_java_enum_constants(node, source); - symbols.definitions.push(Definition { - name: enum_name, - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - } + // Interfaces + if let Some(interfaces) = node.child_by_field_name("interfaces") { + extract_java_interfaces(&interfaces, &class_name, source, symbols); + } +} - "method_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let 
parent_class = find_java_parent_class(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_class { - Some(cls) => format!("{}.{}", cls, name), - None => name.to_string(), - }; - let children = extract_java_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), +fn extract_java_superclass(superclass: &Node, class_name: &str, node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..superclass.child_count() { + let Some(child) = superclass.child(i) else { continue }; + match child.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "java"), - cfg: build_function_cfg(node, "java", source), - children: opt_children(children), }); + break; } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&first, source).to_string()), + implements: None, + line: start_line(node), + }); + } + break; + } + _ => {} } + } +} - "constructor_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let parent_class = find_java_parent_class(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_class { - Some(cls) => format!("{}.{}", cls, name), - None => name.to_string(), - }; - let children = extract_java_parameters(node, source); +fn handle_interface_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + 
line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() != "method_declaration" { continue; } + if let Some(meth_name) = child.child_by_field_name("name") { symbols.definitions.push(Definition { - name: full_name, + name: format!("{}.{}", iface_name, node_text(&meth_name, source)), kind: "method".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), + line: start_line(&child), + end_line: Some(end_line(&child)), decorators: None, - complexity: compute_all_metrics(node, source, "java"), - cfg: build_function_cfg(node, "java", source), - children: opt_children(children), + complexity: compute_all_metrics(&child, source, "java"), + cfg: build_function_cfg(&child, "java", source), + children: None, }); } } + } +} - "import_declaration" => { - let mut import_path = String::new(); - let mut has_asterisk = false; - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "scoped_identifier" || child.kind() == "identifier" { - import_path = node_text(&child, source).to_string(); - } - if child.kind() == "asterisk" { - has_asterisk = true; - } - } - } - if !import_path.is_empty() { - let names = if has_asterisk { - vec!["*".to_string()] - } else { - let last = import_path.split('.').last().unwrap_or("").to_string(); - vec![last] - }; - let mut imp = Import::new(import_path, names, start_line(node)); - imp.java_import = Some(true); - symbols.imports.push(imp); - } - } +fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let enum_name = node_text(&name_node, source).to_string(); + let children = extract_java_enum_constants(node, source); + symbols.definitions.push(Definition { + name: enum_name, + 
kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} - "method_invocation" => { - if let Some(name_node) = node.child_by_field_name("name") { - let receiver = node.child_by_field_name("object") - .map(|obj| node_text(&obj, source).to_string()); - symbols.calls.push(Call { - name: node_text(&name_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } - } +fn handle_method_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_java_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + let children = extract_java_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "java"), + cfg: build_function_cfg(node, "java", source), + children: opt_children(children), + }); + } +} - "object_creation_expression" => { - if let Some(type_node) = node.child_by_field_name("type") { - let type_name = if type_node.kind() == "generic_type" { - type_node.child(0).map(|n| node_text(&n, source).to_string()) - } else { - Some(node_text(&type_node, source).to_string()) - }; - if let Some(name) = type_name { - symbols.calls.push(Call { - name, - line: start_line(node), - dynamic: None, - receiver: None, - }); - } +fn handle_constructor_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_java_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) 
=> format!("{}.{}", cls, name), + None => name.to_string(), + }; + let children = extract_java_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "java"), + cfg: build_function_cfg(node, "java", source), + children: opt_children(children), + }); + } +} + +fn handle_import_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut import_path = String::new(); + let mut has_asterisk = false; + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "scoped_identifier" || child.kind() == "identifier" { + import_path = node_text(&child, source).to_string(); + } + if child.kind() == "asterisk" { + has_asterisk = true; } } + } + if !import_path.is_empty() { + let names = if has_asterisk { + vec!["*".to_string()] + } else { + let last = import_path.split('.').last().unwrap_or("").to_string(); + vec![last] + }; + let mut imp = Import::new(import_path, names, start_line(node)); + imp.java_import = Some(true); + symbols.imports.push(imp); + } +} - _ => {} +fn handle_method_invocation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let receiver = node.child_by_field_name("object") + .map(|obj| node_text(&obj, source).to_string()); + symbols.calls.push(Call { + name: node_text(&name_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); + } +} + +fn handle_object_creation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(type_node) = node.child_by_field_name("type") else { return }; + let type_name = if type_node.kind() == "generic_type" { + type_node.child(0).map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + 
symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver: None, + }); } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 05bca14c..f65c8a38 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct JsExtractor; @@ -105,382 +105,347 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep fn match_js_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "function_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let children = extract_js_parameters(node, source); - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "function".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "javascript"), - cfg: build_function_cfg(node, "javascript", source), - children: opt_children(children), - }); - } - } + "function_declaration" => handle_function_decl(node, source, symbols), + "class_declaration" => handle_class_decl(node, source, symbols), + "method_definition" => handle_method_def(node, source, symbols), + "interface_declaration" => handle_interface_decl(node, source, symbols), + "type_alias_declaration" => handle_type_alias(node, source, symbols), + "enum_declaration" => handle_enum_decl(node, source, symbols), + "lexical_declaration" | "variable_declaration" => handle_var_decl(node, source, symbols), + "call_expression" => handle_call_expr(node, source, symbols), + 
"import_statement" => handle_import_stmt(node, source, symbols), + "export_statement" => handle_export_stmt(node, source, symbols), + "expression_statement" => handle_expr_stmt(node, source, symbols), + _ => {} + } +} - "class_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let children = extract_js_class_properties(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - - // Heritage: extends + implements - let heritage = node - .child_by_field_name("heritage") - .or_else(|| find_child(node, "class_heritage")); - if let Some(heritage) = heritage { - if let Some(super_name) = extract_superclass(&heritage, source) { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: Some(super_name), - implements: None, - line: start_line(node), - }); - } - for iface in extract_implements(&heritage, source) { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: None, - implements: Some(iface), - line: start_line(node), - }); - } - } - } - } +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - "method_definition" => { - if let Some(name_node) = node.child_by_field_name("name") { - let method_name = node_text(&name_node, source); - let parent_class = find_parent_class(node, source); - let full_name = match parent_class { - Some(cls) => format!("{}.{}", cls, method_name), - None => method_name.to_string(), - }; - let children = extract_js_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "javascript"), - cfg: 
build_function_cfg(node, "javascript", source), - children: opt_children(children), - }); - } - } +fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let children = extract_js_parameters(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "javascript"), + cfg: build_function_cfg(node, "javascript", source), + children: opt_children(children), + }); + } +} - "interface_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let iface_name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: iface_name.clone(), - kind: "interface".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - // Extract interface methods - let body = node - .child_by_field_name("body") - .or_else(|| find_child(node, "interface_body")) - .or_else(|| find_child(node, "object_type")); - if let Some(body) = body { - extract_interface_methods(&body, &iface_name, source, &mut symbols.definitions); - } - } +fn handle_class_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_js_class_properties(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + + // Heritage: extends + implements + let heritage = node + .child_by_field_name("heritage") + .or_else(|| 
find_child(node, "class_heritage")); + if let Some(heritage) = heritage { + if let Some(super_name) = extract_superclass(&heritage, source) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(super_name), + implements: None, + line: start_line(node), + }); } - - "type_alias_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "type".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } + for iface in extract_implements(&heritage, source) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(iface), + line: start_line(node), + }); } + } +} - "enum_declaration" => { - // TypeScript enum - if let Some(name_node) = node.child_by_field_name("name") { - let enum_name = node_text(&name_node, source).to_string(); - let children = extract_ts_enum_members(node, source); - symbols.definitions.push(Definition { - name: enum_name, - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - } +fn handle_method_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let method_name = node_text(&name_node, source); + let parent_class = find_parent_class(node, source); + let full_name = match parent_class { + Some(cls) => format!("{}.{}", cls, method_name), + None => method_name.to_string(), + }; + let children = extract_js_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "javascript"), + cfg: 
build_function_cfg(node, "javascript", source), + children: opt_children(children), + }); + } +} - "lexical_declaration" | "variable_declaration" => { - let is_const = node.child(0) - .map(|c| node_text(&c, source) == "const") - .unwrap_or(false); - for i in 0..node.child_count() { - if let Some(declarator) = node.child(i) { - if declarator.kind() == "variable_declarator" { - let name_n = declarator.child_by_field_name("name"); - let value_n = declarator.child_by_field_name("value"); - if let (Some(name_n), Some(value_n)) = (name_n, value_n) { - let vt = value_n.kind(); - if vt == "arrow_function" - || vt == "function_expression" - || vt == "function" - { - let children = extract_js_parameters(&value_n, source); - symbols.definitions.push(Definition { - name: node_text(&name_n, source).to_string(), - kind: "function".to_string(), - line: start_line(node), - end_line: Some(end_line(&value_n)), - decorators: None, - complexity: compute_all_metrics(&value_n, source, "javascript"), - cfg: build_function_cfg(&value_n, "javascript", source), - children: opt_children(children), - }); - } else if is_const && is_js_literal(&value_n) - && find_parent_of_types(node, &[ - "function_declaration", "arrow_function", - "function_expression", "method_definition", - "generator_function_declaration", "generator_function", - ]).is_none() - { - symbols.definitions.push(Definition { - name: node_text(&name_n, source).to_string(), - kind: "constant".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } - } - } - } +fn handle_interface_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: 
Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + // Extract interface methods + let body = node + .child_by_field_name("body") + .or_else(|| find_child(node, "interface_body")) + .or_else(|| find_child(node, "object_type")); + if let Some(body) = body { + extract_interface_methods(&body, &iface_name, source, &mut symbols.definitions); + } +} + +fn handle_type_alias(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let enum_name = node_text(&name_node, source).to_string(); + let children = extract_ts_enum_members(node, source); + symbols.definitions.push(Definition { + name: enum_name, + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} + +fn handle_var_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let is_const = node.child(0) + .map(|c| node_text(&c, source) == "const") + .unwrap_or(false); + for i in 0..node.child_count() { + let Some(declarator) = node.child(i) else { continue }; + if declarator.kind() != "variable_declarator" { continue; } + let name_n = declarator.child_by_field_name("name"); + let value_n = declarator.child_by_field_name("value"); + let (Some(name_n), Some(value_n)) = (name_n, value_n) else { continue }; + let vt = value_n.kind(); + if vt == "arrow_function" || vt == "function_expression" || vt == "function" { + let children = extract_js_parameters(&value_n, source); + 
symbols.definitions.push(Definition { + name: node_text(&name_n, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(&value_n)), + decorators: None, + complexity: compute_all_metrics(&value_n, source, "javascript"), + cfg: build_function_cfg(&value_n, "javascript", source), + children: opt_children(children), + }); + } else if is_const && is_js_literal(&value_n) + && find_parent_of_types(node, &[ + "function_declaration", "arrow_function", + "function_expression", "method_definition", + "generator_function_declaration", "generator_function", + ]).is_none() + { + symbols.definitions.push(Definition { + name: node_text(&name_n, source).to_string(), + kind: "constant".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); } + } +} - "call_expression" => { - if let Some(fn_node) = node.child_by_field_name("function") { - // Detect dynamic import() expressions - if fn_node.kind() == "import" { - if let Some(args) = node.child_by_field_name("arguments") - .or_else(|| find_child(node, "arguments")) - { - if let Some(str_node) = find_child(&args, "string") - .or_else(|| find_child(&args, "template_string")) - { - let mod_path = node_text(&str_node, source) - .replace(&['\'', '"', '`'][..], ""); - let names = extract_dynamic_import_names(node, source); - let mut imp = Import::new(mod_path, names, start_line(node)); - imp.dynamic_import = Some(true); - symbols.imports.push(imp); - } - } - } else if let Some(call_info) = extract_call_info(&fn_node, node, source) { - symbols.calls.push(call_info); - } - } - if let Some(cb_def) = extract_callback_definition(node, source) { - symbols.definitions.push(cb_def); - } +fn handle_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(fn_node) = node.child_by_field_name("function") { + if fn_node.kind() == "import" { + handle_dynamic_import(node, &fn_node, source, 
symbols); + } else if let Some(call_info) = extract_call_info(&fn_node, node, source) { + symbols.calls.push(call_info); } + } + if let Some(cb_def) = extract_callback_definition(node, source) { + symbols.definitions.push(cb_def); + } +} - "import_statement" => { - let text = node_text(node, source); - let is_type_only = text.starts_with("import type"); - let source_node = node - .child_by_field_name("source") - .or_else(|| find_child(node, "string")); - if let Some(source_node) = source_node { - let mod_path = node_text(&source_node, source) - .replace(&['\'', '"'][..], ""); - let names = extract_import_names(node, source); - let mut imp = Import::new(mod_path, names, start_line(node)); - if is_type_only { - imp.type_only = Some(true); - } - symbols.imports.push(imp); - } +fn handle_dynamic_import(node: &Node, _fn_node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let args = node.child_by_field_name("arguments") + .or_else(|| find_child(node, "arguments")); + let Some(args) = args else { return }; + let str_node = find_child(&args, "string") + .or_else(|| find_child(&args, "template_string")); + if let Some(str_node) = str_node { + let mod_path = node_text(&str_node, source) + .replace(&['\'', '"', '`'][..], ""); + let names = extract_dynamic_import_names(node, source); + let mut imp = Import::new(mod_path, names, start_line(node)); + imp.dynamic_import = Some(true); + symbols.imports.push(imp); + } +} + +fn handle_import_stmt(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let text = node_text(node, source); + let is_type_only = text.starts_with("import type"); + let source_node = node + .child_by_field_name("source") + .or_else(|| find_child(node, "string")); + if let Some(source_node) = source_node { + let mod_path = node_text(&source_node, source) + .replace(&['\'', '"'][..], ""); + let names = extract_import_names(node, source); + let mut imp = Import::new(mod_path, names, start_line(node)); + if is_type_only { + imp.type_only = Some(true); 
} + symbols.imports.push(imp); + } +} - "export_statement" => { - let decl = node.child_by_field_name("declaration"); - if let Some(decl) = &decl { - match decl.kind() { - "function_declaration" => { - if let Some(n) = decl.child_by_field_name("name") { - symbols.exports.push(ExportInfo { - name: node_text(&n, source).to_string(), - kind: "function".to_string(), - line: start_line(node), - }); - } - } - "class_declaration" => { - if let Some(n) = decl.child_by_field_name("name") { - symbols.exports.push(ExportInfo { - name: node_text(&n, source).to_string(), - kind: "class".to_string(), - line: start_line(node), - }); - } - } - "interface_declaration" => { - if let Some(n) = decl.child_by_field_name("name") { - symbols.exports.push(ExportInfo { - name: node_text(&n, source).to_string(), - kind: "interface".to_string(), - line: start_line(node), - }); - } - } - "type_alias_declaration" => { - if let Some(n) = decl.child_by_field_name("name") { - symbols.exports.push(ExportInfo { - name: node_text(&n, source).to_string(), - kind: "type".to_string(), - line: start_line(node), - }); - } - } - _ => {} - } - } - let source_node = node - .child_by_field_name("source") - .or_else(|| find_child(node, "string")); - if source_node.is_some() && decl.is_none() { - let source_node = source_node.unwrap(); - let mod_path = node_text(&source_node, source) +fn handle_export_stmt(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let decl = node.child_by_field_name("declaration"); + if let Some(decl) = &decl { + handle_export_declaration(node, decl, source, symbols); + } + let source_node = node + .child_by_field_name("source") + .or_else(|| find_child(node, "string")); + if source_node.is_some() && decl.is_none() { + handle_reexport(node, &source_node.unwrap(), source, symbols); + } +} + +fn handle_export_declaration(node: &Node, decl: &Node, source: &[u8], symbols: &mut FileSymbols) { + let (kind_str, field) = match decl.kind() { + "function_declaration" => ("function", 
"name"), + "class_declaration" => ("class", "name"), + "interface_declaration" => ("interface", "name"), + "type_alias_declaration" => ("type", "name"), + _ => return, + }; + if let Some(n) = decl.child_by_field_name(field) { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: kind_str.to_string(), + line: start_line(node), + }); + } +} + +fn handle_reexport(node: &Node, source_node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mod_path = node_text(source_node, source) + .replace(&['\'', '"'][..], ""); + let reexport_names = extract_import_names(node, source); + let text = node_text(node, source); + let is_wildcard = text.contains("export *") || text.contains("export*"); + let mut imp = Import::new(mod_path, reexport_names.clone(), start_line(node)); + imp.reexport = Some(true); + if is_wildcard && reexport_names.is_empty() { + imp.wildcard_reexport = Some(true); + } + symbols.imports.push(imp); +} + +fn handle_expr_stmt(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(expr) = node.child(0) else { return }; + if expr.kind() != "assignment_expression" { return; } + let left = expr.child_by_field_name("left"); + let right = expr.child_by_field_name("right"); + let (Some(left), Some(right)) = (left, right) else { return }; + let left_text = node_text(&left, source); + if !left_text.starts_with("module.exports") && left_text != "exports" { return; } + if right.kind() == "call_expression" { + handle_require_reexport(&right, node, source, symbols); + } + if right.kind() == "object" { + handle_spread_require_reexports(&right, node, source, symbols); + } +} + +fn handle_require_reexport(right: &Node, node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let fn_node = right.child_by_field_name("function"); + let args = right + .child_by_field_name("arguments") + .or_else(|| find_child(right, "arguments")); + if let (Some(fn_node), Some(args)) = (fn_node, args) { + if node_text(&fn_node, source) == 
"require" { + if let Some(str_arg) = find_child(&args, "string") { + let mod_path = node_text(&str_arg, source) .replace(&['\'', '"'][..], ""); - let reexport_names = extract_import_names(node, source); - let text = node_text(node, source); - let is_wildcard = - text.contains("export *") || text.contains("export*"); - let mut imp = Import::new(mod_path, reexport_names.clone(), start_line(node)); + let mut imp = Import::new(mod_path, vec![], start_line(node)); imp.reexport = Some(true); - if is_wildcard && reexport_names.is_empty() { - imp.wildcard_reexport = Some(true); - } + imp.wildcard_reexport = Some(true); symbols.imports.push(imp); } } + } +} - "expression_statement" => { - if let Some(expr) = node.child(0) { - if expr.kind() == "assignment_expression" { - let left = expr.child_by_field_name("left"); - let right = expr.child_by_field_name("right"); - if let (Some(left), Some(right)) = (left, right) { - let left_text = node_text(&left, source); - if left_text.starts_with("module.exports") || left_text == "exports" { - if right.kind() == "call_expression" { - let fn_node = right.child_by_field_name("function"); - let args = right - .child_by_field_name("arguments") - .or_else(|| find_child(&right, "arguments")); - if let (Some(fn_node), Some(args)) = (fn_node, args) { - if node_text(&fn_node, source) == "require" { - if let Some(str_arg) = find_child(&args, "string") { - let mod_path = node_text(&str_arg, source) - .replace(&['\'', '"'][..], ""); - let mut imp = - Import::new(mod_path, vec![], start_line(node)); - imp.reexport = Some(true); - imp.wildcard_reexport = Some(true); - symbols.imports.push(imp); - } - } - } - } - if right.kind() == "object" { - for ci in 0..right.child_count() { - if let Some(child) = right.child(ci) { - if child.kind() == "spread_element" { - let spread_expr = child - .child(1) - .or_else(|| child.child_by_field_name("value")); - if let Some(spread_expr) = spread_expr { - if spread_expr.kind() == "call_expression" { - let fn2 = 
spread_expr - .child_by_field_name("function"); - let args2 = spread_expr - .child_by_field_name("arguments") - .or_else(|| { - find_child( - &spread_expr, - "arguments", - ) - }); - if let (Some(fn2), Some(args2)) = - (fn2, args2) - { - if node_text(&fn2, source) == "require" { - if let Some(str_arg2) = - find_child(&args2, "string") - { - let mod_path2 = - node_text(&str_arg2, source) - .replace( - &['\'', '"'][..], - "", - ); - let mut imp = Import::new( - mod_path2, - vec![], - start_line(node), - ); - imp.reexport = Some(true); - imp.wildcard_reexport = Some(true); - symbols.imports.push(imp); - } - } - } - } - } - } - } - } - } - } - } - } - } +fn handle_spread_require_reexports(right: &Node, node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for ci in 0..right.child_count() { + let Some(child) = right.child(ci) else { continue }; + if child.kind() != "spread_element" { continue; } + let spread_expr = child.child(1) + .or_else(|| child.child_by_field_name("value")); + let Some(spread_expr) = spread_expr else { continue }; + if spread_expr.kind() != "call_expression" { continue; } + let fn2 = spread_expr.child_by_field_name("function"); + let args2 = spread_expr + .child_by_field_name("arguments") + .or_else(|| find_child(&spread_expr, "arguments")); + let (Some(fn2), Some(args2)) = (fn2, args2) else { continue }; + if node_text(&fn2, source) != "require" { continue; } + if let Some(str_arg2) = find_child(&args2, "string") { + let mod_path2 = node_text(&str_arg2, source) + .replace(&['\'', '"'][..], ""); + let mut imp = Import::new(mod_path2, vec![], start_line(node)); + imp.reexport = Some(true); + imp.wildcard_reexport = Some(true); + symbols.imports.push(imp); } - - _ => {} } } @@ -1153,18 +1118,10 @@ fn extract_superclass(heritage: &Node, source: &[u8]) -> Option { None } -fn find_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { - let mut current = node.parent(); - while let Some(parent) = current { - if parent.kind() == 
"class_declaration" || parent.kind() == "class" { - if let Some(name_node) = parent.child_by_field_name("name") { - return Some(node_text(&name_node, source).to_string()); - } - return None; - } - current = parent.parent(); - } - None +const JS_CLASS_KINDS: &[&str] = &["class_declaration", "class"]; + +fn find_parent_class(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, JS_CLASS_KINDS, source) } /// Extract named bindings from a dynamic `import()` call expression. diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs index a30b4635..d68ef68a 100644 --- a/crates/codegraph-core/src/extractors/php.rs +++ b/crates/codegraph-core/src/extractors/php.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct PhpExtractor; @@ -17,295 +17,281 @@ impl SymbolExtractor for PhpExtractor { } } -fn find_php_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { - let mut current = node.parent(); - while let Some(parent) = current { - match parent.kind() { - "class_declaration" | "trait_declaration" | "enum_declaration" => { - return parent - .child_by_field_name("name") - .map(|n| node_text(&n, source).to_string()); - } - _ => {} - } - current = parent.parent(); - } - None +const PHP_CLASS_KINDS: &[&str] = &["class_declaration", "trait_declaration", "enum_declaration"]; + +fn find_php_parent_class(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, PHP_CLASS_KINDS, source) } fn match_php_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "function_definition" => { - if let Some(name_node) = node.child_by_field_name("name") { - let children = extract_php_parameters(node, source); - 
symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "function".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "php"), - cfg: build_function_cfg(node, "php", source), - children: opt_children(children), - }); - } - } + "function_definition" => handle_function_def(node, source, symbols), + "class_declaration" => handle_class_decl(node, source, symbols), + "interface_declaration" => handle_interface_decl(node, source, symbols), + "trait_declaration" => handle_trait_decl(node, source, symbols), + "enum_declaration" => handle_enum_decl(node, source, symbols), + "method_declaration" => handle_method_decl(node, source, symbols), + "namespace_use_declaration" => handle_namespace_use(node, source, symbols), + "function_call_expression" => handle_function_call(node, source, symbols), + "member_call_expression" => handle_member_call(node, source, symbols), + "scoped_call_expression" => handle_scoped_call(node, source, symbols), + "object_creation_expression" => handle_object_creation(node, source, symbols), + _ => {} + } +} - "class_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let children = extract_php_class_properties(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - // Extends - let base_clause = node - .child_by_field_name("base_clause") - .or_else(|| find_child(node, "base_clause")); - if let Some(base_clause) = base_clause { - for i in 0..base_clause.child_count() { - if let Some(child) = base_clause.child(i) { - if child.kind() == "name" || 
child.kind() == "qualified_name" { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: Some(node_text(&child, source).to_string()), - implements: None, - line: start_line(node), - }); - break; - } - } - } - } +fn handle_function_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let children = extract_php_parameters(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "php"), + cfg: build_function_cfg(node, "php", source), + children: opt_children(children), + }); + } +} - // Implements - let interface_clause = find_child(node, "class_interface_clause"); - if let Some(interface_clause) = interface_clause { - for i in 0..interface_clause.child_count() { - if let Some(child) = interface_clause.child(i) { - if child.kind() == "name" || child.kind() == "qualified_name" { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: None, - implements: Some(node_text(&child, source).to_string()), - line: start_line(node), - }); - } - } - } +fn handle_class_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_php_class_properties(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + + // Extends + let base_clause = node + .child_by_field_name("base_clause") + .or_else(|| find_child(node, "base_clause")); + if let Some(base_clause) = base_clause { + for i in 
0..base_clause.child_count() { + if let Some(child) = base_clause.child(i) { + if child.kind() == "name" || child.kind() == "qualified_name" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + break; } } } + } - "interface_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let iface_name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: iface_name.clone(), - kind: "interface".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - if let Some(body) = node.child_by_field_name("body") { - for i in 0..body.child_count() { - if let Some(child) = body.child(i) { - if child.kind() == "method_declaration" { - if let Some(meth_name) = child.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: format!( - "{}.{}", - iface_name, - node_text(&meth_name, source) - ), - kind: "method".to_string(), - line: start_line(&child), - end_line: Some(end_line(&child)), - decorators: None, - complexity: compute_all_metrics(&child, source, "php"), - cfg: build_function_cfg(&child, "php", source), - children: None, - }); - } - } - } - } + // Implements + let interface_clause = find_child(node, "class_interface_clause"); + if let Some(interface_clause) = interface_clause { + for i in 0..interface_clause.child_count() { + if let Some(child) = interface_clause.child(i) { + if child.kind() == "name" || child.kind() == "qualified_name" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(node_text(&child, source).to_string()), + line: start_line(node), + }); } } } + } +} - "trait_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { +fn handle_interface_decl(node: &Node, source: &[u8], symbols: &mut 
FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() != "method_declaration" { continue; } + if let Some(meth_name) = child.child_by_field_name("name") { symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "trait".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), + name: format!("{}.{}", iface_name, node_text(&meth_name, source)), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), decorators: None, - complexity: None, - cfg: None, + complexity: compute_all_metrics(&child, source, "php"), + cfg: build_function_cfg(&child, "php", source), children: None, }); } } + } +} - "enum_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let enum_name = node_text(&name_node, source).to_string(); - let children = extract_php_enum_cases(node, source); - symbols.definitions.push(Definition { - name: enum_name, - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - } +fn handle_trait_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "trait".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, 
+ cfg: None, + children: None, + }); + } +} - "method_declaration" => { - if let Some(name_node) = node.child_by_field_name("name") { - let parent_class = find_php_parent_class(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_class { - Some(cls) => format!("{}.{}", cls, name), - None => name.to_string(), - }; - let children = extract_php_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "php"), - cfg: build_function_cfg(node, "php", source), - children: opt_children(children), - }); - } - } +fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let enum_name = node_text(&name_node, source).to_string(); + let children = extract_php_enum_cases(node, source); + symbols.definitions.push(Definition { + name: enum_name, + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} - "namespace_use_declaration" => { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "namespace_use_clause" { - let name_node = find_child(&child, "qualified_name") - .or_else(|| find_child(&child, "name")); - if let Some(name_node) = name_node { - let full_path = node_text(&name_node, source).to_string(); - let last_name = full_path.split('\\').last().unwrap_or("").to_string(); - let alias = child.child_by_field_name("alias"); - let alias_text = alias - .map(|a| node_text(&a, source).to_string()) - .unwrap_or(last_name); - let mut imp = - Import::new(full_path, vec![alias_text], start_line(node)); - imp.php_use = Some(true); - symbols.imports.push(imp); - } - } - // Single use clause without wrapper - if 
child.kind() == "qualified_name" || child.kind() == "name" { - let full_path = node_text(&child, source).to_string(); - let last_name = full_path.split('\\').last().unwrap_or("").to_string(); - let mut imp = - Import::new(full_path, vec![last_name], start_line(node)); - imp.php_use = Some(true); - symbols.imports.push(imp); - } - } - } - } +fn handle_method_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_php_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + let children = extract_php_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "php"), + cfg: build_function_cfg(node, "php", source), + children: opt_children(children), + }); + } +} - "function_call_expression" => { - let fn_node = node - .child_by_field_name("function") - .or_else(|| node.child(0)); - if let Some(fn_node) = fn_node { - match fn_node.kind() { - "name" | "identifier" => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - "qualified_name" => { - let text = node_text(&fn_node, source); - let last = text.split('\\').last().unwrap_or(""); - symbols.calls.push(Call { - name: last.to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - _ => {} - } +fn handle_namespace_use(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() == "namespace_use_clause" { + let name_node = find_child(&child, "qualified_name") + .or_else(|| find_child(&child, 
"name")); + if let Some(name_node) = name_node { + let full_path = node_text(&name_node, source).to_string(); + let last_name = full_path.split('\\').last().unwrap_or("").to_string(); + let alias = child.child_by_field_name("alias"); + let alias_text = alias + .map(|a| node_text(&a, source).to_string()) + .unwrap_or(last_name); + let mut imp = Import::new(full_path, vec![alias_text], start_line(node)); + imp.php_use = Some(true); + symbols.imports.push(imp); } } - - "member_call_expression" => { - if let Some(name) = node.child_by_field_name("name") { - let receiver = node.child_by_field_name("object") - .map(|obj| node_text(&obj, source).to_string()); - symbols.calls.push(Call { - name: node_text(&name, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } + // Single use clause without wrapper + if child.kind() == "qualified_name" || child.kind() == "name" { + let full_path = node_text(&child, source).to_string(); + let last_name = full_path.split('\\').last().unwrap_or("").to_string(); + let mut imp = Import::new(full_path, vec![last_name], start_line(node)); + imp.php_use = Some(true); + symbols.imports.push(imp); } + } +} - "scoped_call_expression" => { - if let Some(name) = node.child_by_field_name("name") { - let receiver = node.child_by_field_name("scope") - .map(|s| node_text(&s, source).to_string()); - symbols.calls.push(Call { - name: node_text(&name, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } +fn handle_function_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let fn_node = node.child_by_field_name("function").or_else(|| node.child(0)); + let Some(fn_node) = fn_node else { return }; + match fn_node.kind() { + "name" | "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); } - - "object_creation_expression" => { - // Skip 'new' keyword (child 0) and get class 
node (child 1) - if let Some(class_node) = node.child(1) { - if class_node.kind() == "name" || class_node.kind() == "qualified_name" { - let text = node_text(&class_node, source); - let last = text.split('\\').last().unwrap_or(""); - symbols.calls.push(Call { - name: last.to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - } + "qualified_name" => { + let text = node_text(&fn_node, source); + let last = text.split('\\').last().unwrap_or(""); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); } - _ => {} } } +fn handle_member_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name) = node.child_by_field_name("name") { + let receiver = node.child_by_field_name("object") + .map(|obj| node_text(&obj, source).to_string()); + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); + } +} + +fn handle_scoped_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name) = node.child_by_field_name("name") { + let receiver = node.child_by_field_name("scope") + .map(|s| node_text(&s, source).to_string()); + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); + } +} + +fn handle_object_creation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(class_node) = node.child(1) else { return }; + if class_node.kind() != "name" && class_node.kind() != "qualified_name" { return; } + let text = node_text(&class_node, source); + let last = text.split('\\').last().unwrap_or(""); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); +} + // ── Extended kinds helpers ────────────────────────────────────────────────── fn extract_php_parameters(node: &Node, source: &[u8]) -> Vec { diff --git 
a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs index 7a88e2c3..eb2eab2f 100644 --- a/crates/codegraph-core/src/extractors/python.rs +++ b/crates/codegraph-core/src/extractors/python.rs @@ -19,186 +19,177 @@ impl SymbolExtractor for PythonExtractor { fn match_python_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "function_definition" => { - if let Some(name_node) = node.child_by_field_name("name") { - let name_text = node_text(&name_node, source); - let mut decorators = Vec::new(); - if let Some(prev) = node.prev_sibling() { - if prev.kind() == "decorator" { - decorators.push(node_text(&prev, source).to_string()); - } - } - let parent_class = find_python_parent_class(node, source); - let (full_name, kind) = match &parent_class { - Some(cls) => (format!("{}.{}", cls, name_text), "method".to_string()), - None => (name_text.to_string(), "function".to_string()), - }; - let children = extract_python_parameters(node, source, parent_class.is_some()); - symbols.definitions.push(Definition { - name: full_name, - kind, - line: start_line(node), - end_line: Some(end_line(node)), - decorators: if decorators.is_empty() { - None - } else { - Some(decorators) - }, - complexity: compute_all_metrics(node, source, "python"), - cfg: build_function_cfg(node, "python", source), - children: opt_children(children), - }); - } - } + "function_definition" => handle_function_def(node, source, symbols), + "class_definition" => handle_class_def(node, source, symbols), + "expression_statement" => handle_expr_stmt(node, source, symbols), + "call" => handle_call(node, source, symbols), + "import_statement" => handle_import_stmt(node, source, symbols), + "import_from_statement" => handle_import_from_stmt(node, source, symbols), + _ => {} + } +} - "class_definition" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let 
children = extract_python_class_properties(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - let superclasses = node - .child_by_field_name("superclasses") - .or_else(|| find_child(node, "argument_list")); - if let Some(superclasses) = superclasses { - for i in 0..superclasses.child_count() { - if let Some(child) = superclasses.child(i) { - if child.kind() == "identifier" { - symbols.classes.push(ClassRelation { - name: class_name.clone(), - extends: Some(node_text(&child, source).to_string()), - implements: None, - line: start_line(node), - }); - } - } - } - } - } - } +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - "expression_statement" => { - // Module-level UPPER_CASE = literal → constant - if is_module_level(node) { - if let Some(expr) = node.child(0) { - if expr.kind() == "assignment" { - if let Some(left) = expr.child_by_field_name("left") { - if left.kind() == "identifier" { - let name = node_text(&left, source); - if is_upper_snake_case(name) { - symbols.definitions.push(Definition { - name: name.to_string(), - kind: "constant".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } - } - } - } - } +fn handle_function_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let name_text = node_text(&name_node, source); + let mut decorators = Vec::new(); + if let Some(prev) = node.prev_sibling() { + if prev.kind() == "decorator" { + decorators.push(node_text(&prev, source).to_string()); } + } + let parent_class = find_python_parent_class(node, source); + let (full_name, kind) = match &parent_class { + Some(cls) => 
(format!("{}.{}", cls, name_text), "method".to_string()), + None => (name_text.to_string(), "function".to_string()), + }; + let children = extract_python_parameters(node, source, parent_class.is_some()); + symbols.definitions.push(Definition { + name: full_name, + kind, + line: start_line(node), + end_line: Some(end_line(node)), + decorators: if decorators.is_empty() { None } else { Some(decorators) }, + complexity: compute_all_metrics(node, source, "python"), + cfg: build_function_cfg(node, "python", source), + children: opt_children(children), + }); +} - "call" => { - if let Some(fn_node) = node.child_by_field_name("function") { - let (call_name, receiver) = match fn_node.kind() { - "identifier" => (Some(node_text(&fn_node, source).to_string()), None), - "attribute" => { - let name = fn_node - .child_by_field_name("attribute") - .map(|a| node_text(&a, source).to_string()); - let recv = fn_node.child_by_field_name("object") - .map(|obj| node_text(&obj, source).to_string()); - (name, recv) - } - _ => (None, None), - }; - if let Some(name) = call_name { - symbols.calls.push(Call { - name, +fn handle_class_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_python_class_properties(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + let superclasses = node + .child_by_field_name("superclasses") + .or_else(|| find_child(node, "argument_list")); + if let Some(superclasses) = superclasses { + for i in 0..superclasses.child_count() { + if let Some(child) = superclasses.child(i) { + if child.kind() == "identifier" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: 
Some(node_text(&child, source).to_string()), + implements: None, line: start_line(node), - dynamic: None, - receiver, }); } } } + } +} - "import_statement" => { - let mut names = Vec::new(); - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "dotted_name" || child.kind() == "aliased_import" { - let name = if child.kind() == "aliased_import" { - child - .child_by_field_name("alias") - .or_else(|| child.child_by_field_name("name")) - .map(|n| node_text(&n, source).to_string()) - } else { - Some(node_text(&child, source).to_string()) - }; - if let Some(name) = name { - names.push(name); - } - } - } - } - if !names.is_empty() { - let mut imp = Import::new(names[0].clone(), names, start_line(node)); - imp.python_import = Some(true); - symbols.imports.push(imp); - } +fn handle_expr_stmt(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if !is_module_level(node) { return; } + let Some(expr) = node.child(0) else { return }; + if expr.kind() != "assignment" { return; } + let Some(left) = expr.child_by_field_name("left") else { return }; + if left.kind() != "identifier" { return; } + let name = node_text(&left, source); + if !is_upper_snake_case(name) { return; } + symbols.definitions.push(Definition { + name: name.to_string(), + kind: "constant".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(fn_node) = node.child_by_field_name("function") else { return }; + let (call_name, receiver) = match fn_node.kind() { + "identifier" => (Some(node_text(&fn_node, source).to_string()), None), + "attribute" => { + let name = fn_node + .child_by_field_name("attribute") + .map(|a| node_text(&a, source).to_string()); + let recv = fn_node.child_by_field_name("object") + .map(|obj| node_text(&obj, source).to_string()); + (name, recv) } + _ => (None, 
None), + }; + if let Some(name) = call_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver, + }); + } +} - "import_from_statement" => { - let mut source_str = String::new(); - let mut names = Vec::new(); - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - match child.kind() { - "dotted_name" | "relative_import" => { - if source_str.is_empty() { - source_str = node_text(&child, source).to_string(); - } else { - names.push(node_text(&child, source).to_string()); - } - } - "aliased_import" => { - let n = child - .child_by_field_name("name") - .or_else(|| child.child(0)); - if let Some(n) = n { - names.push(node_text(&n, source).to_string()); - } - } - "wildcard_import" => { - names.push("*".to_string()); - } - _ => {} - } +fn handle_import_stmt(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut names = Vec::new(); + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() != "dotted_name" && child.kind() != "aliased_import" { continue; } + let name = if child.kind() == "aliased_import" { + child + .child_by_field_name("alias") + .or_else(|| child.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&child, source).to_string()) + }; + if let Some(name) = name { + names.push(name); + } + } + if !names.is_empty() { + let mut imp = Import::new(names[0].clone(), names, start_line(node)); + imp.python_import = Some(true); + symbols.imports.push(imp); + } +} + +fn handle_import_from_stmt(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut source_str = String::new(); + let mut names = Vec::new(); + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "dotted_name" | "relative_import" => { + if source_str.is_empty() { + source_str = node_text(&child, source).to_string(); + } else { + names.push(node_text(&child, 
source).to_string()); } } - if !source_str.is_empty() { - let mut imp = Import::new(source_str, names, start_line(node)); - imp.python_import = Some(true); - symbols.imports.push(imp); + "aliased_import" => { + let n = child + .child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(n) = n { + names.push(node_text(&n, source).to_string()); + } } + "wildcard_import" => { + names.push("*".to_string()); + } + _ => {} } - - _ => {} + } + if !source_str.is_empty() { + let mut imp = Import::new(source_str, names, start_line(node)); + imp.python_import = Some(true); + symbols.imports.push(imp); } } @@ -319,17 +310,10 @@ fn is_upper_snake_case(s: &str) -> bool { // ── Existing helpers ──────────────────────────────────────────────────────── -fn find_python_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { - let mut current = node.parent(); - while let Some(parent) = current { - if parent.kind() == "class_definition" { - return parent - .child_by_field_name("name") - .map(|n| node_text(&n, source).to_string()); - } - current = parent.parent(); - } - None +const PYTHON_CLASS_KINDS: &[&str] = &["class_definition"]; + +fn find_python_parent_class(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, PYTHON_CLASS_KINDS, source) } fn extract_python_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { diff --git a/crates/codegraph-core/src/extractors/ruby.rs b/crates/codegraph-core/src/extractors/ruby.rs index 05400598..b74c5952 100644 --- a/crates/codegraph-core/src/extractors/ruby.rs +++ b/crates/codegraph-core/src/extractors/ruby.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct RubyExtractor; @@ -16,162 +16,147 @@ impl SymbolExtractor for RubyExtractor { } } -fn 
find_ruby_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { - let mut current = node.parent(); - while let Some(parent) = current { - match parent.kind() { - "class" | "module" => { - return parent - .child_by_field_name("name") - .map(|n| node_text(&n, source).to_string()); - } - _ => {} - } - current = parent.parent(); - } - None +const RUBY_CLASS_KINDS: &[&str] = &["class", "module"]; + +fn find_ruby_parent_class(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, RUBY_CLASS_KINDS, source) } fn match_ruby_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "class" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let children = extract_ruby_class_children(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - if let Some(superclass) = node.child_by_field_name("superclass") { - extract_ruby_superclass(&superclass, &class_name, node, source, symbols); - } - } - } + "class" => handle_class(node, source, symbols), + "module" => handle_module(node, source, symbols), + "method" => handle_method(node, source, symbols), + "singleton_method" => handle_singleton_method(node, source, symbols), + "call" => handle_call(node, source, symbols), + _ => {} + } +} - "module" => { - if let Some(name_node) = node.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "module".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - "method" => { - if let 
Some(name_node) = node.child_by_field_name("name") { - let parent_class = find_ruby_parent_class(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_class { - Some(cls) => format!("{}.{}", cls, name), - None => name.to_string(), - }; - let children = extract_ruby_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: "method".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "ruby"), - cfg: build_function_cfg(node, "ruby", source), - children: opt_children(children), - }); - } - } +fn handle_class(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_ruby_class_children(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + if let Some(superclass) = node.child_by_field_name("superclass") { + extract_ruby_superclass(&superclass, &class_name, node, source, symbols); + } +} - "singleton_method" => { - if let Some(name_node) = node.child_by_field_name("name") { - let parent_class = find_ruby_parent_class(node, source); - let name = node_text(&name_node, source); - let full_name = match &parent_class { - Some(cls) => format!("{}.{}", cls, name), - None => name.to_string(), - }; - symbols.definitions.push(Definition { - name: full_name, - kind: "function".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "ruby"), - cfg: build_function_cfg(node, "ruby", source), - children: None, - }); - } - } +fn handle_module(node: &Node, source: &[u8], symbols: 
&mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "module".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} - "call" => { - if let Some(method_node) = node.child_by_field_name("method") { - let method_text = node_text(&method_node, source); +fn handle_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let parent_class = find_ruby_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + let children = extract_ruby_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "ruby"), + cfg: build_function_cfg(node, "ruby", source), + children: opt_children(children), + }); +} - if method_text == "require" || method_text == "require_relative" { - let args = node.child_by_field_name("arguments"); - if let Some(args) = args { - for i in 0..args.child_count() { - if let Some(arg) = args.child(i) { - let str_content = extract_ruby_string_content(&arg, source); - if let Some(content) = str_content { - let last = content.split('/').last().unwrap_or("").to_string(); - let mut imp = - Import::new(content, vec![last], start_line(node)); - imp.ruby_require = Some(true); - symbols.imports.push(imp); - break; - } - } - } - } - } else if method_text == "include" - || method_text == "extend" - || method_text == "prepend" - { - let parent_class = find_ruby_parent_class(node, source); - if let Some(parent_class) = parent_class { - if let Some(args) = 
node.child_by_field_name("arguments") { - for i in 0..args.child_count() { - if let Some(arg) = args.child(i) { - if arg.kind() == "constant" - || arg.kind() == "scope_resolution" - { - symbols.classes.push(ClassRelation { - name: parent_class.clone(), - extends: None, - implements: Some( - node_text(&arg, source).to_string(), - ), - line: start_line(node), - }); - } - } - } - } - } - } else { - let receiver = node.child_by_field_name("receiver") - .map(|r| node_text(&r, source).to_string()); - symbols.calls.push(Call { - name: method_text.to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } - } +fn handle_singleton_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let parent_class = find_ruby_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "ruby"), + cfg: build_function_cfg(node, "ruby", source), + children: None, + }); +} + +fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(method_node) = node.child_by_field_name("method") else { return }; + let method_text = node_text(&method_node, source); + + if method_text == "require" || method_text == "require_relative" { + handle_require_call(node, source, symbols); + } else if method_text == "include" || method_text == "extend" || method_text == "prepend" { + handle_mixin_call(node, source, symbols); + } else { + let receiver = node.child_by_field_name("receiver") + .map(|r| node_text(&r, source).to_string()); + symbols.calls.push(Call { + name: method_text.to_string(), + line: start_line(node), + dynamic: None, + receiver, 
+ }); + } +} + +fn handle_require_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(args) = node.child_by_field_name("arguments") else { return }; + for i in 0..args.child_count() { + let Some(arg) = args.child(i) else { continue }; + if let Some(content) = extract_ruby_string_content(&arg, source) { + let last = content.split('/').last().unwrap_or("").to_string(); + let mut imp = Import::new(content, vec![last], start_line(node)); + imp.ruby_require = Some(true); + symbols.imports.push(imp); + break; } + } +} - _ => {} +fn handle_mixin_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(parent_class) = find_ruby_parent_class(node, source) else { return }; + let Some(args) = node.child_by_field_name("arguments") else { return }; + for i in 0..args.child_count() { + let Some(arg) = args.child(i) else { continue }; + if arg.kind() == "constant" || arg.kind() == "scope_resolution" { + symbols.classes.push(ClassRelation { + name: parent_class.clone(), + extends: None, + implements: Some(node_text(&arg, source).to_string()), + line: start_line(node), + }); + } } } diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index 0cc6cfb6..3d22416d 100644 --- a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Node, Tree}; +use super::helpers::*; +use super::SymbolExtractor; use crate::cfg::build_function_cfg; use crate::complexity::compute_all_metrics; use crate::types::*; -use super::helpers::*; -use super::SymbolExtractor; +use tree_sitter::{Node, Tree}; pub struct RustExtractor; @@ -32,202 +32,205 @@ fn find_current_impl<'a>(node: &Node<'a>, source: &[u8]) -> Option { fn match_rust_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "function_item" => { - // Skip default-impl functions inside traits — already emitted by trait_item 
handler - if node.parent() - .and_then(|p| p.parent()) - .map_or(false, |gp| gp.kind() == "trait_item") - { - // still recurse into children below - } else if let Some(name_node) = node.child_by_field_name("name") { - let name = node_text(&name_node, source); - let impl_type = find_current_impl(node, source); - let (full_name, kind) = match &impl_type { - Some(t) => (format!("{}.{}", t, name), "method".to_string()), - None => (name.to_string(), "function".to_string()), - }; - let children = extract_rust_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind, - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "rust"), - cfg: build_function_cfg(node, "rust", source), - children: opt_children(children), - }); - } - } + "function_item" => handle_function_item(node, source, symbols), + "struct_item" => handle_struct_item(node, source, symbols), + "enum_item" => handle_enum_item(node, source, symbols), + "const_item" => handle_const_item(node, source, symbols), + "trait_item" => handle_trait_item(node, source, symbols), + "impl_item" => handle_impl_item(node, source, symbols), + "use_declaration" => handle_use_decl(node, source, symbols), + "call_expression" => handle_call_expr(node, source, symbols), + "macro_invocation" => handle_macro_invocation(node, source, symbols), + _ => {} + } +} - "struct_item" => { - if let Some(name_node) = node.child_by_field_name("name") { - let children = extract_rust_struct_fields(node, source); - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "struct".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - } +// ── Per-node-kind handlers for walk_node_depth ─────────────────────────────── - "enum_item" => { - if let Some(name_node) = 
node.child_by_field_name("name") { - let children = extract_rust_enum_variants(node, source); - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } - } +fn handle_function_item(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // Skip default-impl functions inside traits — already emitted by trait_item handler + if node.parent() + .and_then(|p| p.parent()) + .map_or(false, |gp| gp.kind() == "trait_item") + { + return; + } + let Some(name_node) = node.child_by_field_name("name") else { return }; + let name = node_text(&name_node, source); + let impl_type = find_current_impl(node, source); + let (full_name, kind) = match &impl_type { + Some(t) => (format!("{}.{}", t, name), "method".to_string()), + None => (name.to_string(), "function".to_string()), + }; + let children = extract_rust_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind, + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "rust"), + cfg: build_function_cfg(node, "rust", source), + children: opt_children(children), + }); +} - "const_item" => { - if let Some(name_node) = node.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "constant".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - } - } +fn handle_struct_item(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let children = extract_rust_struct_fields(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: 
"struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} + +fn handle_enum_item(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let children = extract_rust_enum_variants(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} - "trait_item" => { - if let Some(name_node) = node.child_by_field_name("name") { - let trait_name = node_text(&name_node, source).to_string(); +fn handle_const_item(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "constant".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_trait_item(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let trait_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: trait_name.clone(), + kind: "trait".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() != "function_signature_item" && child.kind() != "function_item" { + continue; + } + if let Some(meth_name) = child.child_by_field_name("name") { 
symbols.definitions.push(Definition { - name: trait_name.clone(), - kind: "trait".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), + name: format!("{}.{}", trait_name, node_text(&meth_name, source)), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), decorators: None, - complexity: None, - cfg: None, + complexity: compute_all_metrics(&child, source, "rust"), + cfg: build_function_cfg(&child, "rust", source), children: None, }); - if let Some(body) = node.child_by_field_name("body") { - for i in 0..body.child_count() { - if let Some(child) = body.child(i) { - if child.kind() == "function_signature_item" - || child.kind() == "function_item" - { - if let Some(meth_name) = child.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: format!( - "{}.{}", - trait_name, - node_text(&meth_name, source) - ), - kind: "method".to_string(), - line: start_line(&child), - end_line: Some(end_line(&child)), - decorators: None, - complexity: compute_all_metrics(&child, source, "rust"), - cfg: build_function_cfg(&child, "rust", source), - children: None, - }); - } - } - } - } - } } } + } +} - "impl_item" => { - let type_node = node.child_by_field_name("type"); - let trait_node = node.child_by_field_name("trait"); - if let (Some(type_node), Some(trait_node)) = (type_node, trait_node) { - symbols.classes.push(ClassRelation { - name: node_text(&type_node, source).to_string(), - extends: None, - implements: Some(node_text(&trait_node, source).to_string()), - line: start_line(node), - }); - } - } +fn handle_impl_item(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let type_node = node.child_by_field_name("type"); + let trait_node = node.child_by_field_name("trait"); + if let (Some(type_node), Some(trait_node)) = (type_node, trait_node) { + symbols.classes.push(ClassRelation { + name: node_text(&type_node, source).to_string(), + extends: None, + implements: Some(node_text(&trait_node, 
source).to_string()), + line: start_line(node), + }); + } +} - "use_declaration" => { - if let Some(arg_node) = node.child(1) { - let use_paths = extract_rust_use_path(&arg_node, source); - for (src, names) in use_paths { - let mut imp = Import::new(src, names, start_line(node)); - imp.rust_use = Some(true); - symbols.imports.push(imp); - } - } +fn handle_use_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(arg_node) = node.child(1) { + let use_paths = extract_rust_use_path(&arg_node, source); + for (src, names) in use_paths { + let mut imp = Import::new(src, names, start_line(node)); + imp.rust_use = Some(true); + symbols.imports.push(imp); } + } +} - "call_expression" => { - if let Some(fn_node) = node.child_by_field_name("function") { - match fn_node.kind() { - "identifier" => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - "field_expression" => { - if let Some(field) = fn_node.child_by_field_name("field") { - let receiver = fn_node.child_by_field_name("value") - .map(|v| node_text(&v, source).to_string()); - symbols.calls.push(Call { - name: node_text(&field, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } - } - "scoped_identifier" => { - if let Some(name) = fn_node.child_by_field_name("name") { - let receiver = fn_node.child_by_field_name("path") - .map(|p| node_text(&p, source).to_string()); - symbols.calls.push(Call { - name: node_text(&name, source).to_string(), - line: start_line(node), - dynamic: None, - receiver, - }); - } - } - _ => {} - } +fn handle_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(fn_node) = node.child_by_field_name("function") else { return }; + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + 
"field_expression" => { + if let Some(field) = fn_node.child_by_field_name("field") { + let receiver = fn_node.child_by_field_name("value") + .map(|v| node_text(&v, source).to_string()); + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); } } - - "macro_invocation" => { - if let Some(macro_node) = node.child(0) { + "scoped_identifier" => { + if let Some(name) = fn_node.child_by_field_name("name") { + let receiver = fn_node.child_by_field_name("path") + .map(|p| node_text(&p, source).to_string()); symbols.calls.push(Call { - name: format!("{}!", node_text(¯o_node, source)), + name: node_text(&name, source).to_string(), line: start_line(node), dynamic: None, - receiver: None, + receiver, }); } } - _ => {} } } +fn handle_macro_invocation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(macro_node) = node.child(0) { + symbols.calls.push(Call { + name: format!("{}!", node_text(¯o_node, source)), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } +} + // ── Extended kinds helpers ────────────────────────────────────────────────── fn extract_rust_parameters(node: &Node, source: &[u8]) -> Vec { diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/roles_db.rs index a901f392..93653c37 100644 --- a/crates/codegraph-core/src/roles_db.rs +++ b/crates/codegraph-core/src/roles_db.rs @@ -119,10 +119,7 @@ fn classify_node( median_fan_out: u32, ) -> &'static str { // Framework entry - if FRAMEWORK_ENTRY_PREFIXES - .iter() - .any(|p| name.starts_with(p)) - { + if FRAMEWORK_ENTRY_PREFIXES.iter().any(|p| name.starts_with(p)) { return "entry"; } @@ -212,7 +209,7 @@ pub(crate) fn do_classify_full(conn: &Connection) -> rusqlite::Result dead-leaf let leaf_ids: Vec = { let mut stmt = tx.prepare("SELECT id FROM nodes WHERE kind IN ('parameter', 'property')")?; @@ -220,7 +217,7 @@ pub(crate) fn do_classify_full(conn: &Connection) -> 
rusqlite::Result = { let mut stmt = tx.prepare( "SELECT n.id, n.name, n.kind, n.file, @@ -268,21 +265,17 @@ pub(crate) fn do_classify_full(conn: &Connection) -> rusqlite::Result = { - let test_filter = TEST_FILE_PATTERNS - .iter() - .map(|p| format!("AND caller.file NOT LIKE '{}'", p)) - .collect::>() - .join(" "); let sql = format!( "SELECT e.target_id, COUNT(*) AS cnt FROM edges e JOIN nodes caller ON e.source_id = caller.id WHERE e.kind = 'calls' {} GROUP BY e.target_id", - test_filter + test_file_filter() ); let mut stmt = tx.prepare(&sql)?; - let mapped = stmt.query_map([], |row| Ok((row.get::<_, i64>(0)?, row.get::<_, u32>(1)?)))?; + let mapped = + stmt.query_map([], |row| Ok((row.get::<_, i64>(0)?, row.get::<_, u32>(1)?)))?; mapped.filter_map(|r| r.ok()).collect() }; @@ -303,7 +296,105 @@ pub(crate) fn do_classify_full(conn: &Connection) -> rusqlite::Result String { + TEST_FILE_PATTERNS + .iter() + .map(|p| format!("AND caller.file NOT LIKE '{}'", p)) + .collect::>() + .join(" ") +} + +/// Compute global median fan-in and fan-out from the edge distribution. +fn compute_global_medians(tx: &rusqlite::Transaction) -> rusqlite::Result<(u32, u32)> { + let median_fan_in = { + let mut stmt = tx + .prepare("SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id")?; + let mut vals: Vec = stmt + .query_map([], |row| row.get::<_, u32>(0))? + .filter_map(|r| r.ok()) + .collect(); + vals.sort_unstable(); + median(&vals) + }; + let median_fan_out = { + let mut stmt = tx + .prepare("SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id")?; + let mut vals: Vec = stmt + .query_map([], |row| row.get::<_, u32>(0))? + .filter_map(|r| r.ok()) + .collect(); + vals.sort_unstable(); + median(&vals) + }; + Ok((median_fan_in, median_fan_out)) +} + +/// Execute a query with bound file parameters and collect i64 results into a HashSet. 
+fn query_id_set( + tx: &rusqlite::Transaction, + sql: &str, + files: &[&str], +) -> rusqlite::Result> { + let mut stmt = tx.prepare(sql)?; + for (i, f) in files.iter().enumerate() { + stmt.raw_bind_parameter(i + 1, *f)?; + } + let mut rows = stmt.raw_query(); + let mut result = std::collections::HashSet::new(); + while let Some(row) = rows.next()? { + result.insert(row.get::<_, i64>(0)?); + } + Ok(result) +} + +/// Execute a query with bound file parameters and collect (id, count) into a HashMap. +fn query_id_counts( + tx: &rusqlite::Transaction, + sql: &str, + files: &[&str], +) -> rusqlite::Result> { + let mut stmt = tx.prepare(sql)?; + for (i, f) in files.iter().enumerate() { + stmt.raw_bind_parameter(i + 1, *f)?; + } + let mut rows = stmt.raw_query(); + let mut result = HashMap::new(); + while let Some(row) = rows.next()? { + result.insert(row.get::<_, i64>(0)?, row.get::<_, u32>(1)?); + } + Ok(result) +} + +/// Classify rows and accumulate into ids_by_role and summary. +fn classify_rows( + rows: &[(i64, String, String, String, u32, u32)], + exported_ids: &std::collections::HashSet, + prod_fan_in: &HashMap, + median_fan_in: u32, + median_fan_out: u32, + ids_by_role: &mut HashMap<&'static str, Vec>, + summary: &mut RoleSummary, +) { + for (id, name, kind, file, fan_in, fan_out) in rows { let is_exported = exported_ids.contains(id); let prod_fi = prod_fan_in.get(id).copied().unwrap_or(0); let role = classify_node( @@ -317,32 +408,22 @@ pub(crate) fn do_classify_full(conn: &Connection) -> rusqlite::Result rusqlite::Result { - let tx = conn.unchecked_transaction()?; - let mut summary = RoleSummary::default(); - - // Build placeholders for changed files - let seed_ph: String = changed_files.iter().map(|_| "?").collect::>().join(","); - - // Expand affected set: include edge neighbours - let neighbour_sql = format!( +) -> rusqlite::Result> { + let seed_ph: String = changed_files + .iter() + .map(|_| "?") + .collect::>() + .join(","); + let sql = format!( "SELECT 
DISTINCT n2.file FROM edges e JOIN nodes n1 ON (e.source_id = n1.id OR e.target_id = n1.id) JOIN nodes n2 ON (e.source_id = n2.id OR e.target_id = n2.id) @@ -352,65 +433,38 @@ pub(crate) fn do_classify_incremental( AND n2.kind NOT IN ('file', 'directory')", seed_ph, seed_ph ); - let neighbour_files: Vec = { - let mut stmt = tx.prepare(&neighbour_sql)?; - // Bind changed_files twice (for both IN clauses) - let mut idx = 1; - for f in changed_files { - stmt.raw_bind_parameter(idx, f.as_str())?; - idx += 1; - } - for f in changed_files { - stmt.raw_bind_parameter(idx, f.as_str())?; - idx += 1; - } - let rows = stmt.raw_query(); - let mut result = Vec::new(); - let mut rows = rows; - while let Some(row) = rows.next()? { - result.push(row.get::<_, String>(0)?); - } - result - }; - - let mut all_affected: Vec<&str> = changed_files.iter().map(|s| s.as_str()).collect(); - for f in &neighbour_files { - all_affected.push(f.as_str()); + let mut stmt = tx.prepare(&sql)?; + let mut idx = 1; + for f in changed_files { + stmt.raw_bind_parameter(idx, f.as_str())?; + idx += 1; } - let affected_ph: String = all_affected.iter().map(|_| "?").collect::>().join(","); + for f in changed_files { + stmt.raw_bind_parameter(idx, f.as_str())?; + idx += 1; + } + let mut rows = stmt.raw_query(); + let mut result = Vec::new(); + while let Some(row) = rows.next()? { + result.push(row.get::<_, String>(0)?); + } + Ok(result) +} - // 1. Global medians from edge distribution - let median_fan_in = { - let mut stmt = tx.prepare( - "SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id", - )?; - let mut vals: Vec = stmt - .query_map([], |row| row.get::<_, u32>(0))? - .filter_map(|r| r.ok()) - .collect(); - vals.sort_unstable(); - median(&vals) - }; - let median_fan_out = { - let mut stmt = tx.prepare( - "SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id", - )?; - let mut vals: Vec = stmt - .query_map([], |row| row.get::<_, u32>(0))? 
- .filter_map(|r| r.ok()) - .collect(); - vals.sort_unstable(); - median(&vals) - }; +/// Query leaf kind node IDs and callable node rows for a set of files. +fn query_nodes_for_files( + tx: &rusqlite::Transaction, + files: &[&str], +) -> rusqlite::Result<(Vec, Vec<(i64, String, String, String, u32, u32)>)> { + let ph: String = files.iter().map(|_| "?").collect::>().join(","); - // 2a. Leaf kinds in affected files let leaf_sql = format!( "SELECT id FROM nodes WHERE kind IN ('parameter', 'property') AND file IN ({})", - affected_ph + ph ); let leaf_ids: Vec = { let mut stmt = tx.prepare(&leaf_sql)?; - for (i, f) in all_affected.iter().enumerate() { + for (i, f) in files.iter().enumerate() { stmt.raw_bind_parameter(i + 1, *f)?; } let mut rows = stmt.raw_query(); @@ -421,7 +475,6 @@ pub(crate) fn do_classify_incremental( result }; - // 2b. Callable nodes with correlated subquery fan-in/fan-out let rows_sql = format!( "SELECT n.id, n.name, n.kind, n.file, (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND target_id = n.id) AS fan_in, @@ -429,11 +482,11 @@ pub(crate) fn do_classify_incremental( FROM nodes n WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property') AND n.file IN ({})", - affected_ph + ph ); let rows: Vec<(i64, String, String, String, u32, u32)> = { let mut stmt = tx.prepare(&rows_sql)?; - for (i, f) in all_affected.iter().enumerate() { + for (i, f) in files.iter().enumerate() { stmt.raw_bind_parameter(i + 1, *f)?; } let mut qrows = stmt.raw_query(); @@ -451,12 +504,39 @@ pub(crate) fn do_classify_incremental( result }; + Ok((leaf_ids, rows)) +} + +// ── Incremental classification ─────────────────────────────────────── + +pub(crate) fn do_classify_incremental( + conn: &Connection, + changed_files: &[String], +) -> rusqlite::Result { + let tx = conn.unchecked_transaction()?; + let mut summary = RoleSummary::default(); + + let neighbour_files = find_neighbour_files(&tx, changed_files)?; + + let mut all_affected: Vec<&str> = 
changed_files.iter().map(|s| s.as_str()).collect(); + for f in &neighbour_files { + all_affected.push(f.as_str()); + } + let affected_ph: String = all_affected + .iter() + .map(|_| "?") + .collect::>() + .join(","); + + let (median_fan_in, median_fan_out) = compute_global_medians(&tx)?; + + let (leaf_ids, rows) = query_nodes_for_files(&tx, &all_affected)?; + if rows.is_empty() && leaf_ids.is_empty() { tx.commit()?; return Ok(summary); } - // 3. Exported IDs for affected nodes let exported_sql = format!( "SELECT DISTINCT e.target_id FROM edges e @@ -466,25 +546,8 @@ pub(crate) fn do_classify_incremental( AND target.file IN ({})", affected_ph ); - let exported_ids: std::collections::HashSet = { - let mut stmt = tx.prepare(&exported_sql)?; - for (i, f) in all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i + 1, *f)?; - } - let mut qrows = stmt.raw_query(); - let mut result = std::collections::HashSet::new(); - while let Some(row) = qrows.next()? { - result.insert(row.get::<_, i64>(0)?); - } - result - }; + let exported_ids = query_id_set(&tx, &exported_sql, &all_affected)?; - // 4. Production fan-in for affected nodes - let test_filter = TEST_FILE_PATTERNS - .iter() - .map(|p| format!("AND caller.file NOT LIKE '{}'", p)) - .collect::>() - .join(" "); let prod_sql = format!( "SELECT e.target_id, COUNT(*) AS cnt FROM edges e @@ -494,22 +557,11 @@ pub(crate) fn do_classify_incremental( AND target.file IN ({}) {} GROUP BY e.target_id", - affected_ph, test_filter + affected_ph, + test_file_filter() ); - let prod_fan_in: HashMap = { - let mut stmt = tx.prepare(&prod_sql)?; - for (i, f) in all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i + 1, *f)?; - } - let mut qrows = stmt.raw_query(); - let mut result = HashMap::new(); - while let Some(row) = qrows.next()? { - result.insert(row.get::<_, i64>(0)?, row.get::<_, u32>(1)?); - } - result - }; + let prod_fan_in = query_id_counts(&tx, &prod_sql, &all_affected)?; - // 5. 
Classify let mut ids_by_role: HashMap<&str, Vec> = HashMap::new(); if !leaf_ids.is_empty() { @@ -518,25 +570,17 @@ pub(crate) fn do_classify_incremental( ids_by_role.insert("dead-leaf", leaf_ids); } - for (id, name, kind, file, fan_in, fan_out) in &rows { - let is_exported = exported_ids.contains(id); - let prod_fi = prod_fan_in.get(id).copied().unwrap_or(0); - let role = classify_node( - name, - kind, - file, - *fan_in, - *fan_out, - is_exported, - prod_fi, - median_fan_in, - median_fan_out, - ); - increment_summary(&mut summary, role); - ids_by_role.entry(role).or_default().push(*id); - } + classify_rows( + &rows, + &exported_ids, + &prod_fan_in, + median_fan_in, + median_fan_out, + &mut ids_by_role, + &mut summary, + ); - // 6. Reset roles for affected files only, then update + // Reset roles for affected files only, then update let reset_sql = format!( "UPDATE nodes SET role = NULL WHERE file IN ({}) AND kind NOT IN ('file', 'directory')", affected_ph diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 442e1023..b971d232 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1,6 +1,6 @@ # Codegraph Roadmap -> **Current version:** 3.5.0 | **Status:** Active development | **Updated:** 2026-03-29 +> **Current version:** 3.5.0 | **Status:** Active development | **Updated:** 2026-03-30 Codegraph is a strong local-first code graph CLI. This roadmap describes planned improvements across thirteen phases -- closing gaps with commercial code intelligence platforms while preserving codegraph's core strengths: fully local, open source, zero cloud dependency by default. 
diff --git a/generated/titan/titan-report-v3.5.0-2026-03-30T03-04-14.md b/generated/titan/titan-report-v3.5.0-2026-03-30T03-04-14.md new file mode 100644 index 00000000..397b3d12 --- /dev/null +++ b/generated/titan/titan-report-v3.5.0-2026-03-30T03-04-14.md @@ -0,0 +1,246 @@ +# Titan Audit Report + +**Version:** 3.5.0 +**Date:** 2026-03-29 -> 2026-03-30 +**Branch:** release/3.5.0 +**Target:** . (full codebase) + +--- + +## Executive Summary + +The Titan pipeline audited 122 files across 13 domains, identifying 55 fail-level and 30 decompose-level targets. The forge phase executed 31 commits addressing dead code removal, shared abstraction extraction, function decomposition, fail-level fixes, and warn-level improvements. All 2131 tests pass. Quality score improved from 65 to 67, functions above threshold dropped from 50 to 48 (with the worst offender `makePartition` improving from MI 5 to MI 13.4), and function-level cycles dropped from 9 to 6. + +--- + +## Pipeline Timeline + +| Phase | Duration | Notes | +|-------|----------|-------| +| RECON | ~15 min | Completed before crash (prior session) | +| GAUNTLET | ~55 min | 37/122 done pre-crash; resumed, 2 iterations finished remaining 85 targets | +| SYNC | ~5 min | Single sub-agent pass | +| FORGE (5 sub-phases) | ~2.5 hrs | 31 commits, first at 00:26 CDT, last at 02:51 CDT (2026-03-30) | +| GATE (22 runs) | across forge | Inline with each forge commit | +| CLOSE | ~8 min | Report + PR creation | +| **Total** | **~3.5 hrs** | Excludes pre-crash RECON + partial GAUNTLET | + +--- + +## Metrics: Before & After + +| Metric | Baseline | Final | Delta | Trend | +|--------|----------|-------|-------|-------| +| Quality Score | 65 | 67 | +2 | up | +| Total Files | 486 | 487 | +1 | -- | +| Total Symbols | 11672 | 12628 | +956 | up (decomposition added helpers) | +| Total Edges | 21833 | 24110 | +2277 | up (new helper call edges) | +| Functions Above Threshold | 50 | 48 | -2 | down | +| Dead Symbols (codegraph roles) | N/A | 
9620 | -- | -- | +| File-Level Cycles | 1 | 1 | 0 | -- | +| Function-Level Cycles | 9 | 6 | -3 | down | +| Avg Cognitive Complexity | 5.8 | 5.8 | 0 | -- | +| Avg Cyclomatic Complexity | 4.9 | 4.9 | 0 | -- | +| Avg MI | 61.2 | 61.2 | 0 | -- | +| Min MI | 5.0 | 13.4 | +8.4 | up | +| Community Modularity | 0.49 | 0.49 | 0 | -- | + +### Complexity Improvement: Top Movers + +| Function | Before MI | After MI | Delta | Before Bugs | After Bugs | Delta | +|----------|-----------|----------|-------|-------------|------------|-------| +| makePartition | 5.0 | 13.4 | +8.4 | 6.26 | 4.49 | -1.77 | +| walk_node_depth (javascript.rs) | 8.3 | decomposed | -- | 5.50 | decomposed into helpers | -- | +| build_call_edges (edge_builder.rs) | 22.7 | decomposed | -- | 4.36 | decomposed into helpers | -- | + +The worst offenders from the baseline (`walk_node_depth` variants across extractors, `build_call_edges`, `makePartition`) were all decomposed into smaller focused functions. The monolithic `walk_node_depth` in each native extractor now delegates shared logic to `helpers.rs`. 
+ +### Remaining Hot Spots + +| Function | File | Cognitive | MI | Halstead Bugs | +|----------|------|-----------|-----|---------------| +| makePartition | leiden/partition.ts | 104 | 13.4 | 4.49 | +| computeFunctionComplexity | features/complexity.ts | 103 | 39.4 | 1.25 | +| extract_param_names_strategy | dataflow.rs | 83 | 23.1 | 1.36 | +| extract_dynamic_import_names | javascript.rs | 79 | 44.1 | 1.07 | +| extract_csharp_class_fields | csharp.rs | 78 | 50.0 | 0.37 | +| walkWithVisitors | visitor.ts | 65 | 43.7 | 1.05 | +| createAstStoreVisitor | ast-store-visitor.ts | 65 | 36.6 | 1.24 | +| CfgBuilder.process_try_catch | cfg.rs | 62 | 34.2 | 1.85 | +| renderContextResult | inspect.ts | 59 | 27.2 | 1.48 | +| buildAstNodes | features/ast.ts | 54 | 35.2 | 1.24 | + +--- + +## Audit Results Summary + +**Targets audited:** 122 +**Pass:** 41 | **Warn:** 26 | **Fail:** 25 | **Decompose:** 30 + +### By Pillar + +| Pillar | Most Common Violations | +|--------|----------------------| +| I -- Structural Purity | cognitive (107), cyclomatic (78), halsteadBugs (57), sloc (47), deadCode (43) | +| II -- Data & Type Sovereignty | magicValues (7), emptyCatch (6), empty-catch (2) | +| III -- Ecosystem Synergy | dry (9), config-env (1) | +| IV -- Quality Vigil | criticalPath (2), naming (1), console (1) | + +### Most Common Violations + +1. **Cognitive complexity** -- 107 instances (extractors, features, domain) +2. **Cyclomatic complexity** -- 78 instances (extractors, features, graph) +3. **Halstead bugs** -- 57 instances (extractors, leiden, features) +4. **SLOC** -- 47 instances (extractors, presentation, domain) +5. 
**Dead code** -- 43 instances (shared, db, extractors) + +--- + +## Changes Made + +### Commits: 32 + +| SHA | Message | Files | Domain | +|-----|---------|-------|--------| +| 9e1286a | chore(shared): remove dead code from types and shared utilities | 2 | shared-types | +| cc89d7a | chore(db): remove dead code from database layer | 1 | database | +| 9fafa5a | refactor(native): extract shared walk_node_depth helpers into helpers.rs | 7 | native-extractors | +| c9fba51 | refactor(extractors): extract shared visitor utilities from WASM extractors | 6 | wasm-extractors | +| a6f942f | refactor(analysis): extract shared query-building helpers | 6 | domain-analysis | +| 1673a6c | refactor(leiden): decompose makePartition into focused sub-functions | 2 | graph-engine | +| ed0707e | fix(leiden): reduce cognitive complexity in adapter and index | 2 | graph-engine | +| 0c0c24c | refactor: decompose MCP server and search CLI formatter | 2 | mcp-search | +| 3f56c5b | refactor(graph): decompose finalize stage into sub-steps | 1 | graph-builder | +| 4de3ac7 | refactor(ast): decompose setupVisitors into focused helper functions | 1 | ast-analysis | +| 662387b | refactor(extractors): decompose javascript and go WASM extractors | 2 | wasm-extractors | +| 67a8241 | refactor(features): decompose complexity-query and graph-enrichment | 2 | features | +| ff32950 | refactor(presentation): decompose check, audit, and branch-compare formatters | 3 | presentation | +| 3d34774 | refactor(structure): decompose computeDirectoryMetrics into focused helpers | 1 | features | +| b7a6206 | refactor(presentation): decompose complexity CLI formatter | 1 | presentation | +| aa34dc4 | refactor(native): decompose javascript.rs walk_node_depth | 1 | native-extractors | +| 2653693 | refactor(native): decompose go/python/php extractors | 3 | native-extractors | +| a49e393 | refactor(native): decompose java/csharp/ruby/rust extractors | 4 | native-extractors | +| 56c2584 | refactor(native): decompose 
edge_builder, complexity, and cfg modules | 3 | native-engine | +| 6f3fb3d | refactor(native): decompose dataflow module | 1 | native-engine | +| 3f25376 | refactor(extractors): decompose javascript.ts and go.ts WASM extractors | 2 | wasm-extractors | +| 6e0e5df | fix: reduce complexity in parser dispatch and config loading | 2 | domain-parser | +| bbffcd6 | fix(extractors): reduce complexity and remove dead code in WASM extractors | 5 | wasm-extractors | +| d186da9 | fix(analysis): reduce complexity and remove dead code in analysis modules | 4 | domain-analysis | +| a55ee53 | fix(graph): fix empty catches, reduce complexity in graph builder pipeline | 5 | graph-builder | +| da41157 | fix(ast): reduce complexity in AST engine and complexity visitor | 2 | ast-analysis | +| 4932570 | fix(features): reduce complexity in cfg, dataflow, and check modules | 3 | features | +| 99b733c | fix(native): reduce complexity in roles_db and HCL extractor | 2 | native-engine | +| a027aaf | refactor(shared): address warnings in types and database layer | 2 | shared-types | +| 8468b49 | refactor: address warnings in domain analysis and presentation | 2 | presentation | +| 6f13090 | refactor: address warnings in infrastructure, features, and CLI | 3 | infrastructure | +| 053cfe9 | fix: resolve build errors from noUncheckedIndexedAccess and unexported types | 3 | wasm-extractors | + +### PR Split Plan + +All 32 commits were submitted as a single PR due to extensive cross-file dependencies between commits that make cherry-pick splitting fragile: + +**PR:** [#699](https://github.com/optave/ops-codegraph-tool/pull/699) -- refactor: Titan audit -- decompose, reduce complexity, remove dead code + +The logical grouping for review purposes: + +| Group | Title | Concern | Domain | Commits | Files | +|-------|-------|---------|--------|---------|-------| +| 1 | Remove dead code from shared, types, and database | dead_code | shared/db | 2 | 3 | +| 2 | Extract shared helpers for native and WASM 
extractors | abstraction | extractors | 3 | 19 |
| 3 | Decompose Leiden partition and optimiser | decomposition | graph-engine | 2 | 4 |
| 4 | Decompose MCP server, search formatter, graph builder, AST engine | decomposition | domain | 3 | 4 |
| 5 | Decompose WASM extractors (javascript.ts, go.ts) | decomposition | wasm-extractors | 2 | 4 |
| 6 | Decompose features and presentation formatters | decomposition | features/presentation | 4 | 7 |
| 7 | Decompose native Rust extractors | decomposition | native-extractors | 4 | 8 |
| 8 | Decompose native engine core (edge_builder, complexity, cfg, dataflow) | decomposition | native-engine | 2 | 4 |
| 9 | Reduce complexity across domain, extractors, and features | quality_fix | cross-cutting | 7 | 28 |
| 10 | Address warn-level issues in shared, domain, presentation, infra | warning | cross-cutting | 3 | 7 |

---

## Gate Validation History

**Total runs:** 22
**Pass:** 14 | **Warn:** 8 | **Fail:** 0
**Rollbacks:** 0

### Failure Patterns

No failures or rollbacks occurred. 8 warnings were issued:
- **blast-radius warn** (2x): native extractor refactors touched many files (blast radii of 18 and 124 files, respectively)
- **complexity warn** (4x): Leiden partition and config still above thresholds after decomposition
- **lint warn** (2x): pre-existing lint issues in `src/extractors/rust.ts`; an intentional signature removal was also flagged

---

## Issues Discovered

### Codegraph Bugs (1)
- **limitation** -- `codegraph exports` reports interfaces as dead-unresolved when used as type annotations but not directly imported by name. This is a known limitation of the resolution engine for TypeScript type-only exports.

### Tooling Issues (0)

### Process Suggestions (1)
- **suggestion** -- Batch 2 (10 files) exceeded the recommended batch size of 5. Future RECON should split large same-domain batches. 
+ +### Codebase Observations (1) +- **suggestion** -- `walk_node_depth` pattern is duplicated across all 9 language extractors in `crates/codegraph-core/src/extractors/`. A shared macro or trait-based dispatch could eliminate massive duplication and reduce total cognitive complexity by ~800 points. + +--- + +## Domains Analyzed + +| Domain | Root Dirs | Files | Status | +|--------|-----------|-------|--------| +| Shared/Types | `src/shared/`, `src/types.ts` | 10 | audited | +| Database | `src/db/` | 20 | audited | +| Infrastructure | `src/infrastructure/` | 7 | audited | +| Domain/Parser | `src/domain/`, `src/extractors/` | 57 | audited | +| Graph Engine | `src/graph/` | 22 | audited | +| AST Analysis | `src/ast-analysis/` | 22 | audited | +| Features | `src/features/` | 23 | audited | +| Presentation | `src/presentation/` | 31 | audited | +| CLI | `src/cli/` | 48 | not in scope | +| MCP Server | `src/mcp/` | 40 | partially audited | +| Search | `src/domain/search/` | 10 | partially audited | +| Native Engine | `crates/codegraph-core/` | 31 | audited | +| Scripts/Tests | `scripts/`, `tests/` | 169 | excluded | + +--- + +## Pipeline Freshness + +**Main at RECON:** 573f181 +**Main at CLOSE:** ae09cfc +**Commits behind:** 2 +**Overall staleness:** fresh + +### Drift Events + +| Phase | Staleness | Impacted Targets | Action | +|-------|-----------|-----------------|--------| +| gauntlet (start) | none | 0 | continued | +| gauntlet (end) | none | 0 | continued | +| close | fresh | 0 | report generated normally | + +The 2 commits on main since RECON are non-code changes (skill rename, docs). No audited targets were affected. + +### Stale Targets + +None. + +--- + +## Recommendations for Next Run + +1. **makePartition remains the worst function** (MI 13.4, cognitive 104). The decomposition improved MI from 5 to 13.4 but it needs further splitting -- the core partition loop is still monolithic. + +2. 
**computeFunctionComplexity** (cognitive 103) was not decomposed in this run. It is the second-worst function and should be a priority target. + +3. **Native extractor duplication** -- The `walk_node_depth` pattern is still duplicated across 9 extractors. A Rust macro or trait-based dispatch could reduce total cognitive complexity by ~800 points. This is the single highest-leverage refactor remaining. + +4. **Type-only export resolution** -- The codegraph limitation with TypeScript type-only exports inflates dead symbol counts. Fixing this in the resolution engine would improve quality score. + +5. **Batch sizing** -- Keep RECON batches to 5 files max for better audit granularity. + +6. **CLI and test domains** were excluded from this run. A future Titan run scoped to `src/cli/` could improve the tangled CLI layer (cohesion 0.299). diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index b9c4ed25..a8fb2e44 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -162,6 +162,80 @@ async function ensureWasmTreesIfNeeded( // ─── Per-file visitor setup ───────────────────────────────────────────── +/** Check if a definition has a real function body (not a type signature). */ +function hasFuncBody(d: { + name: string; + kind: string; + line: number; + endLine?: number | null; +}): boolean { + return ( + (d.kind === 'function' || d.kind === 'method') && + d.line > 0 && + d.endLine != null && + d.endLine > d.line && + !d.name.includes('.') + ); +} + +/** Set up AST-store visitor if applicable. 
*/ +function setupAstVisitor( + db: BetterSqlite3Database, + relPath: string, + symbols: ExtractorOutput, + langId: string, + ext: string, +): Visitor | null { + const astTypeMap = AST_TYPE_MAPS.get(langId); + if (!astTypeMap || !WALK_EXTENSIONS.has(ext) || Array.isArray(symbols.astNodes)) return null; + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + return createAstStoreVisitor(astTypeMap, symbols.definitions || [], relPath, nodeIdMap); +} + +/** Set up complexity visitor if any definitions need WASM complexity analysis. */ +function setupComplexityVisitorForFile( + defs: Definition[], + langId: string, + walkerOpts: WalkOptions, +): Visitor | null { + const cRules = COMPLEXITY_RULES.get(langId); + if (!cRules) return null; + + const hRules = HALSTEAD_RULES.get(langId); + const needsWasmComplexity = defs.some((d) => hasFuncBody(d) && !d.complexity); + if (!needsWasmComplexity) return null; + + const visitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId }); + + for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes?.add(t); + + const dfRules = DATAFLOW_RULES.get(langId); + walkerOpts.getFunctionName = (node: TreeSitterNode): string | null => { + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode.text; + if (dfRules) return getFuncName(node, dfRules as any); + return null; + }; + + return visitor; +} + +/** Set up CFG visitor if any definitions need WASM CFG analysis. 
*/ +function setupCfgVisitorForFile(defs: Definition[], langId: string, ext: string): Visitor | null { + const cfgRulesForLang = CFG_RULES.get(langId); + if (!cfgRulesForLang || !CFG_EXTENSIONS.has(ext)) return null; + + const needsWasmCfg = defs.some( + (d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks), + ); + if (!needsWasmCfg) return null; + + return createCfgVisitor(cfgRulesForLang); +} + function setupVisitors( db: BetterSqlite3Database, relPath: string, @@ -171,10 +245,6 @@ function setupVisitors( ): SetupResult { const ext = path.extname(relPath).toLowerCase(); const defs = symbols.definitions || []; - const doAst = opts.ast !== false; - const doComplexity = opts.complexity !== false; - const doCfg = opts.cfg !== false; - const doDataflow = opts.dataflow !== false; const visitors: Visitor[] = []; const walkerOpts: WalkOptions = { @@ -183,75 +253,19 @@ function setupVisitors( getFunctionName: (_node: TreeSitterNode) => null, }; - // AST-store visitor (call kind already filtered in runAnalyses upfront) - let astVisitor: Visitor | null = null; - const astTypeMap = AST_TYPE_MAPS.get(langId); - if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !Array.isArray(symbols.astNodes)) { - const nodeIdMap = new Map(); - for (const row of bulkNodeIdsByFile(db, relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); - visitors.push(astVisitor); - } + const astVisitor = opts.ast !== false ? setupAstVisitor(db, relPath, symbols, langId, ext) : null; + if (astVisitor) visitors.push(astVisitor); - // Complexity visitor (file-level mode) - let complexityVisitor: Visitor | null = null; - const cRules = COMPLEXITY_RULES.get(langId); - const hRules = HALSTEAD_RULES.get(langId); - if (doComplexity && cRules) { - // Only trigger WASM complexity for definitions with real function bodies. 
- // Interface/type property signatures (dotted names, single-line span) - // correctly lack native complexity data and should not trigger a fallback. - const needsWasmComplexity = defs.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line > 0 && - d.endLine != null && - d.endLine > d.line && - !d.name.includes('.') && - !d.complexity, - ); - if (needsWasmComplexity) { - complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId }); - visitors.push(complexityVisitor); - - for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes?.add(t); - - const dfRules = DATAFLOW_RULES.get(langId); - walkerOpts.getFunctionName = (node: TreeSitterNode): string | null => { - const nameNode = node.childForFieldName('name'); - if (nameNode) return nameNode.text; - if (dfRules) return getFuncName(node, dfRules as any); - return null; - }; - } - } + const complexityVisitor = + opts.complexity !== false ? setupComplexityVisitorForFile(defs, langId, walkerOpts) : null; + if (complexityVisitor) visitors.push(complexityVisitor); - // CFG visitor - let cfgVisitor: Visitor | null = null; - const cfgRulesForLang = CFG_RULES.get(langId); - if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { - const needsWasmCfg = defs.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line > 0 && - d.endLine != null && - d.endLine > d.line && - !d.name.includes('.') && - d.cfg !== null && - !Array.isArray(d.cfg?.blocks), - ); - if (needsWasmCfg) { - cfgVisitor = createCfgVisitor(cfgRulesForLang); - visitors.push(cfgVisitor); - } - } + const cfgVisitor = opts.cfg !== false ? 
setupCfgVisitorForFile(defs, langId, ext) : null; + if (cfgVisitor) visitors.push(cfgVisitor); - // Dataflow visitor let dataflowVisitor: Visitor | null = null; const dfRules = DATAFLOW_RULES.get(langId); - if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { + if (opts.dataflow !== false && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { dataflowVisitor = createDataflowVisitor(dfRules); visitors.push(dataflowVisitor); } @@ -261,88 +275,80 @@ function setupVisitors( // ─── Result storage helpers ───────────────────────────────────────────── -function storeComplexityResults(results: WalkResults, defs: Definition[], langId: string): void { - const complexityResults = (results.complexity || []) as ComplexityFuncResult[]; - const resultByLine = new Map(); - for (const r of complexityResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!resultByLine.has(line)) resultByLine.set(line, []); - resultByLine.get(line)?.push(r); - } +/** Index per-function results by start line for O(1) lookup. */ +function indexByLine(results: T[]): Map { + const byLine = new Map(); + for (const r of results) { + if (!r.funcNode) continue; + const line = r.funcNode.startPosition.row + 1; + if (!byLine.has(line)) byLine.set(line, []); + byLine.get(line)?.push(r); } + return byLine; +} + +/** Find the best matching result for a definition by line + name. */ +function matchResultToDef( + candidates: T[] | undefined, + defName: string, +): T | undefined { + if (!candidates) return undefined; + if (candidates.length === 1) return candidates[0]; + return ( + candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === defName; + }) ?? 
candidates[0] + ); +} + +function storeComplexityResults(results: WalkResults, defs: Definition[], langId: string): void { + const byLine = indexByLine((results.complexity || []) as ComplexityFuncResult[]); for (const def of defs) { if ((def.kind === 'function' || def.kind === 'method') && def.line && !def.complexity) { - const candidates = resultByLine.get(def.line); - const funcResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (funcResult) { - const { metrics } = funcResult; - const loc = computeLOCMetrics(funcResult.funcNode, langId); - const volume = metrics.halstead ? metrics.halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex(volume, metrics.cyclomatic, loc.sloc, commentRatio); - - def.complexity = { - cognitive: metrics.cognitive, - cyclomatic: metrics.cyclomatic, - maxNesting: metrics.maxNesting, - halstead: metrics.halstead, - loc, - maintainabilityIndex: mi, - }; - } + const funcResult = matchResultToDef(byLine.get(def.line), def.name); + if (!funcResult) continue; + const { metrics } = funcResult; + const loc = computeLOCMetrics(funcResult.funcNode, langId); + const volume = metrics.halstead ? metrics.halstead.volume : 0; + const commentRatio = loc.loc > 0 ? 
loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, metrics.cyclomatic, loc.sloc, commentRatio); + def.complexity = { + cognitive: metrics.cognitive, + cyclomatic: metrics.cyclomatic, + maxNesting: metrics.maxNesting, + halstead: metrics.halstead, + loc, + maintainabilityIndex: mi, + }; } } } function storeCfgResults(results: WalkResults, defs: Definition[]): void { - const cfgResults = (results.cfg || []) as CfgFuncResult[]; - const cfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!cfgByLine.has(line)) cfgByLine.set(line, []); - cfgByLine.get(line)?.push(r); - } - } + const byLine = indexByLine((results.cfg || []) as CfgFuncResult[]); for (const def of defs) { if ( (def.kind === 'function' || def.kind === 'method') && def.line && !def.cfg?.blocks?.length ) { - const candidates = cfgByLine.get(def.line); - const cfgResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (cfgResult) { - def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; - - // Override complexity's cyclomatic with CFG-derived value (single source of truth) - if (def.complexity && cfgResult.cyclomatic != null) { - def.complexity.cyclomatic = cfgResult.cyclomatic; - const { loc, halstead } = def.complexity; - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc && loc.loc > 0 ? loc.commentLines / loc.loc : 0; - def.complexity.maintainabilityIndex = computeMaintainabilityIndex( - volume, - cfgResult.cyclomatic, - loc?.sloc ?? 
0, - commentRatio, - ); - } + const cfgResult = matchResultToDef(byLine.get(def.line), def.name); + if (!cfgResult) continue; + def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; + + // Override complexity's cyclomatic with CFG-derived value (single source of truth) + if (def.complexity && cfgResult.cyclomatic != null) { + def.complexity.cyclomatic = cfgResult.cyclomatic; + const { loc, halstead } = def.complexity; + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc && loc.loc > 0 ? loc.commentLines / loc.loc : 0; + def.complexity.maintainabilityIndex = computeMaintainabilityIndex( + volume, + cfgResult.cyclomatic, + loc?.sloc ?? 0, + commentRatio, + ); } } } diff --git a/src/ast-analysis/visitors/complexity-visitor.ts b/src/ast-analysis/visitors/complexity-visitor.ts index ffbf47ab..6decd648 100644 --- a/src/ast-analysis/visitors/complexity-visitor.ts +++ b/src/ast-analysis/visitors/complexity-visitor.ts @@ -87,6 +87,16 @@ function classifyBranchNode( } } +function classifyLogicalOp(node: TreeSitterNode, cRules: AnyRules, acc: ComplexityAcc): void { + const op = node.child(1)?.type; + if (!op || !cRules.logicalOperators.has(op)) return; + acc.cyclomatic++; + const parent = node.parent; + const sameSequence = + parent != null && parent.type === cRules.logicalNodeType && parent.child(1)?.type === op; + if (!sameSequence) acc.cognitive++; +} + function classifyPlainElse( node: TreeSitterNode, type: string, @@ -215,17 +225,7 @@ export function createComplexityVisitor( if (nestingLevel > acc.maxNesting) acc.maxNesting = nestingLevel; if (type === cRules.logicalNodeType) { - const op = node.child(1)?.type; - if (op && cRules.logicalOperators.has(op)) { - acc.cyclomatic++; - const parent = node.parent; - let sameSequence = false; - if (parent && parent.type === cRules.logicalNodeType) { - const parentOp = parent.child(1)?.type; - if (parentOp === op) sameSequence = true; - } - if (!sameSequence) acc.cognitive++; - } + 
classifyLogicalOp(node, cRules, acc); } if (type === cRules.optionalChainType) acc.cyclomatic++; diff --git a/src/db/connection.ts b/src/db/connection.ts index 1513717d..89b8845f 100644 --- a/src/db/connection.ts +++ b/src/db/connection.ts @@ -29,6 +29,23 @@ function getPackageVersion(): string { /** Warn once per process when DB version mismatches the running codegraph version. */ let _versionWarned = false; +/** Check and warn (once) if the running codegraph version differs from the DB build version. */ +function warnOnVersionMismatch(getBuildVersion: () => string | undefined | null): void { + if (_versionWarned) return; + _versionWarned = true; + try { + const buildVersion = getBuildVersion(); + const currentVersion = getPackageVersion(); + if (buildVersion && currentVersion && buildVersion !== currentVersion) { + warn( + `DB was built with codegraph v${buildVersion}, running v${currentVersion}. Consider: codegraph build --no-incremental`, + ); + } + } catch { + // build_meta table may not exist in older DBs — silently ignore + } +} + /** DB instance with optional advisory lock path. */ export type LockedDatabase = BetterSqlite3Database & { __lockPath?: string }; @@ -81,11 +98,6 @@ export function _resetRepoRootCache(): void { _cachedRepoRootCwd = undefined; } -/** Reset the version warning flag (for testing). 
*/ -export function _resetVersionWarning(): void { - _versionWarned = false; -} - function isProcessAlive(pid: number): boolean { try { process.kill(pid, 0); @@ -299,28 +311,41 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }) as unknown as BetterSqlite3Database; - // Warn once per process if the DB was built with a different codegraph version - if (!_versionWarned) { - try { - const row = db - .prepare<{ value: string }>('SELECT value FROM build_meta WHERE key = ?') - .get('codegraph_version'); - const buildVersion = row?.value; - const currentVersion = getPackageVersion(); - if (buildVersion && currentVersion && buildVersion !== currentVersion) { - warn( - `DB was built with codegraph v${buildVersion}, running v${currentVersion}. Consider: codegraph build --no-incremental`, - ); - } - } catch { - // build_meta table may not exist in older DBs — silently ignore - } - _versionWarned = true; - } + warnOnVersionMismatch(() => { + const row = db + .prepare<{ value: string }>('SELECT value FROM build_meta WHERE key = ?') + .get('codegraph_version'); + return row?.value; + }); return db; } +/** Open a NativeRepository via rusqlite, throwing DbError if the DB file is missing. 
*/ +function openRepoNative(customDbPath?: string): { repo: Repository; close(): void } { + const dbPath = findDbPath(customDbPath); + if (!fs.existsSync(dbPath)) { + throw new DbError( + `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`, + { file: dbPath }, + ); + } + const native = getNative(); + const ndb = native.NativeDatabase.openReadonly(dbPath); + try { + warnOnVersionMismatch(() => ndb.getBuildMeta('codegraph_version')); + return { + repo: new NativeRepository(ndb), + close() { + ndb.close(); + }, + }; + } catch (innerErr) { + ndb.close(); + throw innerErr; + } +} + /** * Open a Repository from either an injected instance or a DB path. * @@ -345,42 +370,7 @@ export function openRepo( // Try native rusqlite path first (Phase 6.14) if (isNativeAvailable()) { try { - const dbPath = findDbPath(customDbPath); - if (!fs.existsSync(dbPath)) { - throw new DbError( - `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`, - { file: dbPath }, - ); - } - const native = getNative(); - const ndb = native.NativeDatabase.openReadonly(dbPath); - try { - // Version check (same logic as openReadonlyOrFail) - if (!_versionWarned) { - try { - const buildVersion = ndb.getBuildMeta('codegraph_version'); - const currentVersion = getPackageVersion(); - if (buildVersion && currentVersion && buildVersion !== currentVersion) { - warn( - `DB was built with codegraph v${buildVersion}, running v${currentVersion}. Consider: codegraph build --no-incremental`, - ); - } - } catch { - // build_meta table may not exist in older DBs - } - _versionWarned = true; - } - - return { - repo: new NativeRepository(ndb), - close() { - ndb.close(); - }, - }; - } catch (innerErr) { - ndb.close(); - throw innerErr; - } + return openRepoNative(customDbPath); } catch (e) { // Re-throw user-visible errors (e.g. DB not found) — only silently // fall back for native-engine failures (e.g. incompatible native binary). 
diff --git a/src/db/migrations.ts b/src/db/migrations.ts index 03828b49..2dfd052b 100644 --- a/src/db/migrations.ts +++ b/src/db/migrations.ts @@ -304,7 +304,8 @@ export function setBuildMeta( tx(); } -export function initSchema(db: BetterSqlite3Database): void { +/** Run numbered migrations that haven't been applied yet. */ +function applyMigrations(db: BetterSqlite3Database): void { db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); const row = db.prepare<{ version: number }>('SELECT version FROM schema_version').get(); @@ -322,40 +323,43 @@ export function initSchema(db: BetterSqlite3Database): void { currentVersion = migration.version; } } +} - // Legacy column compat — add columns that may be missing from pre-migration DBs +/** Ensure columns and indexes exist for pre-migration DBs (legacy compat). */ +function ensureLegacyColumns(db: BetterSqlite3Database): void { if (hasTable(db, 'nodes')) { - if (!hasColumn(db, 'nodes', 'end_line')) { - db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); - } - if (!hasColumn(db, 'nodes', 'role')) { - db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); - } - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); - if (!hasColumn(db, 'nodes', 'parent_id')) { - db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); - } - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); - if (!hasColumn(db, 'nodes', 'qualified_name')) { - db.exec('ALTER TABLE nodes ADD COLUMN qualified_name TEXT'); - } - if (!hasColumn(db, 'nodes', 'scope')) { - db.exec('ALTER TABLE nodes ADD COLUMN scope TEXT'); - } - if (!hasColumn(db, 'nodes', 'visibility')) { - db.exec('ALTER TABLE nodes ADD COLUMN visibility TEXT'); - } - db.exec('UPDATE nodes SET qualified_name = name WHERE qualified_name IS NULL'); - db.exec('CREATE INDEX IF NOT EXISTS 
idx_nodes_qualified_name ON nodes(qualified_name)'); - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)'); + ensureNodeColumns(db); } if (hasTable(db, 'edges')) { - if (!hasColumn(db, 'edges', 'confidence')) { - db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); - } - if (!hasColumn(db, 'edges', 'dynamic')) { - db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); - } + ensureEdgeColumns(db); } } + +function ensureNodeColumns(db: BetterSqlite3Database): void { + const missing = (col: string) => !hasColumn(db, 'nodes', col); + if (missing('end_line')) db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); + if (missing('role')) db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + if (missing('parent_id')) + db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + if (missing('qualified_name')) db.exec('ALTER TABLE nodes ADD COLUMN qualified_name TEXT'); + if (missing('scope')) db.exec('ALTER TABLE nodes ADD COLUMN scope TEXT'); + if (missing('visibility')) db.exec('ALTER TABLE nodes ADD COLUMN visibility TEXT'); + db.exec('UPDATE nodes SET qualified_name = name WHERE qualified_name IS NULL'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)'); +} + +function ensureEdgeColumns(db: BetterSqlite3Database): void { + if (!hasColumn(db, 'edges', 'confidence')) + db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); + if (!hasColumn(db, 'edges', 'dynamic')) + db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); +} + +export function initSchema(db: BetterSqlite3Database): void { + applyMigrations(db); + 
ensureLegacyColumns(db); +} diff --git a/src/domain/analysis/context.ts b/src/domain/analysis/context.ts index 9f3f42dd..2d595548 100644 --- a/src/domain/analysis/context.ts +++ b/src/domain/analysis/context.ts @@ -15,10 +15,8 @@ import { getComplexityForNode, getLineCountForNode, getMaxEndLineForFile, - openReadonlyOrFail, } from '../../db/index.js'; import { cachedStmt } from '../../db/repository/cached-stmt.js'; -import { loadConfig } from '../../infrastructure/config.js'; import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { @@ -40,6 +38,7 @@ import type { RelatedNodeRow, StmtCache, } from '../../types.js'; +import { resolveAnalysisOpts, withReadonlyDb } from './query-helpers.js'; import { findMatchingNodes } from './symbol-lookup.js'; interface DisplayOpts { @@ -52,6 +51,60 @@ interface DisplayOpts { [key: string]: unknown; } +/** Format a callee row into the output shape with summary and source. */ +function formatCalleeRow( + c: RelatedNodeRow, + repoRoot: string, + getFileLines: (file: string) => string[] | null, + displayOpts: DisplayOpts, + includeSource: boolean, +) { + const cLines = getFileLines(c.file); + return { + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary: cLines ? extractSummary(cLines, c.line, displayOpts) : null, + source: includeSource + ? readSourceRange(repoRoot, c.file, c.line, c.end_line ?? undefined, displayOpts) + : null, + }; +} + +/** BFS to collect deeper callees beyond the first level. 
*/ +function collectDeeperCallees( + db: BetterSqlite3Database, + startIds: number[], + rootId: number, + repoRoot: string, + getFileLines: (file: string) => string[] | null, + opts: { noTests: boolean; maxDepth: number; displayOpts: DisplayOpts }, +) { + const { noTests, maxDepth, displayOpts } = opts; + const visited = new Set(startIds); + visited.add(rootId); + let frontier = [...startIds]; + const result: ReturnType[] = []; + + for (let d = 2; d <= maxDepth; d++) { + const nextFrontier: number[] = []; + for (const fid of frontier) { + const deeper = findCallees(db, fid) as RelatedNodeRow[]; + for (const c of deeper) { + if (visited.has(c.id) || (noTests && isTestFile(c.file))) continue; + visited.add(c.id); + nextFrontier.push(c.id); + result.push(formatCalleeRow(c, repoRoot, getFileLines, displayOpts, true)); + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + return result; +} + function buildCallees( db: BetterSqlite3Database, node: NodeRow, @@ -63,65 +116,20 @@ function buildCallees( const calleeRows = findCallees(db, node.id) as RelatedNodeRow[]; const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; - const callees = filteredCallees.map((c) => { - const cLines = getFileLines(c.file); - const summary = cLines ? extractSummary(cLines, c.line, displayOpts) : null; - let calleeSource: string | null = null; - if (depth >= 1) { - calleeSource = readSourceRange( - repoRoot, - c.file, - c.line, - c.end_line ?? 
undefined, - displayOpts, - ); - } - return { - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary, - source: calleeSource, - }; - }); + const callees = filteredCallees.map((c) => + formatCalleeRow(c, repoRoot, getFileLines, displayOpts, depth >= 1), + ); if (depth > 1) { - const visited = new Set(filteredCallees.map((c) => c.id)); - visited.add(node.id); - let frontier = filteredCallees.map((c) => c.id); - const maxDepth = Math.min(depth, 5); - for (let d = 2; d <= maxDepth; d++) { - const nextFrontier: number[] = []; - for (const fid of frontier) { - const deeper = findCallees(db, fid) as RelatedNodeRow[]; - for (const c of deeper) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - const cLines = getFileLines(c.file); - callees.push({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary: cLines ? extractSummary(cLines, c.line, displayOpts) : null, - source: readSourceRange( - repoRoot, - c.file, - c.line, - c.end_line ?? 
undefined, - displayOpts, - ), - }); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } + const deeper = collectDeeperCallees( + db, + filteredCallees.map((c) => c.id), + node.id, + repoRoot, + getFileLines, + { noTests, maxDepth: Math.min(depth, 5), displayOpts }, + ); + callees.push(...deeper); } return callees; @@ -433,15 +441,12 @@ export function contextData( config?: any; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { + return withReadonlyDb(customDbPath, (db) => { const depth = opts.depth || 0; const noSource = opts.noSource || false; - const noTests = opts.noTests || false; const includeTests = opts.includeTests || false; - const config = opts.config || loadConfig(); - const displayOpts: DisplayOpts = config.display || {}; + const { noTests, displayOpts } = resolveAnalysisOpts(opts); const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); @@ -494,9 +499,7 @@ export function contextData( const base = { name, results }; return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); - } finally { - db.close(); - } + }); } export function explainData( @@ -510,14 +513,11 @@ export function explainData( config?: any; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; + return withReadonlyDb(customDbPath, (db) => { const depth = opts.depth || 0; const kind = isFileLikeTarget(target) ? 
'file' : 'function'; - const config = opts.config || loadConfig(); - const displayOpts: DisplayOpts = config.display || {}; + const { noTests, displayOpts } = resolveAnalysisOpts(opts); const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); @@ -536,7 +536,5 @@ export function explainData( const base = { target, kind, results }; return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); - } finally { - db.close(); - } + }); } diff --git a/src/domain/analysis/dependencies.ts b/src/domain/analysis/dependencies.ts index 7da8e898..37211b0e 100644 --- a/src/domain/analysis/dependencies.ts +++ b/src/domain/analysis/dependencies.ts @@ -5,7 +5,6 @@ import { findImportSources, findImportTargets, findNodesByFile, - openReadonlyOrFail, } from '../../db/index.js'; import { cachedStmt } from '../../db/repository/cached-stmt.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; @@ -19,6 +18,7 @@ import type { RelatedNodeRow, StmtCache, } from '../../types.js'; +import { withReadonlyDb } from './query-helpers.js'; import { findMatchingNodes } from './symbol-lookup.js'; type UpstreamRow = { id: number; name: string; kind: string; file: string; line: number }; @@ -32,8 +32,7 @@ export function fileDepsData( customDbPath: string, opts: { noTests?: boolean; limit?: number; offset?: number } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { + return withReadonlyDb(customDbPath, (db) => { const noTests = opts.noTests || false; const fileNodes = findFileNodes(db, `%${file}%`) as NodeRow[]; if (fileNodes.length === 0) { @@ -59,9 +58,7 @@ export function fileDepsData( const base = { file, results }; return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); - } finally { - db.close(); - } + }); } /** @@ -140,8 +137,7 @@ export function fnDepsData( offset?: number; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { + return 
withReadonlyDb(customDbPath, (db) => { const depth = opts.depth || 3; const noTests = opts.noTests || false; const hc = new Map(); @@ -194,9 +190,7 @@ export function fnDepsData( const base = { name, results }; return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); - } finally { - db.close(); - } + }); } /** @@ -384,8 +378,7 @@ export function pathData( kind?: string; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { + return withReadonlyDb(customDbPath, (db) => { const noTests = opts.noTests || false; const maxDepth = opts.maxDepth || 10; const edgeKinds = opts.edgeKinds || ['calls']; @@ -477,13 +470,67 @@ export function pathData( reverse, maxDepth, }; - } finally { - db.close(); - } + }); } // ── File-level shortest path ──────────────────────────────────────────── +/** BFS over file adjacency graph to find shortest path. */ +function bfsFilePath( + neighborStmt: ReturnType, + sourceFile: string, + targetFile: string, + edgeKinds: string[], + maxDepth: number, + noTests: boolean, +): { found: boolean; path: string[]; alternateCount: number } { + const visited = new Set([sourceFile]); + const parentMap = new Map(); + let queue = [sourceFile]; + let found = false; + let alternateCount = 0; + + for (let depth = 1; depth <= maxDepth; depth++) { + const nextQueue: string[] = []; + for (const currentFile of queue) { + const neighbors = neighborStmt.all(currentFile, ...edgeKinds) as Array<{ + neighbor_file: string; + }>; + for (const n of neighbors) { + if (noTests && isTestFile(n.neighbor_file)) continue; + if (n.neighbor_file === targetFile) { + if (!found) { + found = true; + parentMap.set(n.neighbor_file, currentFile); + } + alternateCount++; + continue; + } + if (!visited.has(n.neighbor_file)) { + visited.add(n.neighbor_file); + parentMap.set(n.neighbor_file, currentFile); + nextQueue.push(n.neighbor_file); + } + } + } + if (found) break; + queue = nextQueue; + if (queue.length === 0) break; + } + + if (!found) return 
{ found: false, path: [], alternateCount: 0 }; + + // Reconstruct path + const filePath: string[] = [targetFile]; + let cur = targetFile; + while (cur !== sourceFile) { + cur = parentMap.get(cur)!; + filePath.push(cur); + } + filePath.reverse(); + return { found: true, path: filePath, alternateCount: Math.max(0, alternateCount - 1) }; +} + /** * BFS at the file level: find shortest import/edge path between two files. * Adjacency: file A → file B if any symbol in A has an edge to any symbol in B. @@ -499,8 +546,7 @@ export function filePathData( reverse?: boolean; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { + return withReadonlyDb(customDbPath, (db) => { const noTests = opts.noTests || false; const maxDepth = opts.maxDepth || 10; const edgeKinds = opts.edgeKinds || ['imports', 'imports-type']; @@ -569,42 +615,17 @@ export function filePathData( WHERE n_src.file = ? AND e.kind IN (${kindPlaceholders}) AND n_tgt.file != n_src.file`; const neighborStmt = db.prepare(neighborQuery); - // BFS - const visited = new Set([sourceFile]); - const parentMap = new Map(); - let queue = [sourceFile]; - let found = false; - let alternateCount = 0; - - for (let depth = 1; depth <= maxDepth; depth++) { - const nextQueue: string[] = []; - for (const currentFile of queue) { - const neighbors = neighborStmt.all(currentFile, ...edgeKinds) as Array<{ - neighbor_file: string; - }>; - for (const n of neighbors) { - if (noTests && isTestFile(n.neighbor_file)) continue; - if (n.neighbor_file === targetFile) { - if (!found) { - found = true; - parentMap.set(n.neighbor_file, currentFile); - } - alternateCount++; - continue; - } - if (!visited.has(n.neighbor_file)) { - visited.add(n.neighbor_file); - parentMap.set(n.neighbor_file, currentFile); - nextQueue.push(n.neighbor_file); - } - } - } - if (found) break; - queue = nextQueue; - if (queue.length === 0) break; - } + // BFS to find shortest file path + const bfsResult = bfsFilePath( + neighborStmt, + sourceFile, + 
targetFile, + edgeKinds, + maxDepth, + noTests, + ); - if (!found) { + if (!bfsResult.found) { return { from, to, @@ -620,29 +641,18 @@ export function filePathData( }; } - // Reconstruct path - const filePath: string[] = [targetFile]; - let cur = targetFile; - while (cur !== sourceFile) { - cur = parentMap.get(cur)!; - filePath.push(cur); - } - filePath.reverse(); - return { from, to, fromCandidates, toCandidates, found: true, - hops: filePath.length - 1, - path: filePath, - alternateCount: Math.max(0, alternateCount - 1), + hops: bfsResult.path.length - 1, + path: bfsResult.path, + alternateCount: bfsResult.alternateCount, edgeKinds, reverse, maxDepth, }; - } finally { - db.close(); - } + }); } diff --git a/src/domain/analysis/exports.ts b/src/domain/analysis/exports.ts index f0162e0e..62f97c43 100644 --- a/src/domain/analysis/exports.ts +++ b/src/domain/analysis/exports.ts @@ -4,10 +4,8 @@ import { findDbPath, findFileNodes, findNodesByFile, - openReadonlyOrFail, } from '../../db/index.js'; import { cachedStmt } from '../../db/repository/cached-stmt.js'; -import { loadConfig } from '../../infrastructure/config.js'; import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { @@ -17,6 +15,7 @@ import { } from '../../shared/file-utils.js'; import { paginateResult } from '../../shared/paginate.js'; import type { BetterSqlite3Database, NodeRow, StmtCache } from '../../types.js'; +import { resolveAnalysisOpts, withReadonlyDb } from './query-helpers.js'; /** Cache the schema probe for the `exported` column per db handle. 
*/ const _hasExportedColCache: WeakMap = new WeakMap(); @@ -37,12 +36,8 @@ export function exportsData( config?: any; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; - - const config = opts.config || loadConfig(); - const displayOpts = config.display || {}; + return withReadonlyDb(customDbPath, (db) => { + const { noTests, displayOpts } = resolveAnalysisOpts(opts); const dbFilePath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbFilePath), '..'); @@ -101,9 +96,39 @@ export function exportsData( } } return paginated; - } finally { - db.close(); + }); +} + +/** Collect symbols re-exported through barrel files. */ +function collectReexportedSymbols( + db: BetterSqlite3Database, + fileNodeId: number, + reexportsToStmt: ReturnType, + exportedNodesStmt: ReturnType | null, + hasExportedCol: boolean, + getFileLines: (file: string) => string[] | null, + buildSymbolResult: (s: NodeRow, fileLines: string[] | null) => any, +) { + const reexportTargets = reexportsToStmt.all(fileNodeId) as Array<{ file: string }>; + const reexportedSymbols: Array & { originFile: string }> = + []; + for (const reexTarget of reexportTargets) { + let targetExported: NodeRow[]; + if (hasExportedCol) { + targetExported = exportedNodesStmt!.all(reexTarget.file) as NodeRow[]; + } else { + const targetSymbols = findNodesByFile(db, reexTarget.file) as NodeRow[]; + const exportedIds = findCrossFileCallTargets(db, reexTarget.file) as Set; + targetExported = targetSymbols.filter((s) => exportedIds.has(s.id)); + } + for (const s of targetExported) { + reexportedSymbols.push({ + ...buildSymbolResult(s, getFileLines(reexTarget.file)), + originFile: reexTarget.file, + }); + } } + return reexportedSymbols; } function exportsFileImpl( @@ -197,34 +222,20 @@ function exportsFileImpl( const totalUnused = results.filter((r) => r.consumerCount === 0).length; - // Files that re-export this file (barrel -> this file) const reexports = 
(reexportsFromStmt.all(fn.id) as Array<{ file: string }>).map((r) => ({ file: r.file, })); - // For barrel files: gather symbols re-exported from target modules - const reexportTargets = reexportsToStmt.all(fn.id) as Array<{ file: string }>; - - const reexportedSymbols: Array & { originFile: string }> = - []; - for (const reexTarget of reexportTargets) { - let targetExported: NodeRow[]; - if (hasExportedCol) { - targetExported = exportedNodesStmt!.all(reexTarget.file) as NodeRow[]; - } else { - // Fallback: same heuristic as direct exports — symbols called from other files - const targetSymbols = findNodesByFile(db, reexTarget.file) as NodeRow[]; - const exportedIds = findCrossFileCallTargets(db, reexTarget.file) as Set; - targetExported = targetSymbols.filter((s) => exportedIds.has(s.id)); - } - for (const s of targetExported) { - const fileLines = getFileLines(reexTarget.file); - reexportedSymbols.push({ - ...buildSymbolResult(s, fileLines), - originFile: reexTarget.file, - }); - } - } + // Gather symbols re-exported from target modules (barrel file support) + const reexportedSymbols = collectReexportedSymbols( + db, + fn.id, + reexportsToStmt, + exportedNodesStmt, + hasExportedCol, + getFileLines, + buildSymbolResult, + ); let filteredResults = results; let filteredReexported = reexportedSymbols; diff --git a/src/domain/analysis/fn-impact.ts b/src/domain/analysis/fn-impact.ts index d4e47c3a..35e8638c 100644 --- a/src/domain/analysis/fn-impact.ts +++ b/src/domain/analysis/fn-impact.ts @@ -4,13 +4,12 @@ import { findImplementors, findImportDependents, findNodeById, - openReadonlyOrFail, } from '../../db/index.js'; -import { loadConfig } from '../../infrastructure/config.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { normalizeSymbol } from '../../shared/normalize.js'; import { paginateResult } from '../../shared/paginate.js'; import type { BetterSqlite3Database, NodeRow, RelatedNodeRow } from '../../types.js'; +import { 
resolveAnalysisOpts, withReadonlyDb } from './query-helpers.js'; import { findMatchingNodes } from './symbol-lookup.js'; // --- Shared BFS: transitive callers --- @@ -36,6 +35,62 @@ function hasImplementsEdges(db: BetterSqlite3Database): boolean { * during traversal), its concrete implementors are also added to the frontier * so that changes to an interface signature propagate to all implementors. */ +type BfsLevel = Array<{ + name: string; + kind: string; + file: string; + line: number; + viaImplements?: boolean; +}>; +type BfsLevels = Record; +type BfsOnVisit = ( + caller: RelatedNodeRow & { viaImplements?: boolean }, + parentId: number, + depth: number, +) => void; + +/** Record an implementor node at the given depth, adding to frontier and levels. */ +function recordImplementor( + impl: RelatedNodeRow, + parentId: number, + depth: number, + visited: Set, + frontier: number[], + levels: BfsLevels, + noTests: boolean, + onVisit?: BfsOnVisit, +): void { + if (visited.has(impl.id) || (noTests && isTestFile(impl.file))) return; + visited.add(impl.id); + frontier.push(impl.id); + if (!levels[depth]) levels[depth] = []; + levels[depth].push({ + name: impl.name, + kind: impl.kind, + file: impl.file, + line: impl.line, + viaImplements: true, + }); + if (onVisit) onVisit({ ...impl, viaImplements: true }, parentId, depth); +} + +/** Expand implementors for an interface/trait node into the BFS frontier. 
*/ +function expandImplementors( + db: BetterSqlite3Database, + nodeId: number, + depth: number, + visited: Set, + frontier: number[], + levels: BfsLevels, + noTests: boolean, + onVisit?: BfsOnVisit, +): void { + const impls = findImplementors(db, nodeId) as RelatedNodeRow[]; + for (const impl of impls) { + recordImplementor(impl, nodeId, depth, visited, frontier, levels, noTests, onVisit); + } +} + export function bfsTransitiveCallers( db: BetterSqlite3Database, startId: number, @@ -48,50 +103,24 @@ export function bfsTransitiveCallers( noTests?: boolean; maxDepth?: number; includeImplementors?: boolean; - onVisit?: ( - caller: RelatedNodeRow & { viaImplements?: boolean }, - parentId: number, - depth: number, - ) => void; + onVisit?: BfsOnVisit; } = {}, ) { - // Skip all implementor lookups when the graph has no implements edges const resolveImplementors = includeImplementors && hasImplementsEdges(db); - const visited = new Set([startId]); - const levels: Record< - number, - Array<{ name: string; kind: string; file: string; line: number; viaImplements?: boolean }> - > = {}; + const levels: BfsLevels = {}; let frontier = [startId]; - // Seed: if start node is an interface/trait, include its implementors at depth 1. - // Implementors go into a separate list so their callers appear at depth 2, not depth 1. 
+ // Seed: if start node is an interface/trait, include its implementors at depth 1 const implNextFrontier: number[] = []; if (resolveImplementors) { const startNode = findNodeById(db, startId) as NodeRow | undefined; if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) { - const impls = findImplementors(db, startId) as RelatedNodeRow[]; - for (const impl of impls) { - if (!visited.has(impl.id) && (!noTests || !isTestFile(impl.file))) { - visited.add(impl.id); - implNextFrontier.push(impl.id); - if (!levels[1]) levels[1] = []; - levels[1].push({ - name: impl.name, - kind: impl.kind, - file: impl.file, - line: impl.line, - viaImplements: true, - }); - if (onVisit) onVisit({ ...impl, viaImplements: true }, startId, 1); - } - } + expandImplementors(db, startId, 1, visited, implNextFrontier, levels, noTests, onVisit); } } for (let d = 1; d <= maxDepth; d++) { - // On the first wave, merge seeded implementors so their callers appear at d=2 if (d === 1 && implNextFrontier.length > 0) { frontier = [...frontier, ...implNextFrontier]; } @@ -106,27 +135,8 @@ export function bfsTransitiveCallers( levels[d]!.push({ name: c.name, kind: c.kind, file: c.file, line: c.line }); if (onVisit) onVisit(c, fid, d); } - - // If a caller is an interface/trait, also pull in its implementors - // Implementors are one extra hop away, so record at d+1 if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { - const impls = findImplementors(db, c.id) as RelatedNodeRow[]; - for (const impl of impls) { - if (!visited.has(impl.id) && (!noTests || !isTestFile(impl.file))) { - visited.add(impl.id); - nextFrontier.push(impl.id); - const implDepth = d + 1; - if (!levels[implDepth]) levels[implDepth] = []; - levels[implDepth].push({ - name: impl.name, - kind: impl.kind, - file: impl.file, - line: impl.line, - viaImplements: true, - }); - if (onVisit) onVisit({ ...impl, viaImplements: true }, c.id, implDepth); - } - } + expandImplementors(db, c.id, d + 1, visited, nextFrontier, levels, 
noTests, onVisit); } } } @@ -142,8 +152,7 @@ export function impactAnalysisData( customDbPath: string, opts: { noTests?: boolean } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { + return withReadonlyDb(customDbPath, (db) => { const noTests = opts.noTests || false; const fileNodes = findFileNodes(db, `%${file}%`) as NodeRow[]; if (fileNodes.length === 0) { @@ -187,9 +196,7 @@ export function impactAnalysisData( levels: byLevel, totalDependents: visited.size - fileNodes.length, }; - } finally { - db.close(); - } + }); } export function fnImpactData( @@ -206,11 +213,9 @@ export function fnImpactData( config?: any; } = {}, ) { - const db = openReadonlyOrFail(customDbPath); - try { - const config = opts.config || loadConfig(); + return withReadonlyDb(customDbPath, (db) => { + const { noTests, config } = resolveAnalysisOpts(opts); const maxDepth = opts.depth || config.analysis?.fnImpactDepth || 5; - const noTests = opts.noTests || false; const hc = new Map(); const nodes = findMatchingNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); @@ -235,7 +240,5 @@ export function fnImpactData( const base = { name, results }; return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); - } finally { - db.close(); - } + }); } diff --git a/src/domain/analysis/query-helpers.ts b/src/domain/analysis/query-helpers.ts new file mode 100644 index 00000000..414d3a45 --- /dev/null +++ b/src/domain/analysis/query-helpers.ts @@ -0,0 +1,35 @@ +import { openReadonlyOrFail } from '../../db/index.js'; +import { loadConfig } from '../../infrastructure/config.js'; +import type { BetterSqlite3Database, CodegraphConfig } from '../../types.js'; + +/** + * Open a readonly DB connection, run `fn`, and close the DB on completion. + * Eliminates the duplicated `openReadonlyOrFail` + `try/finally/db.close()` pattern + * that appears in every analysis query function. 
+ */ +export function withReadonlyDb( + customDbPath: string | undefined, + fn: (db: BetterSqlite3Database) => T, +): T { + const db = openReadonlyOrFail(customDbPath); + try { + return fn(db); + } finally { + db.close(); + } +} + +/** + * Resolve common analysis options into a normalized form. + * Shared across fn-impact, context, dependencies, and exports modules. + */ +export function resolveAnalysisOpts(opts: { noTests?: boolean; config?: CodegraphConfig }): { + noTests: boolean; + config: CodegraphConfig; + displayOpts: Record; +} { + const noTests = opts.noTests || false; + const config = opts.config || loadConfig(); + const displayOpts = config.display || {}; + return { noTests, config, displayOpts }; +} diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index d0332109..ee03e73b 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -47,6 +47,17 @@ export const BUILTIN_RECEIVERS: Set = new Set([ 'require', ]); +/** Check if a directory entry should be skipped (ignored dirs, dotfiles). */ +function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set | null): boolean { + if (entry.name.startsWith('.') && entry.name !== '.') { + if (IGNORE_DIRS.has(entry.name)) return true; + if (entry.isDirectory()) return true; + } + if (IGNORE_DIRS.has(entry.name)) return true; + if (extraIgnore?.has(entry.name)) return true; + return false; +} + /** * Recursively collect all source files under `dir`. * When `directories` is a Set, also tracks which directories contain files. 
@@ -100,12 +111,7 @@ export function collectFiles( } for (const entry of entries) { - if (entry.name.startsWith('.') && entry.name !== '.') { - if (IGNORE_DIRS.has(entry.name)) continue; - if (entry.isDirectory()) continue; - } - if (IGNORE_DIRS.has(entry.name)) continue; - if (extraIgnore?.has(entry.name)) continue; + if (shouldSkipEntry(entry, extraIgnore)) continue; const full = path.join(dir, entry.name); if (entry.isDirectory()) { diff --git a/src/domain/graph/builder/incremental.ts b/src/domain/graph/builder/incremental.ts index 77c5e3ef..ad3cd635 100644 --- a/src/domain/graph/builder/incremental.ts +++ b/src/domain/graph/builder/incremental.ts @@ -10,7 +10,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { bulkNodeIdsByFile } from '../../../db/index.js'; -import { warn } from '../../../infrastructure/logger.js'; +import { debug, warn } from '../../../infrastructure/logger.js'; import { normalizePath } from '../../../shared/constants.js'; import type { BetterSqlite3Database, @@ -154,7 +154,8 @@ async function parseReverseDep( let code: string; try { code = readFileSafe(absPath); - } catch { + } catch (e: unknown) { + debug(`parseReverseDep: cannot read ${absPath}: ${e instanceof Error ? 
e.message : String(e)}`); return null; } diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts index 534b5cd1..3db559c0 100644 --- a/src/domain/graph/builder/stages/finalize.ts +++ b/src/domain/graph/builder/stages/finalize.ts @@ -17,13 +17,8 @@ import { CODEGRAPH_VERSION } from '../../../../shared/version.js'; import { writeJournalHeader } from '../../journal.js'; import type { PipelineContext } from '../context.js'; -export async function finalize(ctx: PipelineContext): Promise { - const { db, allSymbols, rootDir, isFullBuild, hasEmbeddings, config, opts, schemaVersion } = ctx; - const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb; - - const t0 = performance.now(); - - // Release cached WASM trees +/** Release cached WASM parse trees to free memory. */ +function releaseWasmTrees(allSymbols: PipelineContext['allSymbols']): void { for (const [, symbols] of allSymbols) { const tree = symbols._tree as { delete?: () => void } | undefined; if (tree && typeof tree.delete === 'function') { @@ -36,133 +31,141 @@ export async function finalize(ctx: PipelineContext): Promise { symbols._tree = undefined; symbols._langId = undefined; } +} - // Capture a single wall-clock timestamp for the current build — used for - // both the stale-embeddings comparison and the persisted built_at metadata. - const buildNow = new Date(); +/** + * Detect significant drift between current and previous node/edge counts. + * Skipped for small incremental changes where count fluctuation is expected. 
+ */ +function detectIncrementalDrift( + ctx: PipelineContext, + nodeCount: number, + actualEdgeCount: number, +): void { + const { db, allSymbols, config } = ctx; + const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb; + if (ctx.isFullBuild || allSymbols.size <= 3) return; - const nodeCount = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c; - const actualEdgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c; - info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`); - info(`Stored in ${ctx.dbPath}`); + const prevNodes = useNativeDb + ? ctx.nativeDb!.getBuildMeta('node_count') + : getBuildMeta(db, 'node_count'); + const prevEdges = useNativeDb + ? ctx.nativeDb!.getBuildMeta('edge_count') + : getBuildMeta(db, 'edge_count'); + if (!prevNodes || !prevEdges) return; - // Incremental drift detection — skip for small incremental changes where - // count fluctuation is expected (reverse-dep edge churn). - if (!isFullBuild && allSymbols.size > 3) { - const prevNodes = useNativeDb - ? ctx.nativeDb!.getBuildMeta('node_count') - : getBuildMeta(db, 'node_count'); - const prevEdges = useNativeDb - ? ctx.nativeDb!.getBuildMeta('edge_count') - : getBuildMeta(db, 'edge_count'); - if (prevNodes && prevEdges) { - const prevN = Number(prevNodes); - const prevE = Number(prevEdges); - if (prevN > 0) { - const nodeDrift = Math.abs(nodeCount - prevN) / prevN; - const edgeDrift = prevE > 0 ? Math.abs(actualEdgeCount - prevE) / prevE : 0; - const driftThreshold = - (config as { build?: { driftThreshold?: number } }).build?.driftThreshold ?? 0.2; - if (nodeDrift > driftThreshold || edgeDrift > driftThreshold) { - warn( - `Incremental build diverged significantly from previous counts (nodes: ${prevN}\u2192${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}\u2192${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). 
Consider rebuilding with --no-incremental.`, - ); - } - } - } + const prevN = Number(prevNodes); + const prevE = Number(prevEdges); + if (prevN <= 0) return; + + const nodeDrift = Math.abs(nodeCount - prevN) / prevN; + const edgeDrift = prevE > 0 ? Math.abs(actualEdgeCount - prevE) / prevE : 0; + const driftThreshold = + (config as { build?: { driftThreshold?: number } }).build?.driftThreshold ?? 0.2; + if (nodeDrift > driftThreshold || edgeDrift > driftThreshold) { + warn( + `Incremental build diverged significantly from previous counts (nodes: ${prevN}\u2192${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}\u2192${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). Consider rebuilding with --no-incremental.`, + ); } +} - // For small incremental builds, skip persisting build metadata — the - // engine/version/schema haven't changed (would have triggered a full rebuild), - // built_at is only used by stale-embeddings check (skipped for incremental), - // and counts are only used by drift detection (skipped for ≤3 files). - // This avoids a transaction commit + WAL fsync (~15-30ms). - // Threshold aligned with drift detection gate (allSymbols.size > 3) so stored - // counts stay fresh whenever drift detection reads them. - if (isFullBuild || allSymbols.size > 3) { - try { - if (useNativeDb) { - ctx.nativeDb!.setBuildMeta( - Object.entries({ - engine: ctx.engineName, - engine_version: ctx.engineVersion || '', - codegraph_version: CODEGRAPH_VERSION, - schema_version: String(schemaVersion), - built_at: buildNow.toISOString(), - node_count: String(nodeCount), - edge_count: String(actualEdgeCount), - }).map(([key, value]) => ({ key, value: String(value) })), - ); - } else { - setBuildMeta(db, { +/** + * Persist build metadata (engine, version, counts, timestamp). + * Skipped for small incremental builds to avoid WAL fsync cost. 
+ */ +function persistBuildMetadata( + ctx: PipelineContext, + nodeCount: number, + actualEdgeCount: number, + buildNow: Date, +): void { + const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb; + if (!ctx.isFullBuild && ctx.allSymbols.size <= 3) return; + try { + if (useNativeDb) { + ctx.nativeDb!.setBuildMeta( + Object.entries({ engine: ctx.engineName, engine_version: ctx.engineVersion || '', codegraph_version: CODEGRAPH_VERSION, - schema_version: String(schemaVersion), + schema_version: String(ctx.schemaVersion), built_at: buildNow.toISOString(), - node_count: nodeCount, - edge_count: actualEdgeCount, - }); - } - } catch (err) { - warn(`Failed to write build metadata: ${(err as Error).message}`); + node_count: String(nodeCount), + edge_count: String(actualEdgeCount), + }).map(([key, value]) => ({ key, value: String(value) })), + ); + } else { + setBuildMeta(ctx.db, { + engine: ctx.engineName, + engine_version: ctx.engineVersion || '', + codegraph_version: CODEGRAPH_VERSION, + schema_version: String(ctx.schemaVersion), + built_at: buildNow.toISOString(), + node_count: nodeCount, + edge_count: actualEdgeCount, + }); } + } catch (err) { + warn(`Failed to write build metadata: ${(err as Error).message}`); } +} - // Skip expensive advisory queries for incremental builds — these are - // informational warnings that don't affect correctness and cost ~40-60ms. - if (!isFullBuild) { - debug( - 'Finalize: skipping advisory queries (orphaned/stale embeddings, unused exports) for incremental build', - ); - } else { - // Orphaned embeddings warning - if (hasEmbeddings) { - try { - const orphaned = ( - db - .prepare( - 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', - ) - .get() as { c: number } - ).c; - if (orphaned > 0) { - warn( - `${orphaned} embeddings are orphaned (nodes changed). 
Run "codegraph embed" to refresh.`, - ); - } - } catch { - /* ignore - embeddings table may have been dropped */ +/** + * Run advisory checks on full builds: orphaned embeddings, stale embeddings, + * and unused exports. Informational only — does not affect correctness. + */ +function runAdvisoryChecks( + db: PipelineContext['db'], + hasEmbeddings: boolean, + buildNow: Date, +): void { + // Orphaned embeddings warning + if (hasEmbeddings) { + try { + const orphaned = ( + db + .prepare( + 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + ) + .get() as { c: number } + ).c; + if (orphaned > 0) { + warn( + `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, + ); } + } catch { + /* ignore - embeddings table may have been dropped */ } + } - // Stale embeddings warning (built before current graph rebuild) - if (hasEmbeddings) { - try { - const embedBuiltAt = ( - db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as - | { value: string } - | undefined - )?.value; - if (embedBuiltAt) { - const embedTime = new Date(embedBuiltAt).getTime(); - if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { - warn( - 'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.', - ); - } + // Stale embeddings warning (built before current graph rebuild) + if (hasEmbeddings) { + try { + const embedBuiltAt = ( + db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as + | { value: string } + | undefined + )?.value; + if (embedBuiltAt) { + const embedTime = new Date(embedBuiltAt).getTime(); + if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { + warn( + 'Embeddings were built before the last graph rebuild. 
Run "codegraph embed" to update.', + ); } - } catch { - /* ignore - embedding_meta table may not exist */ } + } catch { + /* ignore - embedding_meta table may not exist */ } + } - // Unused exports warning - try { - const unusedCount = ( - db - .prepare( - `SELECT COUNT(*) as c FROM nodes + // Unused exports warning + try { + const unusedCount = ( + db + .prepare( + `SELECT COUNT(*) as c FROM nodes WHERE exported = 1 AND kind != 'file' AND id NOT IN ( SELECT DISTINCT e.target_id FROM edges e @@ -170,17 +173,46 @@ export async function finalize(ctx: PipelineContext): Promise { JOIN nodes target ON e.target_id = target.id WHERE e.kind = 'calls' AND caller.file != target.file )`, - ) - .get() as { c: number } - ).c; - if (unusedCount > 0) { - warn( - `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, - ); - } - } catch { - /* exported column may not exist on older DBs */ + ) + .get() as { c: number } + ).c; + if (unusedCount > 0) { + warn( + `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, + ); } + } catch { + /* exported column may not exist on older DBs */ + } +} + +export async function finalize(ctx: PipelineContext): Promise { + const { db, allSymbols, rootDir, isFullBuild, hasEmbeddings, opts } = ctx; + + const t0 = performance.now(); + + releaseWasmTrees(allSymbols); + + // Capture a single wall-clock timestamp for the current build — used for + // both the stale-embeddings comparison and the persisted built_at metadata. 
+ const buildNow = new Date(); + + const nodeCount = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c; + const actualEdgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c; + info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`); + info(`Stored in ${ctx.dbPath}`); + + detectIncrementalDrift(ctx, nodeCount, actualEdgeCount); + persistBuildMetadata(ctx, nodeCount, actualEdgeCount, buildNow); + + // Skip expensive advisory queries for incremental builds — these are + // informational warnings that don't affect correctness and cost ~40-60ms. + if (!isFullBuild) { + debug( + 'Finalize: skipping advisory queries (orphaned/stale embeddings, unused exports) for incremental build', + ); + } else { + runAdvisoryChecks(db, hasEmbeddings, buildNow); } // Intentionally measured before closeDb / writeJournalHeader / auto-registration: diff --git a/src/domain/graph/builder/stages/resolve-imports.ts b/src/domain/graph/builder/stages/resolve-imports.ts index 54f8f26f..9bfd0848 100644 --- a/src/domain/graph/builder/stages/resolve-imports.ts +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -1,5 +1,6 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import { debug } from '../../../../infrastructure/logger.js'; import type { Import } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { resolveImportPath, resolveImportsBatch } from '../../resolve.js'; @@ -132,8 +133,8 @@ export async function resolveImports(ctx: PipelineContext): Promise { ); } } - } catch { - /* skip if unreadable */ + } catch (e: unknown) { + debug(`Barrel re-parse failed (non-fatal): ${(e as Error).message}`); } } } diff --git a/src/domain/graph/resolve.ts b/src/domain/graph/resolve.ts index e88f70cc..1647f6cd 100644 --- a/src/domain/graph/resolve.ts +++ b/src/domain/graph/resolve.ts @@ -117,6 +117,35 @@ function matchSubpathPattern(pattern: string, subpath: string): string 
| null { * Resolve a bare specifier through the package.json exports field. * Returns an absolute path or null. */ +/** Try to resolve a condition target to a file path in packageDir. */ +function tryResolveTarget(target: string | null, packageDir: string): string | null { + if (!target) return null; + const resolved = path.resolve(packageDir, target); + return fs.existsSync(resolved) ? resolved : null; +} + +/** Resolve subpath against a subpath map (object with "." keys). */ +function resolveSubpathMap( + exports: Record, + subpath: string, + packageDir: string, +): string | null { + // Exact match first + if (subpath in exports) { + return tryResolveTarget(resolveCondition(exports[subpath]), packageDir); + } + // Pattern matching (keys with *) + for (const [pattern, value] of Object.entries(exports)) { + if (!pattern.includes('*')) continue; + const matched = matchSubpathPattern(pattern, subpath); + if (matched == null) continue; + const rawTarget = resolveCondition(value); + if (!rawTarget) continue; + return tryResolveTarget(rawTarget.replace(/\*/g, matched), packageDir); + } + return null; +} + export function resolveViaExports(specifier: string, rootDir: string): string | null { const parsed = parseBareSpecifier(specifier); if (!parsed) return null; @@ -131,66 +160,25 @@ export function resolveViaExports(specifier: string, rootDir: string): string | // Simple string exports: "exports": "./index.js" if (typeof exports === 'string') { - if (subpath === '.') { - const resolved = path.resolve(packageDir, exports); - return fs.existsSync(resolved) ? resolved : null; - } - return null; + return subpath === '.' ? tryResolveTarget(exports, packageDir) : null; } // Array form at top level if (Array.isArray(exports)) { - if (subpath === '.') { - const target = resolveCondition(exports); - if (target) { - const resolved = path.resolve(packageDir, target); - return fs.existsSync(resolved) ? resolved : null; - } - } - return null; + return subpath === '.' ? 
tryResolveTarget(resolveCondition(exports), packageDir) : null; } if (typeof exports !== 'object') return null; - // Determine if exports is a conditions object (no keys start with ".") - // or a subpath map (keys start with ".") + // Determine if exports is a conditions object or a subpath map const keys = Object.keys(exports); const isSubpathMap = keys.length > 0 && keys.some((k) => k.startsWith('.')); if (!isSubpathMap) { - // Conditions object at top level → applies to "." subpath only - if (subpath === '.') { - const target = resolveCondition(exports); - if (target) { - const resolved = path.resolve(packageDir, target); - return fs.existsSync(resolved) ? resolved : null; - } - } - return null; + return subpath === '.' ? tryResolveTarget(resolveCondition(exports), packageDir) : null; } - // Subpath map: try exact match first, then pattern match - if (subpath in exports) { - const target = resolveCondition(exports[subpath]); - if (target) { - const resolved = path.resolve(packageDir, target); - return fs.existsSync(resolved) ? resolved : null; - } - } - - // Pattern matching (keys with *) - for (const [pattern, value] of Object.entries(exports)) { - if (!pattern.includes('*')) continue; - const matched = matchSubpathPattern(pattern, subpath); - if (matched == null) continue; - const rawTarget = resolveCondition(value); - if (!rawTarget) continue; - const target = rawTarget.replace(/\*/g, matched); - const resolved = path.resolve(packageDir, target); - if (fs.existsSync(resolved)) return resolved; - } - - return null; + return resolveSubpathMap(exports as Record, subpath, packageDir); } /** Clear the exports cache (for testing). 
*/ diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index 7cdfbb0e..4a690bde 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -1,7 +1,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js'; -import { info } from '../../infrastructure/logger.js'; +import { debug, info } from '../../infrastructure/logger.js'; import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../shared/constants.js'; import { DbError } from '../../shared/errors.js'; import { createParseTreeCache, getActiveEngine } from '../parser.js'; @@ -32,12 +32,10 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = ast: false, }; const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts); - console.log( - `Watch mode using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`, - ); + info(`Watch mode using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); const cache = createParseTreeCache(); - console.log( + info( cache ? 
'Incremental parsing enabled (native tree cache)' : 'Incremental parsing unavailable (full re-parse)', @@ -124,8 +122,8 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = })); try { appendJournalEntries(rootDir, entries); - } catch { - /* journal write failure is non-fatal */ + } catch (e: unknown) { + debug(`Journal write failed (non-fatal): ${(e as Error).message}`); } const changeEvents = updates.map((r) => @@ -137,8 +135,8 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = ); try { appendChangeEvents(rootDir, changeEvents); - } catch { - /* change event write failure is non-fatal */ + } catch (e: unknown) { + debug(`Change event write failed (non-fatal): ${(e as Error).message}`); } } @@ -153,8 +151,8 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = } } - console.log(`Watching ${rootDir} for changes...`); - console.log('Press Ctrl+C to stop.\n'); + info(`Watching ${rootDir} for changes...`); + info('Press Ctrl+C to stop.'); const watcher = fs.watch(rootDir, { recursive: true }, (_eventType, filename) => { if (!filename) return; @@ -169,7 +167,7 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = }); process.on('SIGINT', () => { - console.log('\nStopping watcher...'); + info('Stopping watcher...'); watcher.close(); // Flush any pending file paths to journal before exit if (pending.size > 0) { @@ -178,8 +176,8 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = })); try { appendJournalEntries(rootDir, entries); - } catch { - /* best-effort */ + } catch (e: unknown) { + debug(`Journal flush on exit failed (non-fatal): ${(e as Error).message}`); } } if (cache) cache.clear(); diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 70b7bde6..bc38e312 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -273,37 +273,33 @@ function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine { * - 
Backward compat for older native binaries missing js_name annotations * - dataflow argFlows/mutations bindingType -> binding wrapper */ -function patchNativeResult(r: any): ExtractorOutput { - // lineCount: napi(js_name) emits "lineCount"; older binaries may emit "line_count" - r.lineCount = r.lineCount ?? r.line_count ?? null; - r._lineCount = r.lineCount; - - // Backward compat for older binaries missing js_name annotations - if (r.definitions) { - for (const d of r.definitions) { - if (d.endLine === undefined && d.end_line !== undefined) { - d.endLine = d.end_line; - } +/** Patch definition fields for backward compat with older native binaries. */ +function patchDefinitions(definitions: any[]): void { + for (const d of definitions) { + if (d.endLine === undefined && d.end_line !== undefined) { + d.endLine = d.end_line; } } - if (r.imports) { - for (const i of r.imports) { - if (i.typeOnly === undefined) i.typeOnly = i.type_only; - if (i.wildcardReexport === undefined) i.wildcardReexport = i.wildcard_reexport; - if (i.pythonImport === undefined) i.pythonImport = i.python_import; - if (i.goImport === undefined) i.goImport = i.go_import; - if (i.rustUse === undefined) i.rustUse = i.rust_use; - if (i.javaImport === undefined) i.javaImport = i.java_import; - if (i.csharpUsing === undefined) i.csharpUsing = i.csharp_using; - if (i.rubyRequire === undefined) i.rubyRequire = i.ruby_require; - if (i.phpUse === undefined) i.phpUse = i.php_use; - if (i.dynamicImport === undefined) i.dynamicImport = i.dynamic_import; - } +} + +/** Patch import fields for backward compat with older native binaries. 
*/ +function patchImports(imports: any[]): void { + for (const i of imports) { + if (i.typeOnly === undefined) i.typeOnly = i.type_only; + if (i.wildcardReexport === undefined) i.wildcardReexport = i.wildcard_reexport; + if (i.pythonImport === undefined) i.pythonImport = i.python_import; + if (i.goImport === undefined) i.goImport = i.go_import; + if (i.rustUse === undefined) i.rustUse = i.rust_use; + if (i.javaImport === undefined) i.javaImport = i.java_import; + if (i.csharpUsing === undefined) i.csharpUsing = i.csharp_using; + if (i.rubyRequire === undefined) i.rubyRequire = i.ruby_require; + if (i.phpUse === undefined) i.phpUse = i.php_use; + if (i.dynamicImport === undefined) i.dynamicImport = i.dynamic_import; } +} - // typeMap: native returns an array of {name, typeName}; normalize to Map. - // Non-TS languages may omit typeMap entirely — default to empty Map so - // callers can safely access .entries()/.size without null checks. +/** Normalize native typeMap array to a Map instance. */ +function patchTypeMap(r: any): void { if (!r.typeMap) { r.typeMap = new Map(); } else if (!(r.typeMap instanceof Map)) { @@ -314,20 +310,31 @@ function patchNativeResult(r: any): ExtractorOutput { ]), ); } +} - // dataflow: wrap bindingType into binding object for argFlows and mutations - if (r.dataflow) { - if (r.dataflow.argFlows) { - for (const f of r.dataflow.argFlows) { - f.binding = f.bindingType ? { type: f.bindingType } : null; - } +/** Wrap bindingType into binding object for dataflow argFlows and mutations. */ +function patchDataflow(dataflow: any): void { + if (dataflow.argFlows) { + for (const f of dataflow.argFlows) { + f.binding = f.bindingType ? { type: f.bindingType } : null; } - if (r.dataflow.mutations) { - for (const m of r.dataflow.mutations) { - m.binding = m.bindingType ? { type: m.bindingType } : null; - } + } + if (dataflow.mutations) { + for (const m of dataflow.mutations) { + m.binding = m.bindingType ? 
{ type: m.bindingType } : null; } } +} + +function patchNativeResult(r: any): ExtractorOutput { + // lineCount: napi(js_name) emits "lineCount"; older binaries may emit "line_count" + r.lineCount = r.lineCount ?? r.line_count ?? null; + r._lineCount = r.lineCount; + + if (r.definitions) patchDefinitions(r.definitions); + if (r.imports) patchImports(r.imports); + patchTypeMap(r); + if (r.dataflow) patchDataflow(r.dataflow); return r; } @@ -522,73 +529,48 @@ export async function parseFileAuto( return extracted ? extracted.symbols : null; } -/** - * Parse multiple files in bulk and return a Map. - */ -export async function parseFilesAuto( - filePaths: string[], - rootDir: string, - opts: ParseEngineOpts = {}, -): Promise> { - const { native } = resolveEngine(opts); - const result = new Map(); +/** Backfill typeMap via WASM for files missing type-map data from native engine. */ +async function backfillTypeMapBatch( + needsTypeMap: { filePath: string; relPath: string }[], + result: Map, +): Promise { + const tsFiles = needsTypeMap.filter(({ filePath }) => + TS_BACKFILL_EXTS.has(path.extname(filePath)), + ); + if (tsFiles.length === 0) return; - if (native) { - const nativeResults = native.parseFiles( - filePaths, - rootDir, - !!opts.dataflow, - opts.ast !== false, - ); - const needsTypeMap: { filePath: string; relPath: string }[] = []; - for (const r of nativeResults) { - if (!r) continue; - const patched = patchNativeResult(r); - const relPath = path.relative(rootDir, r.file).split(path.sep).join('/'); - result.set(relPath, patched); - if (patched.typeMap.size === 0) { - needsTypeMap.push({ filePath: r.file, relPath }); + const parsers = await createParsers(); + for (const { filePath, relPath } of tsFiles) { + let extracted: WasmExtractResult | null | undefined; + try { + const code = fs.readFileSync(filePath, 'utf-8'); + extracted = wasmExtractSymbols(parsers, filePath, code); + if (extracted?.symbols && extracted.symbols.typeMap.size > 0) { + const symbols = 
result.get(relPath); + if (!symbols) continue; + symbols.typeMap = extracted.symbols.typeMap; + symbols._typeMapBackfilled = true; } - } - // Backfill typeMap via WASM for native binaries that predate the type-map feature - if (needsTypeMap.length > 0) { - // Only backfill for languages where WASM extraction can produce typeMap - // (TS/TSX have type annotations; JS only has `new Expr()` which native already handles) - const tsFiles = needsTypeMap.filter(({ filePath }) => - TS_BACKFILL_EXTS.has(path.extname(filePath)), - ); - if (tsFiles.length > 0) { - const parsers = await createParsers(); - for (const { filePath, relPath } of tsFiles) { - let extracted: WasmExtractResult | null | undefined; - try { - const code = fs.readFileSync(filePath, 'utf-8'); - extracted = wasmExtractSymbols(parsers, filePath, code); - if (extracted?.symbols && extracted.symbols.typeMap.size > 0) { - const symbols = result.get(relPath); - if (!symbols) continue; - symbols.typeMap = extracted.symbols.typeMap; - symbols._typeMapBackfilled = true; - } - } catch (e) { - debug(`batchExtract: typeMap backfill failed: ${toErrorMessage(e)}`); - } finally { - // Free the WASM tree to prevent memory accumulation across repeated builds - if (extracted?.tree && typeof extracted.tree.delete === 'function') { - try { - extracted.tree.delete(); - } catch (e) { - debug(`batchExtract: WASM tree cleanup failed: ${toErrorMessage(e)}`); - } - } - } + } catch (e) { + debug(`batchExtract: typeMap backfill failed: ${toErrorMessage(e)}`); + } finally { + if (extracted?.tree && typeof extracted.tree.delete === 'function') { + try { + extracted.tree.delete(); + } catch (e) { + debug(`batchExtract: WASM tree cleanup failed: ${toErrorMessage(e)}`); } } } - return result; } +} - // WASM path +/** Parse files via WASM engine, returning a Map. 
*/ +async function parseFilesWasm( + filePaths: string[], + rootDir: string, +): Promise> { + const result = new Map(); const parsers = await createParsers(); for (const filePath of filePaths) { let code: string; @@ -610,6 +592,36 @@ export async function parseFilesAuto( return result; } +/** + * Parse multiple files in bulk and return a Map. + */ +export async function parseFilesAuto( + filePaths: string[], + rootDir: string, + opts: ParseEngineOpts = {}, +): Promise> { + const { native } = resolveEngine(opts); + + if (!native) return parseFilesWasm(filePaths, rootDir); + + const result = new Map(); + const nativeResults = native.parseFiles(filePaths, rootDir, !!opts.dataflow, opts.ast !== false); + const needsTypeMap: { filePath: string; relPath: string }[] = []; + for (const r of nativeResults) { + if (!r) continue; + const patched = patchNativeResult(r); + const relPath = path.relative(rootDir, r.file).split(path.sep).join('/'); + result.set(relPath, patched); + if (patched.typeMap.size === 0) { + needsTypeMap.push({ filePath: r.file, relPath }); + } + } + if (needsTypeMap.length > 0) { + await backfillTypeMapBatch(needsTypeMap, result); + } + return result; +} + /** * Report which engine is active. */ diff --git a/src/domain/search/search/cli-formatter.ts b/src/domain/search/search/cli-formatter.ts index 70a5afa8..44eeb4fb 100644 --- a/src/domain/search/search/cli-formatter.ts +++ b/src/domain/search/search/cli-formatter.ts @@ -11,113 +11,98 @@ interface SearchOpts extends SemanticSearchOpts { offset?: number; } -export async function search( - query: string, - customDbPath: string | undefined, - opts: SearchOpts = {}, -): Promise { - const mode = opts.mode || 'hybrid'; +const kindIcon = (kind: string): string => + kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o'; - const queries = query - .split(';') - .map((q) => q.trim()) - .filter((q) => q.length > 0); - - const kindIcon = (kind: string): string => - kind === 'function' ? 'f' : kind === 'class' ? 
'*' : 'o'; - - // Keyword-only mode - if (mode === 'keyword') { - const singleQuery = queries.length === 1 ? queries[0]! : query; - const data = ftsSearchData(singleQuery, customDbPath, opts); - if (!data) { - console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.'); - return; - } - if (opts.json) { - console.log(JSON.stringify(data, null, 2)); - return; - } - console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`); - if (data.results.length === 0) { - console.log(' No results found.'); - } else { - for (const r of data.results) { - console.log( - ` BM25 ${r.bm25Score.toFixed(2)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, - ); - } - } - console.log(`\n ${data.results.length} results shown\n`); +function formatKeywordResults( + singleQuery: string, + customDbPath: string | undefined, + opts: SearchOpts, +): void { + const data = ftsSearchData(singleQuery, customDbPath, opts); + if (!data) { + console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.'); return; } - - // Semantic-only mode - if (mode === 'semantic') { - if (queries.length <= 1) { - const singleQuery = queries[0] || query; - const data = await searchData(singleQuery, customDbPath, opts); - if (!data) return; - if (opts.json) { - console.log(JSON.stringify(data, null, 2)); - return; - } - console.log(`\nSemantic search: "${singleQuery}"\n`); - if (data.results.length === 0) { - console.log(' No results above threshold.'); - } else { - for (const r of data.results) { - const bar = '#'.repeat(Math.round(r.similarity * 20)); - console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`); - console.log(` ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`); - } - } - console.log(`\n ${data.results.length} results shown\n`); - } else { - const data = await multiSearchData(queries, customDbPath, opts); - if (!data) return; - if (opts.json) { - console.log(JSON.stringify(data, null, 2)); - return; - } - console.log(`\nMulti-query semantic 
search (RRF, k=${opts.rrfK || 60}):`); - for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); - console.log(); - if (data.results.length === 0) { - console.log(' No results above threshold.'); - } else { - for (const r of data.results) { - console.log( - ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, - ); - for (const qs of r.queryScores) { - const bar = '#'.repeat(Math.round(qs.similarity * 20)); - console.log( - ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`, - ); - } - } - } - console.log(`\n ${data.results.length} results shown\n`); - } + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); return; } + console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`); + if (data.results.length === 0) { + console.log(' No results found.'); + } else { + for (const r of data.results) { + console.log( + ` BM25 ${r.bm25Score.toFixed(2)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, + ); + } + } + console.log(`\n ${data.results.length} results shown\n`); +} - // Hybrid mode (default) - const data = await hybridSearchData(query, customDbPath, opts); - - if (!data) { - warn( - 'FTS5 index not found — using semantic search only. 
Re-run `codegraph embed` to enable hybrid mode.', - ); - return search(query, customDbPath, { ...opts, mode: 'semantic' }); +async function formatSemanticSingle( + singleQuery: string, + customDbPath: string | undefined, + opts: SearchOpts, +): Promise { + const data = await searchData(singleQuery, customDbPath, opts); + if (!data) return; + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + console.log(`\nSemantic search: "${singleQuery}"\n`); + if (data.results.length === 0) { + console.log(' No results above threshold.'); + } else { + for (const r of data.results) { + const bar = '#'.repeat(Math.round(r.similarity * 20)); + console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`); + console.log(` ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`); + } } + console.log(`\n ${data.results.length} results shown\n`); +} +async function formatSemanticMulti( + queries: string[], + customDbPath: string | undefined, + opts: SearchOpts, +): Promise { + const data = await multiSearchData(queries, customDbPath, opts); + if (!data) return; if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } + console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); + for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); + console.log(); + if (data.results.length === 0) { + console.log(' No results above threshold.'); + } else { + for (const r of data.results) { + console.log( + ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, + ); + for (const qs of r.queryScores) { + const bar = '#'.repeat(Math.round(qs.similarity * 20)); + console.log( + ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`, + ); + } + } + } + console.log(`\n ${data.results.length} results shown\n`); +} +function formatHybridResults( + queries: string[], + query: string, + data: { results: any[] }, + opts: SearchOpts, +): void { const rrfK = 
opts.rrfK || 60; if (queries.length <= 1) { const singleQuery = queries[0] || query; @@ -150,3 +135,45 @@ export async function search( console.log(`\n ${data.results.length} results shown\n`); } + +export async function search( + query: string, + customDbPath: string | undefined, + opts: SearchOpts = {}, +): Promise { + const mode = opts.mode || 'hybrid'; + + const queries = query + .split(';') + .map((q) => q.trim()) + .filter((q) => q.length > 0); + + if (mode === 'keyword') { + const singleQuery = queries.length === 1 ? queries[0]! : query; + return formatKeywordResults(singleQuery, customDbPath, opts); + } + + if (mode === 'semantic') { + if (queries.length <= 1) { + return formatSemanticSingle(queries[0] || query, customDbPath, opts); + } + return formatSemanticMulti(queries, customDbPath, opts); + } + + // Hybrid mode (default) + const data = await hybridSearchData(query, customDbPath, opts); + + if (!data) { + warn( + 'FTS5 index not found — using semantic search only. Re-run `codegraph embed` to enable hybrid mode.', + ); + return search(query, customDbPath, { ...opts, mode: 'semantic' }); + } + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + formatHybridResults(queries, query, data, opts); +} diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 16ed0b90..8ac9d542 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -14,6 +14,7 @@ import { lastPathSegment, MAX_WALK_DEPTH, nodeEndLine, + setTypeMapEntry, } from './helpers.js'; /** @@ -322,6 +323,31 @@ function extractCSharpTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void extractCSharpTypeMapDepth(node, ctx, 0); } +/** Extract type info from a variable_declaration node (local vars with explicit types). 
*/ +function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const typeNode = node.childForFieldName('type') || node.child(0); + if (!typeNode || typeNode.type === 'var_keyword') return; + const typeName = extractCSharpTypeName(typeNode); + if (!typeName) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 'identifier' && ctx.typeMap) { + setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); + } + } +} + +/** Extract type info from a parameter node. */ +function handleCSharpParam(node: TreeSitterNode, ctx: ExtractorOutput): void { + const typeNode = node.childForFieldName('type'); + const nameNode = node.childForFieldName('name'); + if (!typeNode || !nameNode) return; + const typeName = extractCSharpTypeName(typeNode); + if (typeName && ctx.typeMap) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); +} + function extractCSharpTypeMapDepth( node: TreeSitterNode, ctx: ExtractorOutput, @@ -329,33 +355,10 @@ function extractCSharpTypeMapDepth( ): void { if (depth >= MAX_WALK_DEPTH) return; - // local_declaration_statement → variable_declaration → type + variable_declarator(s) if (node.type === 'variable_declaration') { - const typeNode = node.childForFieldName('type') || node.child(0); - if (typeNode && typeNode.type !== 'var_keyword') { - const typeName = extractCSharpTypeName(typeNode); - if (typeName) { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && child.type === 'variable_declarator') { - const nameNode = child.childForFieldName('name') || child.child(0); - if (nameNode && nameNode.type === 'identifier') { - ctx.typeMap?.set(nameNode.text, { type: typeName, confidence: 0.9 }); - } - } - } - } - } - } - - // Method/constructor parameter: parameter node has type + name fields - if 
(node.type === 'parameter') { - const typeNode = node.childForFieldName('type'); - const nameNode = node.childForFieldName('name'); - if (typeNode && nameNode) { - const typeName = extractCSharpTypeName(typeNode); - if (typeName) ctx.typeMap?.set(nameNode.text, { type: typeName, confidence: 0.9 }); - } + handleCSharpVarDecl(node, ctx); + } else if (node.type === 'parameter') { + handleCSharpParam(node, ctx); } for (let i = 0; i < node.childCount; i++) { diff --git a/src/extractors/go.ts b/src/extractors/go.ts index 3e857b28..7c7b7e8d 100644 --- a/src/extractors/go.ts +++ b/src/extractors/go.ts @@ -4,7 +4,6 @@ import type { SubDeclaration, TreeSitterNode, TreeSitterTree, - TypeMapEntry, } from '../types.js'; import { findChild, @@ -12,6 +11,7 @@ import { lastPathSegment, MAX_WALK_DEPTH, nodeEndLine, + setTypeMapEntry, stripQuotes, } from './helpers.js'; @@ -113,43 +113,63 @@ function handleGoTypeDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!spec || spec.type !== 'type_spec') continue; const nameNode = spec.childForFieldName('name'); const typeNode = spec.childForFieldName('type'); - if (nameNode && typeNode) { - if (typeNode.type === 'struct_type') { - const fields = extractStructFields(typeNode); - ctx.definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fields.length > 0 ? 
fields : undefined, - }); - } else if (typeNode.type === 'interface_type') { - ctx.definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - for (let j = 0; j < typeNode.childCount; j++) { - const member = typeNode.child(j); - if (member && member.type === 'method_elem') { - const methName = member.childForFieldName('name'); - if (methName) { - ctx.definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: member.startPosition.row + 1, - endLine: member.endPosition.row + 1, - }); - } - } - } - } else { + if (!nameNode || !typeNode) continue; + + if (typeNode.type === 'struct_type') { + handleGoStructType(node, nameNode, typeNode, ctx); + } else if (typeNode.type === 'interface_type') { + handleGoInterfaceType(node, nameNode, typeNode, ctx); + } else { + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } +} + +/** Handle a struct type_spec: emit struct definition with field children. */ +function handleGoStructType( + declNode: TreeSitterNode, + nameNode: TreeSitterNode, + typeNode: TreeSitterNode, + ctx: ExtractorOutput, +): void { + const fields = extractStructFields(typeNode); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: declNode.startPosition.row + 1, + endLine: nodeEndLine(declNode), + children: fields.length > 0 ? fields : undefined, + }); +} + +/** Handle an interface type_spec: emit interface definition + method definitions. 
*/ +function handleGoInterfaceType( + declNode: TreeSitterNode, + nameNode: TreeSitterNode, + typeNode: TreeSitterNode, + ctx: ExtractorOutput, +): void { + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: declNode.startPosition.row + 1, + endLine: nodeEndLine(declNode), + }); + for (let j = 0; j < typeNode.childCount; j++) { + const member = typeNode.child(j); + if (member && member.type === 'method_elem') { + const methName = member.childForFieldName('name'); + if (methName) { ctx.definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: member.endPosition.row + 1, }); } } @@ -227,113 +247,104 @@ function extractGoTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void { extractGoTypeMapDepth(node, ctx, 0); } -function setIfHigher( - typeMap: Map, - name: string, - type: string, - confidence: number, +/** Map identifiers in a typed declaration node to their type (confidence 0.9). */ +function handleTypedIdentifiers( + node: TreeSitterNode, + typeMap: Map, ): void { - const existing = typeMap.get(name); - if (!existing || confidence > existing.confidence) { - typeMap.set(name, { type, confidence }); + const typeNode = node.childForFieldName('type'); + if (!typeNode) return; + const typeName = extractGoTypeName(typeNode); + if (!typeName) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'identifier') { + setTypeMapEntry(typeMap, child.text, typeName, 0.9); + } } } -function extractGoTypeMapDepth(node: TreeSitterNode, ctx: ExtractorOutput, depth: number): void { - if (depth >= MAX_WALK_DEPTH) return; - - // var x MyType = ... 
or var x, y MyType → var_declaration > var_spec (confidence 0.9) - if (node.type === 'var_spec') { - const typeNode = node.childForFieldName('type'); +/** Infer type from a single RHS expression in a short var declaration. */ +function inferShortVarType( + varNode: TreeSitterNode, + rhs: TreeSitterNode, + typeMap: Map, +): void { + // x := Struct{...} — composite literal (confidence 1.0) + if (rhs.type === 'composite_literal') { + const typeNode = rhs.childForFieldName('type'); if (typeNode) { const typeName = extractGoTypeName(typeNode); - if (typeName) { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && child.type === 'identifier') { - if (ctx.typeMap) setIfHigher(ctx.typeMap, child.text, typeName, 0.9); - } - } + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 1.0); + } + } + // x := &Struct{...} — address-of composite literal (confidence 1.0) + if (rhs.type === 'unary_expression') { + const operand = rhs.childForFieldName('operand'); + if (operand && operand.type === 'composite_literal') { + const typeNode = operand.childForFieldName('type'); + if (typeNode) { + const typeName = extractGoTypeName(typeNode); + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 1.0); } } } - - // Function/method parameter types: parameter_declaration (confidence 0.9) - if (node.type === 'parameter_declaration') { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = extractGoTypeName(typeNode); - if (typeName) { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && child.type === 'identifier') { - if (ctx.typeMap) setIfHigher(ctx.typeMap, child.text, typeName, 0.9); - } - } + // x := NewFoo() or x := pkg.NewFoo() — factory function (confidence 0.7) + if (rhs.type === 'call_expression') { + const fn = rhs.childForFieldName('function'); + if (fn && fn.type === 'selector_expression') { + const field = fn.childForFieldName('field'); + if 
(field?.text.startsWith('New')) { + const typeName = field.text.slice(3); + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 0.7); } + } else if (fn && fn.type === 'identifier' && fn.text.startsWith('New')) { + const typeName = fn.text.slice(3); + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 0.7); } } +} - // short_var_declaration: x := Struct{}, x := &Struct{}, x := NewFoo() - // Handles multi-variable forms: x, y := A{}, B{} - if (node.type === 'short_var_declaration') { - const left = node.childForFieldName('left'); - const right = node.childForFieldName('right'); - if (left && right) { - const lefts = - left.type === 'expression_list' - ? Array.from({ length: left.childCount }, (_, i) => left.child(i)).filter( - (c): c is TreeSitterNode => c?.type === 'identifier', - ) - : left.type === 'identifier' - ? [left] - : []; - const rights = - right.type === 'expression_list' - ? Array.from({ length: right.childCount }, (_, i) => right.child(i)).filter( - (c): c is TreeSitterNode => !!c?.type, - ) - : [right]; - - for (let idx = 0; idx < lefts.length; idx++) { - const varNode = lefts[idx]; - const rhs = rights[idx]; - if (!varNode || !rhs) continue; - - // x := Struct{...} — composite literal (confidence 1.0) - if (rhs.type === 'composite_literal') { - const typeNode = rhs.childForFieldName('type'); - if (typeNode) { - const typeName = extractGoTypeName(typeNode); - if (typeName && ctx.typeMap) setIfHigher(ctx.typeMap, varNode.text, typeName, 1.0); - } - } - // x := &Struct{...} — address-of composite literal (confidence 1.0) - if (rhs.type === 'unary_expression') { - const operand = rhs.childForFieldName('operand'); - if (operand && operand.type === 'composite_literal') { - const typeNode = operand.childForFieldName('type'); - if (typeNode) { - const typeName = extractGoTypeName(typeNode); - if (typeName && ctx.typeMap) setIfHigher(ctx.typeMap, varNode.text, typeName, 1.0); - } - } - } - // x := NewFoo() or x := pkg.NewFoo() — factory 
function (confidence 0.7) - if (rhs.type === 'call_expression') { - const fn = rhs.childForFieldName('function'); - if (fn && fn.type === 'selector_expression') { - const field = fn.childForFieldName('field'); - if (field?.text.startsWith('New')) { - const typeName = field.text.slice(3); - if (typeName && ctx.typeMap) setIfHigher(ctx.typeMap, varNode.text, typeName, 0.7); - } - } else if (fn && fn.type === 'identifier' && fn.text.startsWith('New')) { - const typeName = fn.text.slice(3); - if (typeName && ctx.typeMap) setIfHigher(ctx.typeMap, varNode.text, typeName, 0.7); - } - } - } +/** Handle short_var_declaration: x := Struct{}, x := &Struct{}, x := NewFoo(). */ +function handleShortVarDecl( + node: TreeSitterNode, + typeMap: Map, +): void { + const left = node.childForFieldName('left'); + const right = node.childForFieldName('right'); + if (!left || !right) return; + + const lefts = + left.type === 'expression_list' + ? Array.from({ length: left.childCount }, (_, i) => left.child(i)).filter( + (c): c is TreeSitterNode => c?.type === 'identifier', + ) + : left.type === 'identifier' + ? [left] + : []; + const rights = + right.type === 'expression_list' + ? 
Array.from({ length: right.childCount }, (_, i) => right.child(i)).filter( + (c): c is TreeSitterNode => !!c?.type, + ) + : [right]; + + for (let idx = 0; idx < lefts.length; idx++) { + const varNode = lefts[idx]; + const rhs = rights[idx]; + if (!varNode || !rhs) continue; + inferShortVarType(varNode, rhs, typeMap); + } +} + +function extractGoTypeMapDepth(node: TreeSitterNode, ctx: ExtractorOutput, depth: number): void { + if (depth >= MAX_WALK_DEPTH) return; + + if (ctx.typeMap) { + if (node.type === 'var_spec' || node.type === 'parameter_declaration') { + handleTypedIdentifiers(node, ctx.typeMap); + } else if (node.type === 'short_var_declaration') { + handleShortVarDecl(node, ctx.typeMap); } } diff --git a/src/extractors/helpers.ts b/src/extractors/helpers.ts index 589cb2da..1f881f01 100644 --- a/src/extractors/helpers.ts +++ b/src/extractors/helpers.ts @@ -1,4 +1,4 @@ -import type { SubDeclaration, TreeSitterNode } from '../types.js'; +import type { SubDeclaration, TreeSitterNode, TypeMapEntry } from '../types.js'; /** * Maximum recursion depth for tree-sitter AST walkers. @@ -18,6 +18,22 @@ export function findChild(node: TreeSitterNode, type: string): TreeSitterNode | return null; } +/** + * Merge a type-map entry, keeping the higher-confidence one. + * Shared across all language extractors that build type maps for call resolution. + */ +export function setTypeMapEntry( + typeMap: Map, + name: string, + type: string, + confidence: number, +): void { + const existing = typeMap.get(name); + if (!existing || confidence > existing.confidence) { + typeMap.set(name, { type, confidence }); + } +} + /** * Extract visibility from a node by scanning its children for modifier keywords. * Works for Java, C#, PHP, and similar languages where modifiers are child nodes. 
diff --git a/src/extractors/java.ts b/src/extractors/java.ts index b29d053c..b13b3c65 100644 --- a/src/extractors/java.ts +++ b/src/extractors/java.ts @@ -111,6 +111,25 @@ function handleJavaClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { } } +const JAVA_TYPE_NODE_TYPES = new Set(['type_identifier', 'identifier', 'generic_type']); + +/** Resolve interface name from a type node (handles generic_type unwrapping). */ +function resolveJavaIfaceName(node: TreeSitterNode): string | undefined { + return node.type === 'generic_type' ? node.child(0)?.text : node.text; +} + +/** Push a single interface type node as an implements entry. */ +function pushJavaIface( + node: TreeSitterNode, + className: string, + line: number, + ctx: ExtractorOutput, +): void { + if (!JAVA_TYPE_NODE_TYPES.has(node.type)) return; + const ifaceName = resolveJavaIfaceName(node); + if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); +} + function extractJavaInterfaces( interfaces: TreeSitterNode, className: string, @@ -119,28 +138,15 @@ function extractJavaInterfaces( ): void { for (let i = 0; i < interfaces.childCount; i++) { const child = interfaces.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'type_list' || - child.type === 'generic_type') - ) { - if (child.type === 'type_list') { - for (let j = 0; j < child.childCount; j++) { - const t = child.child(j); - if ( - t && - (t.type === 'type_identifier' || t.type === 'identifier' || t.type === 'generic_type') - ) { - const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; - if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); - } - } - } else { - const ifaceName = child.type === 'generic_type' ? 
child.child(0)?.text : child.text; - if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); + if (!child) continue; + + if (child.type === 'type_list') { + for (let j = 0; j < child.childCount; j++) { + const t = child.child(j); + if (t) pushJavaIface(t, className, line, ctx); } + } else { + pushJavaIface(child, className, line, ctx); } } } diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 3b083ed7..4a4f5f8b 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -12,7 +12,13 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, findParentNode, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + findChild, + findParentNode, + MAX_WALK_DEPTH, + nodeEndLine, + setTypeMapEntry, +} from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS: Set = new Set([ @@ -87,6 +93,182 @@ export function extractSymbols( // ── Query-based extraction (fast path) ────────────────────────────────────── +/** Handle function_declaration capture. */ +function handleFnCapture(c: Record, definitions: Definition[]): void { + const fnChildren = extractParameters(c.fn_node!); + definitions.push({ + name: c.fn_name!.text, + kind: 'function', + line: c.fn_node!.startPosition.row + 1, + endLine: nodeEndLine(c.fn_node!), + children: fnChildren.length > 0 ? fnChildren : undefined, + }); +} + +/** Handle variable_declarator with arrow_function / function_expression capture. */ +function handleVarFnCapture(c: Record, definitions: Definition[]): void { + const declNode = c.varfn_name!.parent?.parent; + const line = declNode ? declNode.startPosition.row + 1 : c.varfn_name!.startPosition.row + 1; + const varFnChildren = extractParameters(c.varfn_value!); + definitions.push({ + name: c.varfn_name!.text, + kind: 'function', + line, + endLine: nodeEndLine(c.varfn_value!), + children: varFnChildren.length > 0 ? 
varFnChildren : undefined, + }); +} + +/** Handle class_declaration capture. */ +function handleClassCapture( + c: Record, + definitions: Definition[], + classes: ClassRelation[], +): void { + const className = c.cls_name!.text; + const startLine = c.cls_node!.startPosition.row + 1; + const clsChildren = extractClassProperties(c.cls_node!); + definitions.push({ + name: className, + kind: 'class', + line: startLine, + endLine: nodeEndLine(c.cls_node!), + children: clsChildren.length > 0 ? clsChildren : undefined, + }); + const heritage = + c.cls_node!.childForFieldName('heritage') || findChild(c.cls_node!, 'class_heritage'); + if (heritage) { + const superName = extractSuperclass(heritage); + if (superName) classes.push({ name: className, extends: superName, line: startLine }); + const implementsList = extractImplements(heritage); + for (const iface of implementsList) { + classes.push({ name: className, implements: iface, line: startLine }); + } + } +} + +/** Handle method_definition capture. */ +function handleMethodCapture(c: Record, definitions: Definition[]): void { + const methName = c.meth_name!.text; + const parentClass = findParentClass(c.meth_node!); + const fullName = parentClass ? `${parentClass}.${methName}` : methName; + const methChildren = extractParameters(c.meth_node!); + const methVis = extractVisibility(c.meth_node!); + definitions.push({ + name: fullName, + kind: 'method', + line: c.meth_node!.startPosition.row + 1, + endLine: nodeEndLine(c.meth_node!), + children: methChildren.length > 0 ? methChildren : undefined, + visibility: methVis, + }); +} + +/** Handle export_statement capture. 
*/ +function handleExportCapture( + c: Record, + exps: Export[], + imports: Import[], +): void { + const exportLine = c.exp_node!.startPosition.row + 1; + const decl = c.exp_node!.childForFieldName('declaration'); + if (decl) { + const declType = decl.type; + const kindMap: Record = { + function_declaration: 'function', + class_declaration: 'class', + interface_declaration: 'interface', + type_alias_declaration: 'type', + }; + const kind = kindMap[declType]; + if (kind) { + const n = decl.childForFieldName('name'); + if (n) exps.push({ name: n.text, kind: kind as Export['kind'], line: exportLine }); + } + } + const source = c.exp_node!.childForFieldName('source') || findChild(c.exp_node!, 'string'); + if (source && !decl) { + const modPath = source.text.replace(/['"]/g, ''); + const reexportNames = extractImportNames(c.exp_node!); + const nodeText = c.exp_node!.text; + const isWildcard = nodeText.includes('export *') || nodeText.includes('export*'); + imports.push({ + source: modPath, + names: reexportNames, + line: exportLine, + reexport: true, + wildcardReexport: isWildcard && reexportNames.length === 0, + }); + } +} + +/** Dispatch a single query match to the appropriate handler. 
*/ +function dispatchQueryMatch( + c: Record, + definitions: Definition[], + calls: Call[], + imports: Import[], + classes: ClassRelation[], + exps: Export[], +): void { + if (c.fn_node) { + handleFnCapture(c, definitions); + } else if (c.varfn_name) { + handleVarFnCapture(c, definitions); + } else if (c.cls_node) { + handleClassCapture(c, definitions, classes); + } else if (c.meth_node) { + handleMethodCapture(c, definitions); + } else if (c.iface_node) { + const ifaceName = c.iface_name!.text; + definitions.push({ + name: ifaceName, + kind: 'interface', + line: c.iface_node.startPosition.row + 1, + endLine: nodeEndLine(c.iface_node), + }); + const body = + c.iface_node.childForFieldName('body') || + findChild(c.iface_node, 'interface_body') || + findChild(c.iface_node, 'object_type'); + if (body) extractInterfaceMethods(body, ifaceName, definitions); + } else if (c.type_node) { + definitions.push({ + name: c.type_name!.text, + kind: 'type', + line: c.type_node.startPosition.row + 1, + endLine: nodeEndLine(c.type_node), + }); + } else if (c.imp_node) { + const isTypeOnly = c.imp_node.text.startsWith('import type'); + const modPath = c.imp_source!.text.replace(/['"]/g, ''); + const names = extractImportNames(c.imp_node); + imports.push({ + source: modPath, + names, + line: c.imp_node.startPosition.row + 1, + typeOnly: isTypeOnly, + }); + } else if (c.exp_node) { + handleExportCapture(c, exps, imports); + } else if (c.callfn_node) { + calls.push({ + name: c.callfn_name!.text, + line: c.callfn_node.startPosition.row + 1, + }); + } else if (c.callmem_node) { + const callInfo = extractCallInfo(c.callmem_fn!, c.callmem_node); + if (callInfo) calls.push(callInfo); + const cbDef = extractCallbackDefinition(c.callmem_node, c.callmem_fn); + if (cbDef) definitions.push(cbDef); + } else if (c.callsub_node) { + const callInfo = extractCallInfo(c.callsub_fn!, c.callsub_node); + if (callInfo) calls.push(callInfo); + } else if (c.assign_node) { + 
handleCommonJSAssignment(c.assign_left!, c.assign_right!, c.assign_node, imports); + } +} + function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): ExtractorOutput { const definitions: Definition[] = []; const calls: Call[] = []; @@ -101,151 +283,7 @@ function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): Extr // Build capture lookup for this match (1-3 captures each, very fast) const c: Record = Object.create(null); for (const cap of match.captures) c[cap.name] = cap.node; - - if (c.fn_node) { - // function_declaration - const fnChildren = extractParameters(c.fn_node); - definitions.push({ - name: c.fn_name!.text, - kind: 'function', - line: c.fn_node.startPosition.row + 1, - endLine: nodeEndLine(c.fn_node), - children: fnChildren.length > 0 ? fnChildren : undefined, - }); - } else if (c.varfn_name) { - // variable_declarator with arrow_function / function_expression - const declNode = c.varfn_name.parent?.parent; - const line = declNode ? declNode.startPosition.row + 1 : c.varfn_name.startPosition.row + 1; - const varFnChildren = extractParameters(c.varfn_value!); - definitions.push({ - name: c.varfn_name.text, - kind: 'function', - line, - endLine: nodeEndLine(c.varfn_value!), - children: varFnChildren.length > 0 ? varFnChildren : undefined, - }); - } else if (c.cls_node) { - // class_declaration - const className = c.cls_name!.text; - const startLine = c.cls_node.startPosition.row + 1; - const clsChildren = extractClassProperties(c.cls_node); - definitions.push({ - name: className, - kind: 'class', - line: startLine, - endLine: nodeEndLine(c.cls_node), - children: clsChildren.length > 0 ? 
clsChildren : undefined, - }); - const heritage = - c.cls_node.childForFieldName('heritage') || findChild(c.cls_node, 'class_heritage'); - if (heritage) { - const superName = extractSuperclass(heritage); - if (superName) classes.push({ name: className, extends: superName, line: startLine }); - const implementsList = extractImplements(heritage); - for (const iface of implementsList) { - classes.push({ name: className, implements: iface, line: startLine }); - } - } - } else if (c.meth_node) { - // method_definition - const methName = c.meth_name!.text; - const parentClass = findParentClass(c.meth_node); - const fullName = parentClass ? `${parentClass}.${methName}` : methName; - const methChildren = extractParameters(c.meth_node); - const methVis = extractVisibility(c.meth_node); - definitions.push({ - name: fullName, - kind: 'method', - line: c.meth_node.startPosition.row + 1, - endLine: nodeEndLine(c.meth_node), - children: methChildren.length > 0 ? methChildren : undefined, - visibility: methVis, - }); - } else if (c.iface_node) { - // interface_declaration (TS/TSX only) - const ifaceName = c.iface_name!.text; - definitions.push({ - name: ifaceName, - kind: 'interface', - line: c.iface_node.startPosition.row + 1, - endLine: nodeEndLine(c.iface_node), - }); - const body = - c.iface_node.childForFieldName('body') || - findChild(c.iface_node, 'interface_body') || - findChild(c.iface_node, 'object_type'); - if (body) extractInterfaceMethods(body, ifaceName, definitions); - } else if (c.type_node) { - // type_alias_declaration (TS/TSX only) - definitions.push({ - name: c.type_name!.text, - kind: 'type', - line: c.type_node.startPosition.row + 1, - endLine: nodeEndLine(c.type_node), - }); - } else if (c.imp_node) { - // import_statement - const isTypeOnly = c.imp_node.text.startsWith('import type'); - const modPath = c.imp_source!.text.replace(/['"]/g, ''); - const names = extractImportNames(c.imp_node); - imports.push({ - source: modPath, - names, - line: 
c.imp_node.startPosition.row + 1, - typeOnly: isTypeOnly, - }); - } else if (c.exp_node) { - // export_statement - const exportLine = c.exp_node.startPosition.row + 1; - const decl = c.exp_node.childForFieldName('declaration'); - if (decl) { - const declType = decl.type; - const kindMap: Record = { - function_declaration: 'function', - class_declaration: 'class', - interface_declaration: 'interface', - type_alias_declaration: 'type', - }; - const kind = kindMap[declType]; - if (kind) { - const n = decl.childForFieldName('name'); - if (n) exps.push({ name: n.text, kind: kind as Export['kind'], line: exportLine }); - } - } - const source = c.exp_node.childForFieldName('source') || findChild(c.exp_node, 'string'); - if (source && !decl) { - const modPath = source.text.replace(/['"]/g, ''); - const reexportNames = extractImportNames(c.exp_node); - const nodeText = c.exp_node.text; - const isWildcard = nodeText.includes('export *') || nodeText.includes('export*'); - imports.push({ - source: modPath, - names: reexportNames, - line: exportLine, - reexport: true, - wildcardReexport: isWildcard && reexportNames.length === 0, - }); - } - } else if (c.callfn_node) { - // call_expression with identifier function - calls.push({ - name: c.callfn_name!.text, - line: c.callfn_node.startPosition.row + 1, - }); - } else if (c.callmem_node) { - // call_expression with member_expression function - const callInfo = extractCallInfo(c.callmem_fn!, c.callmem_node); - if (callInfo) calls.push(callInfo); - const cbDef = extractCallbackDefinition(c.callmem_node, c.callmem_fn); - if (cbDef) definitions.push(cbDef); - } else if (c.callsub_node) { - // call_expression with subscript_expression function - const callInfo = extractCallInfo(c.callsub_fn!, c.callsub_node); - if (callInfo) calls.push(callInfo); - } else if (c.assign_node) { - // CommonJS: module.exports = require(...) / module.exports = { ...require(...) 
} - handleCommonJSAssignment(c.assign_left!, c.assign_right!, c.assign_node, imports); - } + dispatchQueryMatch(c, definitions, calls, imports, classes, exps); } // Extract top-level constants via targeted walk (query patterns don't cover these) @@ -371,48 +409,49 @@ function handleCommonJSAssignment( const leftText = left.text; if (!leftText.startsWith('module.exports') && leftText !== 'exports') return; - const rightType = right.type; const assignLine = node.startPosition.row + 1; - if (rightType === 'call_expression') { - const fn = right.childForFieldName('function'); - const args = right.childForFieldName('arguments') || findChild(right, 'arguments'); - if (fn && fn.text === 'require' && args) { - const strArg = findChild(args, 'string'); - if (strArg) { - imports.push({ - source: strArg.text.replace(/['"]/g, ''), - names: [], - line: assignLine, - reexport: true, - wildcardReexport: true, - }); - } + // module.exports = require("…") — direct re-export + if (right.type === 'call_expression') { + extractRequireReexport(right, assignLine, imports); + } + + // module.exports = { ...require("…") } — spread re-export + if (right.type === 'object') { + extractSpreadRequireReexports(right, assignLine, imports); + } +} + +/** Extract a direct `require()` re-export from a call_expression. 
*/ +function extractRequireReexport(callExpr: TreeSitterNode, line: number, imports: Import[]): void { + const fn = callExpr.childForFieldName('function'); + const args = callExpr.childForFieldName('arguments') || findChild(callExpr, 'arguments'); + if (fn && fn.text === 'require' && args) { + const strArg = findChild(args, 'string'); + if (strArg) { + imports.push({ + source: strArg.text.replace(/['"]/g, ''), + names: [], + line, + reexport: true, + wildcardReexport: true, + }); } } +} - if (rightType === 'object') { - for (let ci = 0; ci < right.childCount; ci++) { - const child = right.child(ci); - if (child && child.type === 'spread_element') { - const spreadExpr = child.child(1) || child.childForFieldName('value'); - if (spreadExpr && spreadExpr.type === 'call_expression') { - const fn2 = spreadExpr.childForFieldName('function'); - const args2 = - spreadExpr.childForFieldName('arguments') || findChild(spreadExpr, 'arguments'); - if (fn2 && fn2.text === 'require' && args2) { - const strArg2 = findChild(args2, 'string'); - if (strArg2) { - imports.push({ - source: strArg2.text.replace(/['"]/g, ''), - names: [], - line: assignLine, - reexport: true, - wildcardReexport: true, - }); - } - } - } +/** Extract `...require()` re-exports from spread elements inside an object literal. */ +function extractSpreadRequireReexports( + objectNode: TreeSitterNode, + line: number, + imports: Import[], +): void { + for (let ci = 0; ci < objectNode.childCount; ci++) { + const child = objectNode.child(ci); + if (child && child.type === 'spread_element') { + const spreadExpr = child.child(1) || child.childForFieldName('value'); + if (spreadExpr && spreadExpr.type === 'call_expression') { + extractRequireReexport(spreadExpr, line, imports); } } } @@ -936,56 +975,13 @@ function extractNewExprTypeName(newExprNode: TreeSitterNode): string | null { * Higher-confidence entries take priority when the same variable is seen twice. 
*/ function extractTypeMapWalk(rootNode: TreeSitterNode, typeMap: Map): void { - function setIfHigher(name: string, type: string, confidence: number): void { - const existing = typeMap.get(name); - if (!existing || confidence > existing.confidence) { - typeMap.set(name, { type, confidence }); - } - } - function walk(node: TreeSitterNode, depth: number): void { if (depth >= MAX_WALK_DEPTH) return; const t = node.type; if (t === 'variable_declarator') { - const nameN = node.childForFieldName('name'); - if (nameN && nameN.type === 'identifier') { - const typeAnno = findChild(node, 'type_annotation'); - if (typeAnno) { - const typeName = extractSimpleTypeName(typeAnno); - if (typeName) setIfHigher(nameN.text, typeName, 0.9); - } - const valueN = node.childForFieldName('value'); - if (valueN) { - // Constructor: const x = new Foo() → confidence 1.0 - if (valueN.type === 'new_expression') { - const ctorType = extractNewExprTypeName(valueN); - if (ctorType) setIfHigher(nameN.text, ctorType, 1.0); - } - // Factory method: const x = Foo.create() → confidence 0.7 - else if (valueN.type === 'call_expression') { - const fn = valueN.childForFieldName('function'); - if (fn && fn.type === 'member_expression') { - const obj = fn.childForFieldName('object'); - if (obj && obj.type === 'identifier') { - const objName = obj.text; - if (objName[0]! 
!== objName[0]!.toLowerCase() && !BUILTIN_GLOBALS.has(objName)) { - setIfHigher(nameN.text, objName, 0.7); - } - } - } - } - } - } + handleVarDeclaratorTypeMap(node, typeMap); } else if (t === 'required_parameter' || t === 'optional_parameter') { - const nameNode = - node.childForFieldName('pattern') || node.childForFieldName('left') || node.child(0); - if (nameNode && nameNode.type === 'identifier') { - const typeAnno = findChild(node, 'type_annotation'); - if (typeAnno) { - const typeName = extractSimpleTypeName(typeAnno); - if (typeName) setIfHigher(nameNode.text, typeName, 0.9); - } - } + handleParamTypeMap(node, typeMap); } for (let i = 0; i < node.childCount; i++) { walk(node.child(i)!, depth + 1); @@ -994,6 +990,56 @@ function extractTypeMapWalk(rootNode: TreeSitterNode, typeMap: Map, +): void { + const nameN = node.childForFieldName('name'); + if (!nameN || nameN.type !== 'identifier') return; + + // Type annotation: const x: Foo = … + const typeAnno = findChild(node, 'type_annotation'); + if (typeAnno) { + const typeName = extractSimpleTypeName(typeAnno); + if (typeName) setTypeMapEntry(typeMap, nameN.text, typeName, 0.9); + } + + const valueN = node.childForFieldName('value'); + if (!valueN) return; + + // Constructor: const x = new Foo() → confidence 1.0 + if (valueN.type === 'new_expression') { + const ctorType = extractNewExprTypeName(valueN); + if (ctorType) setTypeMapEntry(typeMap, nameN.text, ctorType, 1.0); + } + // Factory method: const x = Foo.create() → confidence 0.7 + else if (valueN.type === 'call_expression') { + const fn = valueN.childForFieldName('function'); + if (fn && fn.type === 'member_expression') { + const obj = fn.childForFieldName('object'); + if (obj && obj.type === 'identifier') { + const objName = obj.text; + if (objName[0]! 
!== objName[0]!.toLowerCase() && !BUILTIN_GLOBALS.has(objName)) { + setTypeMapEntry(typeMap, nameN.text, objName, 0.7); + } + } + } + } +} + +/** Extract type info from a required_parameter or optional_parameter. */ +function handleParamTypeMap(node: TreeSitterNode, typeMap: Map): void { + const nameNode = + node.childForFieldName('pattern') || node.childForFieldName('left') || node.child(0); + if (!nameNode || nameNode.type !== 'identifier') return; + const typeAnno = findChild(node, 'type_annotation'); + if (typeAnno) { + const typeName = extractSimpleTypeName(typeAnno); + if (typeName) setTypeMapEntry(typeMap, nameNode.text, typeName, 0.9); + } +} + function extractReceiverName(objNode: TreeSitterNode | null): string | undefined { if (!objNode) return undefined; const t = objNode.type; @@ -1006,57 +1052,66 @@ function extractCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode): Call | n if (fnType === 'identifier') { return { name: fn.text, line: callNode.startPosition.row + 1 }; } - if (fnType === 'member_expression') { - const obj = fn.childForFieldName('object'); - const prop = fn.childForFieldName('property'); - if (!prop) return null; - - const callLine = callNode.startPosition.row + 1; - const propText = prop.text; - - if (propText === 'call' || propText === 'apply' || propText === 'bind') { - if (obj && obj.type === 'identifier') - return { name: obj.text, line: callLine, dynamic: true }; - if (obj && obj.type === 'member_expression') { - const innerProp = obj.childForFieldName('property'); - if (innerProp) return { name: innerProp.text, line: callLine, dynamic: true }; - } - } + return extractMemberExprCallInfo(fn, callNode); + } + if (fnType === 'subscript_expression') { + return extractSubscriptCallInfo(fn, callNode); + } + return null; +} - const propType = prop.type; - if (propType === 'string' || propType === 'string_fragment') { - const methodName = propText.replace(/['"]/g, ''); - if (methodName) { - const receiver = extractReceiverName(obj); - 
return { name: methodName, line: callLine, dynamic: true, receiver }; - } - } +/** Extract call info from a member_expression function node (obj.method()). */ +function extractMemberExprCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode): Call | null { + const obj = fn.childForFieldName('object'); + const prop = fn.childForFieldName('property'); + if (!prop) return null; - const receiver = extractReceiverName(obj); - return { name: propText, line: callLine, receiver }; + const callLine = callNode.startPosition.row + 1; + const propText = prop.text; + + // .call()/.apply()/.bind() — dynamic invocation + if (propText === 'call' || propText === 'apply' || propText === 'bind') { + if (obj && obj.type === 'identifier') return { name: obj.text, line: callLine, dynamic: true }; + if (obj && obj.type === 'member_expression') { + const innerProp = obj.childForFieldName('property'); + if (innerProp) return { name: innerProp.text, line: callLine, dynamic: true }; + } } - if (fnType === 'subscript_expression') { - const obj = fn.childForFieldName('object'); - const index = fn.childForFieldName('index'); - if (index) { - const indexType = index.type; - if (indexType === 'string' || indexType === 'template_string') { - const methodName = index.text.replace(/['"`]/g, ''); - if (methodName && !methodName.includes('$')) { - const receiver = extractReceiverName(obj); - return { - name: methodName, - line: callNode.startPosition.row + 1, - dynamic: true, - receiver, - }; - } - } + // Computed property: obj["method"]() + const propType = prop.type; + if (propType === 'string' || propType === 'string_fragment') { + const methodName = propText.replace(/['"]/g, ''); + if (methodName) { + const receiver = extractReceiverName(obj); + return { name: methodName, line: callLine, dynamic: true, receiver }; } } + const receiver = extractReceiverName(obj); + return { name: propText, line: callLine, receiver }; +} + +/** Extract call info from a subscript_expression function node 
(obj["method"]()). */ +function extractSubscriptCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode): Call | null { + const obj = fn.childForFieldName('object'); + const index = fn.childForFieldName('index'); + if (!index) return null; + + const indexType = index.type; + if (indexType === 'string' || indexType === 'template_string') { + const methodName = index.text.replace(/['"`]/g, ''); + if (methodName && !methodName.includes('$')) { + const receiver = extractReceiverName(obj); + return { + name: methodName, + line: callNode.startPosition.row + 1, + dynamic: true, + receiver, + }; + } + } return null; } diff --git a/src/extractors/php.ts b/src/extractors/php.ts index dc2820fd..95e75d8c 100644 --- a/src/extractors/php.ts +++ b/src/extractors/php.ts @@ -32,6 +32,39 @@ function extractPhpParameters(fnNode: TreeSitterNode): SubDeclaration[] { return params; } +/** Extract property declarations from a PHP class member. */ +function extractPhpProperties(member: TreeSitterNode, children: SubDeclaration[]): void { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'property_element') continue; + const varNode = findChild(el, 'variable_name'); + if (varNode) { + children.push({ + name: varNode.text, + kind: 'property', + line: member.startPosition.row + 1, + visibility: extractModifierVisibility(member), + }); + } + } +} + +/** Extract constant declarations from a PHP class member. 
*/ +function extractPhpConstants(member: TreeSitterNode, children: SubDeclaration[]): void { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'const_element') continue; + const nameNode = el.childForFieldName('name') || findChild(el, 'name'); + if (nameNode) { + children.push({ + name: nameNode.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } + } +} + function extractPhpClassChildren(classNode: TreeSitterNode): SubDeclaration[] { const children: SubDeclaration[] = []; const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); @@ -40,32 +73,9 @@ function extractPhpClassChildren(classNode: TreeSitterNode): SubDeclaration[] { const member = body.child(i); if (!member) continue; if (member.type === 'property_declaration') { - for (let j = 0; j < member.childCount; j++) { - const el = member.child(j); - if (!el || el.type !== 'property_element') continue; - const varNode = findChild(el, 'variable_name'); - if (varNode) { - children.push({ - name: varNode.text, - kind: 'property', - line: member.startPosition.row + 1, - visibility: extractModifierVisibility(member), - }); - } - } + extractPhpProperties(member, children); } else if (member.type === 'const_declaration') { - for (let j = 0; j < member.childCount; j++) { - const el = member.child(j); - if (!el || el.type !== 'const_element') continue; - const nameNode = el.childForFieldName('name') || findChild(el, 'name'); - if (nameNode) { - children.push({ - name: nameNode.text, - kind: 'constant', - line: member.startPosition.row + 1, - }); - } - } + extractPhpConstants(member, children); } } return children; diff --git a/src/extractors/python.ts b/src/extractors/python.ts index 8f98ca34..a29b3ebe 100644 --- a/src/extractors/python.ts +++ b/src/extractors/python.ts @@ -4,7 +4,6 @@ import type { SubDeclaration, TreeSitterNode, TreeSitterTree, - TypeMapEntry, } from '../types.js'; import { findChild, @@ -12,6 
+11,7 @@ import { MAX_WALK_DEPTH, nodeEndLine, pythonVisibility, + setTypeMapEntry, } from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ @@ -274,6 +274,37 @@ function extractPythonParameters(fnNode: TreeSitterNode): SubDeclaration[] { return params; } +/** Extract class-level assignment properties from expression statements. */ +function extractClassAssignment( + child: TreeSitterNode, + seen: Set, + props: SubDeclaration[], +): void { + const assignment = findChild(child, 'assignment'); + if (!assignment) return; + const left = assignment.childForFieldName('left'); + if (!left || left.type !== 'identifier' || seen.has(left.text)) return; + seen.add(left.text); + props.push({ + name: left.text, + kind: 'property', + line: child.startPosition.row + 1, + visibility: pythonVisibility(left.text), + }); +} + +/** If node is an __init__ method, walk its body for self.x assignments. */ +function extractInitProperties( + node: TreeSitterNode, + seen: Set, + props: SubDeclaration[], +): void { + const fnName = node.childForFieldName('name'); + if (!fnName || fnName.text !== '__init__') return; + const initBody = node.childForFieldName('body') || findChild(node, 'block'); + if (initBody) walkInitBody(initBody, seen, props); +} + function extractPythonClassProperties(classNode: TreeSitterNode): SubDeclaration[] { const props: SubDeclaration[] = []; const seen = new Set(); @@ -285,42 +316,14 @@ function extractPythonClassProperties(classNode: TreeSitterNode): SubDeclaration if (!child) continue; if (child.type === 'expression_statement') { - const assignment = findChild(child, 'assignment'); - if (assignment) { - const left = assignment.childForFieldName('left'); - if (left && left.type === 'identifier' && !seen.has(left.text)) { - seen.add(left.text); - props.push({ - name: left.text, - kind: 'property', - line: child.startPosition.row + 1, - visibility: pythonVisibility(left.text), - }); - } - } - } - - if (child.type === 
'function_definition') { - const fnName = child.childForFieldName('name'); - if (fnName && fnName.text === '__init__') { - const initBody = child.childForFieldName('body') || findChild(child, 'block'); - if (initBody) { - walkInitBody(initBody, seen, props); - } - } - } - - if (child.type === 'decorated_definition') { + extractClassAssignment(child, seen, props); + } else if (child.type === 'function_definition') { + extractInitProperties(child, seen, props); + } else if (child.type === 'decorated_definition') { for (let j = 0; j < child.childCount; j++) { const inner = child.child(j); if (inner && inner.type === 'function_definition') { - const fnName = inner.childForFieldName('name'); - if (fnName && fnName.text === '__init__') { - const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); - if (initBody) { - walkInitBody(initBody, seen, props); - } - } + extractInitProperties(inner, seen, props); } } } @@ -354,15 +357,37 @@ function extractPythonTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void extractPythonTypeMapDepth(node, ctx, 0); } -function setIfHigherPy( - typeMap: Map, - name: string, - type: string, - confidence: number, -): void { - const existing = typeMap.get(name); - if (!existing || confidence > existing.confidence) { - typeMap.set(name, { type, confidence }); +/** Handle typed_parameter or typed_default_parameter for type map. */ +function handlePyTypedParam(node: TreeSitterNode, ctx: ExtractorOutput): void { + const isDefault = node.type === 'typed_default_parameter'; + const nameNode = isDefault ? 
node.childForFieldName('name') : node.child(0); + const typeNode = node.childForFieldName('type'); + if (!nameNode || nameNode.type !== 'identifier' || !typeNode) return; + if (nameNode.text === 'self' || nameNode.text === 'cls') return; + const typeName = extractPythonTypeName(typeNode); + if (typeName && ctx.typeMap) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); +} + +/** Handle assignment for constructor/factory type inference. */ +function handlePyAssignmentType(node: TreeSitterNode, ctx: ExtractorOutput): void { + const left = node.childForFieldName('left'); + const right = node.childForFieldName('right'); + if (!left || left.type !== 'identifier' || !right || right.type !== 'call') return; + + const fn = right.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'identifier') { + const name = fn.text; + if (name[0] && name[0] !== name[0].toLowerCase()) { + if (ctx.typeMap) setTypeMapEntry(ctx.typeMap, left.text, name, 1.0); + } + } else if (fn.type === 'attribute') { + const obj = fn.childForFieldName('object'); + if (!obj || obj.type !== 'identifier') return; + const objName = obj.text; + if (objName[0] && objName[0] !== objName[0].toLowerCase() && !BUILTIN_GLOBALS_PY.has(objName)) { + if (ctx.typeMap) setTypeMapEntry(ctx.typeMap, left.text, objName, 0.7); + } } } @@ -373,57 +398,10 @@ function extractPythonTypeMapDepth( ): void { if (depth >= MAX_WALK_DEPTH) return; - // typed_parameter: identifier : type (confidence 0.9) - if (node.type === 'typed_parameter') { - const nameNode = node.child(0); - const typeNode = node.childForFieldName('type'); - if (nameNode && nameNode.type === 'identifier' && typeNode) { - const typeName = extractPythonTypeName(typeNode); - if (typeName && nameNode.text !== 'self' && nameNode.text !== 'cls') { - if (ctx.typeMap) setIfHigherPy(ctx.typeMap, nameNode.text, typeName, 0.9); - } - } - } - - // typed_default_parameter: name : type = default (confidence 0.9) - if (node.type === 
'typed_default_parameter') { - const nameNode = node.childForFieldName('name'); - const typeNode = node.childForFieldName('type'); - if (nameNode && nameNode.type === 'identifier' && typeNode) { - const typeName = extractPythonTypeName(typeNode); - if (typeName && nameNode.text !== 'self' && nameNode.text !== 'cls') { - if (ctx.typeMap) setIfHigherPy(ctx.typeMap, nameNode.text, typeName, 0.9); - } - } - } - - // assignment: x = SomeClass(...) → constructor (confidence 1.0) - // x = SomeClass.create(...) → factory (confidence 0.7) - if (node.type === 'assignment') { - const left = node.childForFieldName('left'); - const right = node.childForFieldName('right'); - if (left && left.type === 'identifier' && right && right.type === 'call') { - const fn = right.childForFieldName('function'); - if (fn && fn.type === 'identifier') { - const name = fn.text; - if (name[0] && name[0] !== name[0].toLowerCase()) { - if (ctx.typeMap) setIfHigherPy(ctx.typeMap, left.text, name, 1.0); - } - } - if (fn && fn.type === 'attribute') { - const obj = fn.childForFieldName('object'); - if (obj && obj.type === 'identifier') { - const objName = obj.text; - if ( - objName[0] && - objName[0] !== objName[0].toLowerCase() && - !BUILTIN_GLOBALS_PY.has(objName) - ) { - if (ctx.typeMap) setIfHigherPy(ctx.typeMap, left.text, objName, 0.7); - } - } - } - } + if (node.type === 'typed_parameter' || node.type === 'typed_default_parameter') { + handlePyTypedParam(node, ctx); + } else if (node.type === 'assignment') { + handlePyAssignmentType(node, ctx); } for (let i = 0; i < node.childCount; i++) { diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index 169ef1e5..6214d47f 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -8,10 +8,10 @@ import type { import { extractBodyMembers, findParentNode, - lastPathSegment, MAX_WALK_DEPTH, nodeEndLine, rustVisibility, + setTypeMapEntry, } from './helpers.js'; /** @@ -264,7 +264,7 @@ function extractRustTypeMapDepth(node: TreeSitterNode, 
ctx: ExtractorOutput, dep const typeNode = node.childForFieldName('type'); if (pattern && pattern.type === 'identifier' && typeNode) { const typeName = extractRustTypeName(typeNode); - if (typeName) ctx.typeMap?.set(pattern.text, { type: typeName, confidence: 0.9 }); + if (typeName && ctx.typeMap) setTypeMapEntry(ctx.typeMap, pattern.text, typeName, 0.9); } } @@ -276,7 +276,7 @@ function extractRustTypeMapDepth(node: TreeSitterNode, ctx: ExtractorOutput, dep const name = pattern.type === 'identifier' ? pattern.text : null; if (name && name !== 'self' && name !== '&self' && name !== '&mut self') { const typeName = extractRustTypeName(typeNode); - if (typeName) ctx.typeMap?.set(name, { type: typeName, confidence: 0.9 }); + if (typeName && ctx.typeMap) setTypeMapEntry(ctx.typeMap, name, typeName, 0.9); } } } @@ -309,56 +309,55 @@ function extractRustTypeName(typeNode: TreeSitterNode): string | null { return null; } -function extractRustUsePath(node: TreeSitterNode | null): { source: string; names: string[] }[] { - if (!node) return []; - - if (node.type === 'use_list') { - const results: { source: string; names: string[] }[] = []; - for (let i = 0; i < node.childCount; i++) { - results.push(...extractRustUsePath(node.child(i))); +/** Collect names from a scoped_use_list's list node. */ +function collectScopedNames(listNode: TreeSitterNode): string[] { + const names: string[] = []; + for (let i = 0; i < listNode.childCount; i++) { + const child = listNode.child(i); + if (!child) continue; + if (child.type === 'identifier' || child.type === 'self') { + names.push(child.text); + } else if (child.type === 'use_as_clause') { + const name = (child.childForFieldName('alias') || child.childForFieldName('name'))?.text; + if (name) names.push(name); } - return results; } + return names; +} - if (node.type === 'scoped_use_list') { - const pathNode = node.childForFieldName('path'); - const listNode = node.childForFieldName('list'); - const prefix = pathNode ? 
pathNode.text : ''; - if (listNode) { - const names: string[] = []; - for (let i = 0; i < listNode.childCount; i++) { - const child = listNode.child(i); - if ( - child && - (child.type === 'identifier' || child.type === 'use_as_clause' || child.type === 'self') - ) { - const name = - child.type === 'use_as_clause' - ? (child.childForFieldName('alias') || child.childForFieldName('name'))?.text - : child.text; - if (name) names.push(name); - } +function extractRustUsePath(node: TreeSitterNode | null): { source: string; names: string[] }[] { + if (!node) return []; + + switch (node.type) { + case 'use_list': { + const results: { source: string; names: string[] }[] = []; + for (let i = 0; i < node.childCount; i++) { + results.push(...extractRustUsePath(node.child(i))); } - return [{ source: prefix, names }]; + return results; } - return [{ source: prefix, names: [] }]; - } - - if (node.type === 'use_as_clause') { - const name = node.childForFieldName('alias') || node.childForFieldName('name'); - return [{ source: node.text, names: name ? [name.text] : [] }]; - } - - if (node.type === 'use_wildcard') { - const pathNode = node.childForFieldName('path'); - return [{ source: pathNode ? pathNode.text : '*', names: ['*'] }]; - } - - if (node.type === 'scoped_identifier' || node.type === 'identifier') { - const text = node.text; - const lastName = lastPathSegment(text, '::'); - return [{ source: text, names: [lastName] }]; + case 'scoped_use_list': { + const pathNode = node.childForFieldName('path'); + const listNode = node.childForFieldName('list'); + const prefix = pathNode ? pathNode.text : ''; + if (!listNode) return [{ source: prefix, names: [] }]; + return [{ source: prefix, names: collectScopedNames(listNode) }]; + } + case 'use_as_clause': { + const name = node.childForFieldName('alias') || node.childForFieldName('name'); + return [{ source: node.text, names: name ? 
[name.text] : [] }]; + } + case 'use_wildcard': { + const pathNode = node.childForFieldName('path'); + return [{ source: pathNode ? pathNode.text : '*', names: ['*'] }]; + } + case 'scoped_identifier': + case 'identifier': { + const text = node.text; + const lastName = text.split('::').pop() ?? text; + return [{ source: text, names: [lastName] }]; + } + default: + return []; } - - return []; } diff --git a/src/features/audit.ts b/src/features/audit.ts index 50d18a2c..d6c398f6 100644 --- a/src/features/audit.ts +++ b/src/features/audit.ts @@ -183,27 +183,8 @@ export function auditData( let functions: unknown[]; try { if (explained.kind === 'file') { - // File target: explainData returns file-level info with publicApi + internal - // We need to enrich each symbol - functions = []; - for (const fileResult of results) { - const allSymbols = [ - ...(fileResult.publicApi || []), - ...(fileResult.internal || []), - ] as FileSymbol[]; - if (kind) { - const filtered = allSymbols.filter((s) => s.kind === kind); - for (const sym of filtered) { - functions.push(enrichSymbol(db, sym, fileResult.file, noTests, maxDepth, thresholds)); - } - } else { - for (const sym of allSymbols) { - functions.push(enrichSymbol(db, sym, fileResult.file, noTests, maxDepth, thresholds)); - } - } - } + functions = enrichFileResults(db, results, kind, noTests, maxDepth, thresholds); } else { - // Function target: explainData returns per-function results functions = results.map((r: ExplainResult) => enrichFunction(db, r, noTests, maxDepth, thresholds), ); @@ -232,6 +213,29 @@ interface ExplainResult { relatedTests?: { file: string }[]; } +/** Enrich all symbols from file-target results. 
*/ +function enrichFileResults( + db: BetterSqlite3Database, + results: any[], + kind: string | undefined, + noTests: boolean, + maxDepth: number, + thresholds: Record, +): unknown[] { + const functions: unknown[] = []; + for (const fileResult of results) { + let allSymbols = [ + ...(fileResult.publicApi || []), + ...(fileResult.internal || []), + ] as FileSymbol[]; + if (kind) allSymbols = allSymbols.filter((s) => s.kind === kind); + for (const sym of allSymbols) { + functions.push(enrichSymbol(db, sym, fileResult.file, noTests, maxDepth, thresholds)); + } + } + return functions; +} + function enrichFunction( db: BetterSqlite3Database, r: ExplainResult, diff --git a/src/features/cfg.ts b/src/features/cfg.ts index d6999486..194831ce 100644 --- a/src/features/cfg.ts +++ b/src/features/cfg.ts @@ -275,6 +275,96 @@ function allCfgNative(fileSymbols: Map): boolean { return hasCfgFile; } +/** Persist native CFG data for a single file (fast path — no tree/visitor needed). */ +function persistNativeFileCfg( + db: BetterSqlite3Database, + symbols: FileSymbols, + relPath: string, + insertBlock: ReturnType, + insertEdge: ReturnType, +): number { + let count = 0; + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) continue; + + deleteCfgForNode(db, nodeId); + if (!def.cfg?.blocks?.length) continue; + + persistCfg( + def.cfg as unknown as { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] }, + nodeId, + insertBlock, + insertEdge, + ); + count++; + } + return count; +} + +/** Resolve CFG for a definition from native data or visitor results. 
*/ +function resolveCfgForDef( + def: Definition, + visitorCfgByLine: Map | null, +): { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] } | null { + if (def.cfg?.blocks?.length) { + return def.cfg as unknown as { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] }; + } + if (!visitorCfgByLine) return null; + const candidates = visitorCfgByLine.get(def.line); + if (!candidates) return null; + const r = + candidates.length === 1 + ? candidates[0] + : (candidates.find((c) => { + const n = c.funcNode.childForFieldName?.('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + return r ? { blocks: r.blocks, edges: r.edges } : null; +} + +/** Persist CFG data for a single file using visitor/native hybrid path. */ +function persistVisitorFileCfg( + db: BetterSqlite3Database, + symbols: FileSymbols, + relPath: string, + rootDir: string, + extToLang: Map, + parsers: unknown, + getParserFn: unknown, + insertBlock: ReturnType, + insertEdge: ReturnType, +): number { + const treeLang = getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!treeLang) return 0; + const { tree, langId } = treeLang; + + const cfgRules = CFG_RULES.get(langId); + if (!cfgRules) return 0; + + const visitorCfgByLine = buildVisitorCfgMap(tree, cfgRules, symbols, langId); + let count = 0; + + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) continue; + + const cfg = resolveCfgForDef(def, visitorCfgByLine); + deleteCfgForNode(db, nodeId); + if (!cfg || cfg.blocks.length === 0) continue; + + persistCfg(cfg, nodeId, insertBlock, insertEdge); + count++; + } + return count; +} + export async function buildCFGData( db: BetterSqlite3Database, fileSymbols: Map, @@ -353,74 +443,22 @@ export async function buildCFGData( const ext = path.extname(relPath).toLowerCase(); if (!CFG_EXTENSIONS.has(ext)) continue; - 
// Native fast path: skip tree/visitor setup when all CFG is pre-computed. - // Only apply to files without _tree — files with _tree were WASM-parsed - // and need the slow path (visitor) to compute CFG. if (allNative && !symbols._tree) { - for (const def of symbols.definitions) { - if (def.kind !== 'function' && def.kind !== 'method') continue; - if (!def.line) continue; - - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - - // Always delete stale CFG rows (handles body-removed case) - deleteCfgForNode(db, nodeId); - if (!def.cfg?.blocks?.length) continue; - - persistCfg( - def.cfg as unknown as { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] }, - nodeId, - insertBlock, - insertEdge, - ); - analyzed++; - } + analyzed += persistNativeFileCfg(db, symbols, relPath, insertBlock, insertEdge); continue; } - // When allNative=true, parsers/getParserFn are null. This is safe because - // _tree files use symbols._tree directly in getTreeAndLang (the parser - // code path is never reached). Non-_tree files are handled by the fast path above. - const treeLang = getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn); - if (!treeLang) continue; - const { tree, langId } = treeLang; - - const cfgRules = CFG_RULES.get(langId); - if (!cfgRules) continue; - - const visitorCfgByLine = buildVisitorCfgMap(tree, cfgRules, symbols, langId); - - for (const def of symbols.definitions) { - if (def.kind !== 'function' && def.kind !== 'method') continue; - if (!def.line) continue; - - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - - let cfg: { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] } | null = null; - if (def.cfg?.blocks?.length) { - cfg = def.cfg as unknown as { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] }; - } else if (visitorCfgByLine) { - const candidates = visitorCfgByLine.get(def.line); - const r = !candidates - ? undefined - : candidates.length === 1 - ? 
candidates[0] - : (candidates.find((c) => { - const n = c.funcNode.childForFieldName?.('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (r) cfg = { blocks: r.blocks, edges: r.edges }; - } - - // Always purge stale rows (handles body-removed case) - deleteCfgForNode(db, nodeId); - if (!cfg || cfg.blocks.length === 0) continue; - - persistCfg(cfg, nodeId, insertBlock, insertEdge); - analyzed++; - } + analyzed += persistVisitorFileCfg( + db, + symbols, + relPath, + rootDir, + extToLang, + parsers, + getParserFn, + insertBlock, + insertEdge, + ); } }); diff --git a/src/features/check.ts b/src/features/check.ts index 0e7c7d55..28902280 100644 --- a/src/features/check.ts +++ b/src/features/check.ts @@ -291,6 +291,85 @@ interface CheckOpts { config?: CodegraphConfig; } +/** Walk up from repoRoot to find the nearest .git directory. */ +function findGitRoot(repoRoot: string): string | null { + let dir = repoRoot; + while (dir) { + if (fs.existsSync(path.join(dir, '.git'))) return dir; + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + return null; +} + +/** Run git diff and return the raw output string. */ +function getGitDiff(repoRoot: string, opts: { staged?: boolean; ref?: string }): string { + const args = opts.staged + ? ['diff', '--cached', '--unified=0', '--no-color'] + : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; + return execFileSync('git', args, { + cwd: repoRoot, + encoding: 'utf-8', + maxBuffer: 10 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'], + }); +} + +/** Resolve which check predicates are enabled from opts + config. */ +function resolveCheckFlags(opts: CheckOpts, config: CodegraphConfig) { + const checkConfig = config.check || ({} as CodegraphConfig['check']); + return { + enableCycles: opts.cycles ?? checkConfig.cycles ?? true, + enableSignatures: opts.signatures ?? checkConfig.signatures ?? true, + enableBoundaries: opts.boundaries ?? checkConfig.boundaries ?? 
true, + blastRadiusThreshold: opts.blastRadius ?? checkConfig.blastRadius ?? null, + }; +} + +/** Run all enabled check predicates and return the results. */ +function runPredicates( + db: BetterSqlite3Database, + diff: ParsedDiff, + flags: ReturnType, + repoRoot: string, + noTests: boolean, + maxDepth: number, +): PredicateResult[] { + const changedFiles = new Set(diff.changedRanges.keys()); + const predicates: PredicateResult[] = []; + + if (flags.enableCycles) { + predicates.push({ name: 'cycles', ...checkNoNewCycles(db, changedFiles, noTests) }); + } + if (flags.blastRadiusThreshold != null) { + predicates.push({ + name: 'blast-radius', + ...checkMaxBlastRadius(db, diff.changedRanges, flags.blastRadiusThreshold, noTests, maxDepth), + }); + } + if (flags.enableSignatures) { + predicates.push({ + name: 'signatures', + ...checkNoSignatureChanges(db, diff.oldRanges, noTests), + }); + } + if (flags.enableBoundaries) { + predicates.push({ + name: 'boundaries', + ...checkNoBoundaryViolations(db, changedFiles, repoRoot, noTests), + }); + } + + return predicates; +} + +const EMPTY_CHECK: CheckResult = { + predicates: [], + summary: { total: 0, passed: 0, failed: 0, changedFiles: 0, newFiles: 0 }, + passed: true, +}; + export function checkData(customDbPath: string | undefined, opts: CheckOpts = {}): CheckResult { const db = openReadonlyOrFail(customDbPath); @@ -301,89 +380,26 @@ export function checkData(customDbPath: string | undefined, opts: CheckOpts = {} const maxDepth = opts.depth || 3; const config = opts.config || loadConfig(repoRoot); - const checkConfig = config.check || ({} as CodegraphConfig['check']); - - const enableCycles = opts.cycles ?? checkConfig.cycles ?? true; - const enableSignatures = opts.signatures ?? checkConfig.signatures ?? true; - const enableBoundaries = opts.boundaries ?? checkConfig.boundaries ?? true; - const blastRadiusThreshold = opts.blastRadius ?? checkConfig.blastRadius ?? 
null; - - let checkDir = repoRoot; - let isGitRepo = false; - while (checkDir) { - if (fs.existsSync(path.join(checkDir, '.git'))) { - isGitRepo = true; - break; - } - const parent = path.dirname(checkDir); - if (parent === checkDir) break; - checkDir = parent; - } - if (!isGitRepo) { + const flags = resolveCheckFlags(opts, config); + + const gitRoot = findGitRoot(repoRoot); + if (!gitRoot) { return { error: `Not a git repository: ${repoRoot}` }; } let diffOutput: string; try { - const args = opts.staged - ? ['diff', '--cached', '--unified=0', '--no-color'] - : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; - diffOutput = execFileSync('git', args, { - cwd: repoRoot, - encoding: 'utf-8', - maxBuffer: 10 * 1024 * 1024, - stdio: ['pipe', 'pipe', 'pipe'], - }); + diffOutput = getGitDiff(repoRoot, opts); } catch (e) { return { error: `Failed to run git diff: ${(e as Error).message}` }; } - if (!diffOutput.trim()) { - return { - predicates: [], - summary: { total: 0, passed: 0, failed: 0, changedFiles: 0, newFiles: 0 }, - passed: true, - }; - } - - const { changedRanges, oldRanges, newFiles } = parseDiffOutput(diffOutput); - if (changedRanges.size === 0) { - return { - predicates: [], - summary: { total: 0, passed: 0, failed: 0, changedFiles: 0, newFiles: 0 }, - passed: true, - }; - } - - const changedFiles = new Set(changedRanges.keys()); + if (!diffOutput.trim()) return EMPTY_CHECK; - const predicates: PredicateResult[] = []; + const diff = parseDiffOutput(diffOutput); + if (diff.changedRanges.size === 0) return EMPTY_CHECK; - if (enableCycles) { - const result = checkNoNewCycles(db, changedFiles, noTests); - predicates.push({ name: 'cycles', ...result }); - } - - if (blastRadiusThreshold != null) { - const result = checkMaxBlastRadius( - db, - changedRanges, - blastRadiusThreshold, - noTests, - maxDepth, - ); - predicates.push({ name: 'blast-radius', ...result }); - } - - if (enableSignatures) { - const result = checkNoSignatureChanges(db, oldRanges, 
noTests); - predicates.push({ name: 'signatures', ...result }); - } - - if (enableBoundaries) { - const result = checkNoBoundaryViolations(db, changedFiles, repoRoot, noTests); - predicates.push({ name: 'boundaries', ...result }); - } + const predicates = runPredicates(db, diff, flags, repoRoot, noTests, maxDepth); const passedCount = predicates.filter((p) => p.passed).length; const failedCount = predicates.length - passedCount; @@ -394,8 +410,8 @@ export function checkData(customDbPath: string | undefined, opts: CheckOpts = {} total: predicates.length, passed: passedCount, failed: failedCount, - changedFiles: changedFiles.size, - newFiles: newFiles.size, + changedFiles: diff.changedRanges.size, + newFiles: diff.newFiles.size, }, passed: failedCount === 0, }; diff --git a/src/features/complexity-query.ts b/src/features/complexity-query.ts index e494b6b2..27eb5fc0 100644 --- a/src/features/complexity-query.ts +++ b/src/features/complexity-query.ts @@ -33,6 +33,176 @@ interface ComplexityRow { halstead_bugs: number; } +const isValidThreshold = (v: unknown): v is number => typeof v === 'number' && Number.isFinite(v); + +/** Build WHERE clause and params for complexity query filtering. 
*/ +function buildComplexityWhere(opts: { + noTests: boolean; + target: string | null; + fileFilter: string | null; + kindFilter: string | null; +}): { where: string; params: unknown[] } { + let where = "WHERE n.kind IN ('function','method')"; + const params: unknown[] = []; + + if (opts.noTests) { + where += ` AND n.file NOT LIKE '%.test.%' + AND n.file NOT LIKE '%.spec.%' + AND n.file NOT LIKE '%__test__%' + AND n.file NOT LIKE '%__tests__%' + AND n.file NOT LIKE '%.stories.%'`; + } + if (opts.target) { + where += ' AND n.name LIKE ?'; + params.push(`%${opts.target}%`); + } + { + const fc = buildFileConditionSQL(opts.fileFilter as string, 'n.file'); + where += fc.sql; + params.push(...fc.params); + } + if (opts.kindFilter) { + where += ' AND n.kind = ?'; + params.push(opts.kindFilter); + } + return { where, params }; +} + +/** Build HAVING clause for above-threshold filtering. */ +function buildThresholdHaving(thresholds: any): string { + const conditions: string[] = []; + if (isValidThreshold(thresholds.cognitive?.warn)) { + conditions.push(`fc.cognitive >= ${thresholds.cognitive.warn}`); + } + if (isValidThreshold(thresholds.cyclomatic?.warn)) { + conditions.push(`fc.cyclomatic >= ${thresholds.cyclomatic.warn}`); + } + if (isValidThreshold(thresholds.maxNesting?.warn)) { + conditions.push(`fc.max_nesting >= ${thresholds.maxNesting.warn}`); + } + if (isValidThreshold(thresholds.maintainabilityIndex?.warn)) { + conditions.push( + `fc.maintainability_index > 0 AND fc.maintainability_index <= ${thresholds.maintainabilityIndex.warn}`, + ); + } + return conditions.length > 0 ? `AND (${conditions.join(' OR ')})` : ''; +} + +/** Map a raw DB row to the public complexity result shape. */ +function mapComplexityRow(r: ComplexityRow, thresholds: any): Record { + const exceeds: string[] = []; + if ( + isValidThreshold(thresholds.cognitive?.warn) && + r.cognitive >= (thresholds.cognitive?.warn ?? 
0) + ) + exceeds.push('cognitive'); + if ( + isValidThreshold(thresholds.cyclomatic?.warn) && + r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0) + ) + exceeds.push('cyclomatic'); + if ( + isValidThreshold(thresholds.maxNesting?.warn) && + r.max_nesting >= (thresholds.maxNesting?.warn ?? 0) + ) + exceeds.push('maxNesting'); + if ( + isValidThreshold(thresholds.maintainabilityIndex?.warn) && + r.maintainability_index > 0 && + r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0) + ) + exceeds.push('maintainabilityIndex'); + + return { + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + endLine: r.end_line || null, + cognitive: r.cognitive, + cyclomatic: r.cyclomatic, + maxNesting: r.max_nesting, + loc: r.loc || 0, + sloc: r.sloc || 0, + maintainabilityIndex: r.maintainability_index || 0, + halstead: { + volume: r.halstead_volume || 0, + difficulty: r.halstead_difficulty || 0, + effort: r.halstead_effort || 0, + bugs: r.halstead_bugs || 0, + }, + exceeds: exceeds.length > 0 ? exceeds : undefined, + }; +} + +/** Check whether a row exceeds any threshold (for summary counting). */ +function exceedsAnyThreshold( + r: { cognitive: number; cyclomatic: number; max_nesting: number; maintainability_index: number }, + thresholds: any, +): boolean { + return ( + (isValidThreshold(thresholds.cognitive?.warn) && + r.cognitive >= (thresholds.cognitive?.warn ?? 0)) || + (isValidThreshold(thresholds.cyclomatic?.warn) && + r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0)) || + (isValidThreshold(thresholds.maxNesting?.warn) && + r.max_nesting >= (thresholds.maxNesting?.warn ?? 0)) || + (isValidThreshold(thresholds.maintainabilityIndex?.warn) && + r.maintainability_index > 0 && + r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0)) + ); +} + +/** Compute summary statistics across all complexity rows. 
*/ +function computeComplexitySummary( + db: ReturnType, + noTests: boolean, + thresholds: any, +): Record | null { + try { + const allRows = db + .prepare<{ + cognitive: number; + cyclomatic: number; + max_nesting: number; + maintainability_index: number; + }>( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') + ${noTests ? `AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, + ) + .all(); + + if (allRows.length === 0) return null; + + const miValues = allRows.map((r) => r.maintainability_index || 0); + return { + analyzed: allRows.length, + avgCognitive: +(allRows.reduce((s, r) => s + r.cognitive, 0) / allRows.length).toFixed(1), + avgCyclomatic: +(allRows.reduce((s, r) => s + r.cyclomatic, 0) / allRows.length).toFixed(1), + maxCognitive: Math.max(...allRows.map((r) => r.cognitive)), + maxCyclomatic: Math.max(...allRows.map((r) => r.cyclomatic)), + avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), + minMI: +Math.min(...miValues).toFixed(1), + aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, + }; + } catch (e: unknown) { + debug(`complexity summary query failed: ${(e as Error).message}`); + return null; + } +} + +/** Check if graph has nodes (used when complexity table is missing). */ +function checkHasGraph(db: ReturnType): boolean { + try { + return (db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c ?? 
0) > 0; + } catch (e: unknown) { + debug(`nodes table check failed: ${(e as Error).message}`); + return false; + } +} + export function complexityData( customDbPath?: string, opts: { @@ -52,11 +222,7 @@ export function complexityData( const sort = opts.sort || 'cognitive'; const noTests = opts.noTests || false; const aboveThreshold = opts.aboveThreshold || false; - const target = opts.target || null; - const fileFilter = opts.file || null; - const kindFilter = opts.kind || null; - // Load thresholds from config const config = opts.config || loadConfig(process.cwd()); const thresholds: any = config.manifesto?.rules || { cognitive: { warn: 15, fail: null }, @@ -65,55 +231,14 @@ export function complexityData( maintainabilityIndex: { warn: 20, fail: null }, }; - // Build query - let where = "WHERE n.kind IN ('function','method')"; - const params: unknown[] = []; - - if (noTests) { - where += ` AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'`; - } - if (target) { - where += ' AND n.name LIKE ?'; - params.push(`%${target}%`); - } - { - const fc = buildFileConditionSQL(fileFilter as string, 'n.file'); - where += fc.sql; - params.push(...fc.params); - } - if (kindFilter) { - where += ' AND n.kind = ?'; - params.push(kindFilter); - } - - const isValidThreshold = (v: unknown): v is number => - typeof v === 'number' && Number.isFinite(v); + const { where, params } = buildComplexityWhere({ + noTests, + target: opts.target || null, + fileFilter: opts.file || null, + kindFilter: opts.kind || null, + }); - let having = ''; - if (aboveThreshold) { - const conditions: string[] = []; - if (isValidThreshold(thresholds.cognitive?.warn)) { - conditions.push(`fc.cognitive >= ${thresholds.cognitive.warn}`); - } - if (isValidThreshold(thresholds.cyclomatic?.warn)) { - conditions.push(`fc.cyclomatic >= ${thresholds.cyclomatic.warn}`); - } - if 
(isValidThreshold(thresholds.maxNesting?.warn)) { - conditions.push(`fc.max_nesting >= ${thresholds.maxNesting.warn}`); - } - if (isValidThreshold(thresholds.maintainabilityIndex?.warn)) { - conditions.push( - `fc.maintainability_index > 0 AND fc.maintainability_index <= ${thresholds.maintainabilityIndex.warn}`, - ); - } - if (conditions.length > 0) { - having = `AND (${conditions.join(' OR ')})`; - } - } + const having = aboveThreshold ? buildThresholdHaving(thresholds) : ''; const orderMap: Record = { cognitive: 'fc.cognitive DESC', @@ -143,121 +268,14 @@ export function complexityData( .all(...params); } catch (e: unknown) { debug(`complexity query failed (table may not exist): ${(e as Error).message}`); - // Check if graph has nodes even though complexity table is missing/empty - let hasGraph = false; - try { - hasGraph = (db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c ?? 0) > 0; - } catch (e2: unknown) { - debug(`nodes table check failed: ${(e2 as Error).message}`); - } - return { functions: [], summary: null, thresholds, hasGraph }; + return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; } - // Post-filter test files if needed (belt-and-suspenders for isTestFile) const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; + const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); - const functions = filtered.map((r) => { - const exceeds: string[] = []; - if ( - isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0) - ) - exceeds.push('cognitive'); - if ( - isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0) - ) - exceeds.push('cyclomatic'); - if ( - isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 
0) - ) - exceeds.push('maxNesting'); - if ( - isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0) - ) - exceeds.push('maintainabilityIndex'); - - return { - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - endLine: r.end_line || null, - cognitive: r.cognitive, - cyclomatic: r.cyclomatic, - maxNesting: r.max_nesting, - loc: r.loc || 0, - sloc: r.sloc || 0, - maintainabilityIndex: r.maintainability_index || 0, - halstead: { - volume: r.halstead_volume || 0, - difficulty: r.halstead_difficulty || 0, - effort: r.halstead_effort || 0, - bugs: r.halstead_bugs || 0, - }, - exceeds: exceeds.length > 0 ? exceeds : undefined, - }; - }); - - // Summary stats - let summary: Record | null = null; - try { - const allRows = db - .prepare<{ - cognitive: number; - cyclomatic: number; - max_nesting: number; - maintainability_index: number; - }>( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') - ${noTests ? 
`AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, - ) - .all(); - - if (allRows.length > 0) { - const miValues = allRows.map((r) => r.maintainability_index || 0); - summary = { - analyzed: allRows.length, - avgCognitive: +(allRows.reduce((s, r) => s + r.cognitive, 0) / allRows.length).toFixed(1), - avgCyclomatic: +(allRows.reduce((s, r) => s + r.cyclomatic, 0) / allRows.length).toFixed( - 1, - ), - maxCognitive: Math.max(...allRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...allRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - aboveWarn: allRows.filter( - (r) => - (isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0)) || - (isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0)) || - (isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0)) || - (isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0)), - ).length, - }; - } - } catch (e: unknown) { - debug(`complexity summary query failed: ${(e as Error).message}`); - } - - // When summary is null (no complexity rows), check if graph has nodes - let hasGraph = false; - if (summary === null) { - try { - hasGraph = (db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c ?? 0) > 0; - } catch (e: unknown) { - debug(`nodes table check failed: ${(e as Error).message}`); - } - } + const summary = computeComplexitySummary(db, noTests, thresholds); + const hasGraph = summary === null ? 
checkHasGraph(db) : false; const base = { functions, summary, thresholds, hasGraph }; return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index dfd36ba0..7ca597ba 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -407,6 +407,131 @@ export async function buildDataflowEdges( // findNodes imported from ./shared/find-nodes.js +interface DataflowStmts { + flowsToOut: ReturnType; + flowsToIn: ReturnType; + returnsOut: ReturnType; + returnsIn: ReturnType; + mutatesOut: ReturnType; + mutatesIn: ReturnType; +} + +function prepareDataflowStmts(db: BetterSqlite3Database): DataflowStmts { + return { + flowsToOut: db.prepare( + `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line + FROM dataflow d JOIN nodes n ON d.target_id = n.id + WHERE d.source_id = ? AND d.kind = 'flows_to'`, + ), + flowsToIn: db.prepare( + `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line + FROM dataflow d JOIN nodes n ON d.source_id = n.id + WHERE d.target_id = ? AND d.kind = 'flows_to'`, + ), + returnsOut: db.prepare( + `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line + FROM dataflow d JOIN nodes n ON d.target_id = n.id + WHERE d.source_id = ? AND d.kind = 'returns'`, + ), + returnsIn: db.prepare( + `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line + FROM dataflow d JOIN nodes n ON d.source_id = n.id + WHERE d.target_id = ? AND d.kind = 'returns'`, + ), + mutatesOut: db.prepare( + `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line + FROM dataflow d JOIN nodes n ON d.target_id = n.id + WHERE d.source_id = ? 
AND d.kind = 'mutates'`, + ), + mutatesIn: db.prepare( + `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line + FROM dataflow d JOIN nodes n ON d.source_id = n.id + WHERE d.target_id = ? AND d.kind = 'mutates'`, + ), + }; +} + +function buildNodeDataflowResult( + node: NodeRow, + stmts: DataflowStmts, + db: BetterSqlite3Database, + hc: Map, + noTests: boolean, +): Record { + const sym = normalizeSymbol(node, db, hc); + + const flowsTo = stmts.flowsToOut.all(node.id).map((r: any) => ({ + target: r.target_name, + kind: r.target_kind, + file: r.target_file, + line: r.line, + paramIndex: r.param_index, + expression: r.expression, + confidence: r.confidence, + })); + + const flowsFrom = stmts.flowsToIn.all(node.id).map((r: any) => ({ + source: r.source_name, + kind: r.source_kind, + file: r.source_file, + line: r.line, + paramIndex: r.param_index, + expression: r.expression, + confidence: r.confidence, + })); + + const returnConsumers = stmts.returnsOut.all(node.id).map((r: any) => ({ + consumer: r.target_name, + kind: r.target_kind, + file: r.target_file, + line: r.line, + expression: r.expression, + })); + + const returnedBy = stmts.returnsIn.all(node.id).map((r: any) => ({ + producer: r.source_name, + kind: r.source_kind, + file: r.source_file, + line: r.line, + expression: r.expression, + })); + + const mutatesTargets = stmts.mutatesOut.all(node.id).map((r: any) => ({ + target: r.target_name, + expression: r.expression, + line: r.line, + })); + + const mutatedBy = stmts.mutatesIn.all(node.id).map((r: any) => ({ + source: r.source_name, + expression: r.expression, + line: r.line, + })); + + if (noTests) { + const filter = (arr: any[]) => arr.filter((r: any) => !isTestFile(r.file)); + return { + ...sym, + flowsTo: filter(flowsTo), + flowsFrom: filter(flowsFrom), + returns: returnConsumers.filter((r) => !isTestFile(r.file)), + returnedBy: returnedBy.filter((r) => !isTestFile(r.file)), + mutates: mutatesTargets, + 
mutatedBy, + }; + } + + return { + ...sym, + flowsTo, + flowsFrom, + returns: returnConsumers, + returnedBy, + mutates: mutatesTargets, + mutatedBy, + }; +} + export function dataflowData( name: string, customDbPath?: string, @@ -512,107 +637,11 @@ export function dataflowData( } // ── JS fallback ─────────────────────────────────────────────────── - const flowsToOut = db.prepare( - `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line - FROM dataflow d JOIN nodes n ON d.target_id = n.id - WHERE d.source_id = ? AND d.kind = 'flows_to'`, - ); - const flowsToIn = db.prepare( - `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line - FROM dataflow d JOIN nodes n ON d.source_id = n.id - WHERE d.target_id = ? AND d.kind = 'flows_to'`, - ); - const returnsOut = db.prepare( - `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line - FROM dataflow d JOIN nodes n ON d.target_id = n.id - WHERE d.source_id = ? AND d.kind = 'returns'`, - ); - const returnsIn = db.prepare( - `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line - FROM dataflow d JOIN nodes n ON d.source_id = n.id - WHERE d.target_id = ? AND d.kind = 'returns'`, - ); - const mutatesOut = db.prepare( - `SELECT d.*, n.name AS target_name, n.kind AS target_kind, n.file AS target_file, n.line AS target_line - FROM dataflow d JOIN nodes n ON d.target_id = n.id - WHERE d.source_id = ? AND d.kind = 'mutates'`, - ); - const mutatesIn = db.prepare( - `SELECT d.*, n.name AS source_name, n.kind AS source_kind, n.file AS source_file, n.line AS source_line - FROM dataflow d JOIN nodes n ON d.source_id = n.id - WHERE d.target_id = ? 
AND d.kind = 'mutates'`, - ); - + const stmts = prepareDataflowStmts(db); const hc = new Map(); - const results = nodes.map((node: NodeRow) => { - const sym = normalizeSymbol(node, db, hc); - - const flowsTo = flowsToOut.all(node.id).map((r: any) => ({ - target: r.target_name, - kind: r.target_kind, - file: r.target_file, - line: r.line, - paramIndex: r.param_index, - expression: r.expression, - confidence: r.confidence, - })); - const flowsFrom = flowsToIn.all(node.id).map((r: any) => ({ - source: r.source_name, - kind: r.source_kind, - file: r.source_file, - line: r.line, - paramIndex: r.param_index, - expression: r.expression, - confidence: r.confidence, - })); - const returnConsumers = returnsOut.all(node.id).map((r: any) => ({ - consumer: r.target_name, - kind: r.target_kind, - file: r.target_file, - line: r.line, - expression: r.expression, - })); - const returnedBy = returnsIn.all(node.id).map((r: any) => ({ - producer: r.source_name, - kind: r.source_kind, - file: r.source_file, - line: r.line, - expression: r.expression, - })); - const mutatesTargets = mutatesOut.all(node.id).map((r: any) => ({ - target: r.target_name, - expression: r.expression, - line: r.line, - })); - const mutatedBy = mutatesIn.all(node.id).map((r: any) => ({ - source: r.source_name, - expression: r.expression, - line: r.line, - })); - - if (noTests) { - const filter = (arr: any[]) => arr.filter((r: any) => !isTestFile(r.file)); - return { - ...sym, - flowsTo: filter(flowsTo), - flowsFrom: filter(flowsFrom), - returns: returnConsumers.filter((r) => !isTestFile(r.file)), - returnedBy: returnedBy.filter((r) => !isTestFile(r.file)), - mutates: mutatesTargets, - mutatedBy, - }; - } - - return { - ...sym, - flowsTo, - flowsFrom, - returns: returnConsumers, - returnedBy, - mutates: mutatesTargets, - mutatedBy, - }; - }); + const results = nodes.map((node: NodeRow) => + buildNodeDataflowResult(node, stmts, db, hc, noTests), + ); const base = { name, results }; return paginateResult(base, 
'results', { limit: opts.limit, offset: opts.offset }); @@ -621,6 +650,105 @@ export function dataflowData( } } +interface BfsParentEntry { + parentId: number; + edgeKind: string; + expression: string; +} + +/** BFS through dataflow edges to find a path from source to target. */ +function bfsDataflowPath( + db: BetterSqlite3Database, + sourceId: number, + targetId: number, + maxDepth: number, + noTests: boolean, +): Map | null { + const neighborStmt = db.prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, d.kind AS edge_kind, d.expression + FROM dataflow d JOIN nodes n ON d.target_id = n.id + WHERE d.source_id = ? AND d.kind IN ('flows_to', 'returns')`, + ); + + const visited = new Set([sourceId]); + const parent = new Map(); + let queue = [sourceId]; + let found = false; + + for (let depth = 1; depth <= maxDepth; depth++) { + const nextQueue: number[] = []; + for (const currentId of queue) { + const neighbors = neighborStmt.all(currentId) as Array<{ + id: number; + file: string; + edge_kind: string; + expression: string; + }>; + for (const n of neighbors) { + if (noTests && isTestFile(n.file)) continue; + if (n.id === targetId) { + if (!found) { + found = true; + parent.set(n.id, { + parentId: currentId, + edgeKind: n.edge_kind, + expression: n.expression, + }); + } + continue; + } + if (!visited.has(n.id)) { + visited.add(n.id); + parent.set(n.id, { + parentId: currentId, + edgeKind: n.edge_kind, + expression: n.expression, + }); + nextQueue.push(n.id); + } + } + } + if (found) break; + queue = nextQueue; + if (queue.length === 0) break; + } + + return found ? parent : null; +} + +/** Reconstruct a path from BFS parent map. 
*/ +function reconstructDataflowPath( + db: BetterSqlite3Database, + parent: Map, + sourceId: number, + targetId: number, +): Array> { + const nodeById = db.prepare('SELECT * FROM nodes WHERE id = ?'); + const hc = new Map(); + const pathItems: Array> = []; + let cur: number | undefined = targetId; + while (cur !== undefined) { + const nodeRow = nodeById.get(cur) as NodeRow; + const parentInfo = parent.get(cur); + pathItems.unshift({ + ...normalizeSymbol(nodeRow, db, hc), + edgeKind: parentInfo?.edgeKind ?? null, + expression: parentInfo?.expression ?? null, + }); + cur = parentInfo?.parentId; + if (cur === sourceId) { + const srcRow = nodeById.get(cur) as NodeRow; + pathItems.unshift({ + ...normalizeSymbol(srcRow, db, hc), + edgeKind: null, + expression: null, + }); + break; + } + } + return pathItems; +} + export function dataflowPathData( from: string, to: string, @@ -676,103 +804,54 @@ export function dataflowPathData( if (sourceNode.id === targetNode.id) { const hc = new Map(); const sym = normalizeSymbol(sourceNode, db, hc); - return { - from, - to, - found: true, - hops: 0, - path: [{ ...sym, edgeKind: null }], - }; + return { from, to, found: true, hops: 0, path: [{ ...sym, edgeKind: null }] }; } - // BFS through flows_to and returns edges - const neighborStmt = db.prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, d.kind AS edge_kind, d.expression - FROM dataflow d JOIN nodes n ON d.target_id = n.id - WHERE d.source_id = ? 
AND d.kind IN ('flows_to', 'returns')`, - ); - - const visited = new Set([sourceNode.id]); - const parent = new Map(); - let queue = [sourceNode.id]; - let found = false; - - for (let depth = 1; depth <= maxDepth; depth++) { - const nextQueue: number[] = []; - for (const currentId of queue) { - const neighbors = neighborStmt.all(currentId) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - edge_kind: string; - expression: string; - }>; - for (const n of neighbors) { - if (noTests && isTestFile(n.file)) continue; - if (n.id === targetNode.id) { - if (!found) { - found = true; - parent.set(n.id, { - parentId: currentId, - edgeKind: n.edge_kind, - expression: n.expression, - }); - } - continue; - } - if (!visited.has(n.id)) { - visited.add(n.id); - parent.set(n.id, { - parentId: currentId, - edgeKind: n.edge_kind, - expression: n.expression, - }); - nextQueue.push(n.id); - } - } - } - if (found) break; - queue = nextQueue; - if (queue.length === 0) break; - } - - if (!found) { + const parent = bfsDataflowPath(db, sourceNode.id, targetNode.id, maxDepth, noTests); + if (!parent) { return { from, to, found: false }; } - // Reconstruct path - const nodeById = db.prepare('SELECT * FROM nodes WHERE id = ?'); - const hc = new Map(); - const pathItems: Array> = []; - let cur: number | undefined = targetNode.id; - while (cur !== undefined) { - const nodeRow = nodeById.get(cur) as NodeRow; - const parentInfo = parent.get(cur); - pathItems.unshift({ - ...normalizeSymbol(nodeRow, db, hc), - edgeKind: parentInfo?.edgeKind ?? null, - expression: parentInfo?.expression ?? 
null, - }); - cur = parentInfo?.parentId; - if (cur === sourceNode.id) { - const srcRow = nodeById.get(cur) as NodeRow; - pathItems.unshift({ - ...normalizeSymbol(srcRow, db, hc), - edgeKind: null, - expression: null, - }); - break; - } - } - + const pathItems = reconstructDataflowPath(db, parent, sourceNode.id, targetNode.id); return { from, to, found: true, hops: pathItems.length - 1, path: pathItems }; } finally { db.close(); } } +/** BFS forward through return-value consumers to build impact levels. */ +function bfsReturnConsumers( + node: NodeRow, + consumersStmt: ReturnType, + db: BetterSqlite3Database, + hc: Map, + maxDepth: number, + noTests: boolean, +): { levels: Record; totalAffected: number } { + const visited = new Set([node.id]); + const levels: Record = {}; + let frontier = [node.id]; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier: number[] = []; + for (const fid of frontier) { + const consumers = consumersStmt.all(fid) as NodeRow[]; + for (const c of consumers) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + if (!levels[d]) levels[d] = []; + levels[d]!.push(normalizeSymbol(c, db, hc)); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + + return { levels, totalAffected: visited.size - 1 }; +} + export function dataflowImpactData( name: string, customDbPath?: string, @@ -809,7 +888,6 @@ export function dataflowImpactData( return { name, results: [] }; } - // Forward BFS: who consumes this function's return value (directly or transitively)? 
const consumersStmt = db.prepare( `SELECT DISTINCT n.* FROM dataflow d JOIN nodes n ON d.target_id = n.id @@ -819,32 +897,15 @@ export function dataflowImpactData( const hc = new Map(); const results = nodes.map((node: NodeRow) => { const sym = normalizeSymbol(node, db, hc); - const visited = new Set([node.id]); - const levels: Record = {}; - let frontier = [node.id]; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier: number[] = []; - for (const fid of frontier) { - const consumers = consumersStmt.all(fid) as NodeRow[]; - for (const c of consumers) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - if (!levels[d]) levels[d] = []; - levels[d]!.push(normalizeSymbol(c, db, hc)); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - - return { - ...sym, - levels, - totalAffected: visited.size - 1, - }; + const { levels, totalAffected } = bfsReturnConsumers( + node, + consumersStmt, + db, + hc, + maxDepth, + noTests, + ); + return { ...sym, levels, totalAffected }; }); const base = { name, results }; diff --git a/src/features/graph-enrichment.ts b/src/features/graph-enrichment.ts index d1af5ad0..564cc500 100644 --- a/src/features/graph-enrichment.ts +++ b/src/features/graph-enrichment.ts @@ -83,50 +83,18 @@ interface FunctionEdgeRow { edge_kind: string; } -function prepareFunctionLevelData( - db: BetterSqlite3Database, - noTests: boolean, - minConf: number, - cfg: PlotConfig, -): GraphData { - let edges = db - .prepare( - ` - SELECT n1.id AS source_id, n1.name AS source_name, n1.kind AS source_kind, - n1.file AS source_file, n1.line AS source_line, n1.role AS source_role, - n2.id AS target_id, n2.name AS target_name, n2.kind AS target_kind, - n2.file AS target_file, n2.line AS target_line, n2.role AS target_role, - e.kind AS edge_kind - FROM edges e - JOIN nodes n1 ON e.source_id = n1.id - JOIN nodes n2 ON e.target_id = n2.id - WHERE n1.kind IN ('function', 
'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') - AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') - AND e.kind = 'calls' - AND e.confidence >= ? - `, - ) - .all(minConf); - if (noTests) - edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file)); - - if (cfg.filter?.kinds) { - const kinds = new Set(cfg.filter.kinds); - edges = edges.filter((e) => kinds.has(e.source_kind) && kinds.has(e.target_kind)); - } - if (cfg.filter?.files) { - const patterns = cfg.filter.files; - edges = edges.filter( - (e) => - patterns.some((p) => e.source_file.includes(p)) && - patterns.some((p) => e.target_file.includes(p)), - ); - } +type NodeInfo = { + id: number; + name: string; + kind: string; + file: string; + line: number; + role: string | null; +}; - const nodeMap = new Map< - number, - { id: number; name: string; kind: string; file: string; line: number; role: string | null } - >(); +/** Build node map from edge rows, collecting unique source/target nodes. */ +function buildNodeMapFromEdges(edges: FunctionEdgeRow[]): Map { + const nodeMap = new Map(); for (const e of edges) { if (!nodeMap.has(e.source_id)) { nodeMap.set(e.source_id, { @@ -149,17 +117,13 @@ function prepareFunctionLevelData( }); } } + return nodeMap; +} - if (cfg.filter?.roles) { - const roles = new Set(cfg.filter.roles); - for (const [id, n] of nodeMap) { - if (n.role === null || !roles.has(n.role)) nodeMap.delete(id); - } - const nodeIds = new Set(nodeMap.keys()); - edges = edges.filter((e) => nodeIds.has(e.source_id) && nodeIds.has(e.target_id)); - } - - // Complexity data +/** Load complexity data from function_complexity table. 
*/ +function loadComplexityMap( + db: BetterSqlite3Database, +): Map { const complexityMap = new Map< number, { cognitive: number; cyclomatic: number; maintainabilityIndex: number } @@ -186,19 +150,17 @@ function prepareFunctionLevelData( } catch { // table may not exist in old DBs } + return complexityMap; +} - // Fan-in / fan-out via graph subsystem - const fnGraph = new CodeGraph(); - for (const [id] of nodeMap) fnGraph.addNode(String(id)); - for (const e of edges) { - const src = String(e.source_id); - const tgt = String(e.target_id); - if (src !== tgt && !fnGraph.hasEdge(src, tgt)) fnGraph.addEdge(src, tgt); - } - - // Use DB-level fan-in/fan-out (counts ALL call edges, not just visible) +/** Load fan-in and fan-out maps from edges table. */ +function loadFanMaps(db: BetterSqlite3Database): { + fanInMap: Map; + fanOutMap: Map; +} { const fanInMap = new Map(); const fanOutMap = new Map(); + const fanInRows = db .prepare<{ node_id: number; fan_in: number }>( "SELECT target_id AS node_id, COUNT(*) AS fan_in FROM edges WHERE kind = 'calls' GROUP BY target_id", @@ -213,6 +175,138 @@ function prepareFunctionLevelData( .all(); for (const r of fanOutRows) fanOutMap.set(r.node_id, r.fan_out); + return { fanInMap, fanOutMap }; +} + +/** Build an enriched VisNode from raw node info and computed maps. */ +function buildEnrichedVisNode( + n: NodeInfo, + complexityMap: Map< + number, + { cognitive: number; cyclomatic: number; maintainabilityIndex: number } + >, + fanInMap: Map, + fanOutMap: Map, + communityMap: Map, + cfg: PlotConfig, +): VisNode { + const cx = complexityMap.get(n.id) || null; + const fanIn = fanInMap.get(n.id) || 0; + const fanOut = fanOutMap.get(n.id) || 0; + const community = communityMap.get(n.id) ?? null; + const directory = path.dirname(n.file); + const risk: string[] = []; + if (n.role?.startsWith('dead')) risk.push('dead-code'); + if (fanIn >= (cfg.riskThresholds?.highBlastRadius ?? 
10)) risk.push('high-blast-radius'); + if (cx && cx.maintainabilityIndex < (cfg.riskThresholds?.lowMI ?? 40)) risk.push('low-mi'); + + const color: string = + cfg.colorBy === 'role' && n.role + ? cfg.roleColors?.[n.role] || + (DEFAULT_ROLE_COLORS as Record)[n.role] || + '#ccc' + : cfg.colorBy === 'community' && community !== null + ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] || '#ccc' + : cfg.nodeColors?.[n.kind] || + (DEFAULT_NODE_COLORS as Record)[n.kind] || + '#ccc'; + + return { + id: n.id, + label: n.name, + title: `${n.file}:${n.line} (${n.kind}${n.role ? `, ${n.role}` : ''})`, + color, + kind: n.kind, + role: n.role || '', + file: n.file, + line: n.line, + community, + cognitive: cx?.cognitive ?? null, + cyclomatic: cx?.cyclomatic ?? null, + maintainabilityIndex: cx?.maintainabilityIndex ?? null, + fanIn, + fanOut, + directory, + risk, + }; +} + +/** Select seed node IDs based on configured strategy. */ +function selectSeedNodes(visNodes: VisNode[], cfg: PlotConfig): (number | string)[] { + if (cfg.seedStrategy === 'top-fanin') { + const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn); + return sorted.slice(0, cfg.seedCount || 30).map((n) => n.id); + } + if (cfg.seedStrategy === 'entry') { + return visNodes.filter((n) => n.role === 'entry').map((n) => n.id); + } + return visNodes.map((n) => n.id); +} + +function prepareFunctionLevelData( + db: BetterSqlite3Database, + noTests: boolean, + minConf: number, + cfg: PlotConfig, +): GraphData { + let edges = db + .prepare( + ` + SELECT n1.id AS source_id, n1.name AS source_name, n1.kind AS source_kind, + n1.file AS source_file, n1.line AS source_line, n1.role AS source_role, + n2.id AS target_id, n2.name AS target_name, n2.kind AS target_kind, + n2.file AS target_file, n2.line AS target_line, n2.role AS target_role, + e.kind AS edge_kind + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind IN ('function', 'method', 'class', 'interface', 
'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') + AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') + AND e.kind = 'calls' + AND e.confidence >= ? + `, + ) + .all(minConf); + if (noTests) + edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file)); + + if (cfg.filter?.kinds) { + const kinds = new Set(cfg.filter.kinds); + edges = edges.filter((e) => kinds.has(e.source_kind) && kinds.has(e.target_kind)); + } + if (cfg.filter?.files) { + const patterns = cfg.filter.files; + edges = edges.filter( + (e) => + patterns.some((p) => e.source_file.includes(p)) && + patterns.some((p) => e.target_file.includes(p)), + ); + } + + const nodeMap = buildNodeMapFromEdges(edges); + + if (cfg.filter?.roles) { + const roles = new Set(cfg.filter.roles); + for (const [id, n] of nodeMap) { + if (n.role === null || !roles.has(n.role)) nodeMap.delete(id); + } + const nodeIds = new Set(nodeMap.keys()); + edges = edges.filter((e) => nodeIds.has(e.source_id) && nodeIds.has(e.target_id)); + } + + const complexityMap = loadComplexityMap(db); + + // Build CodeGraph for Louvain community detection + const fnGraph = new CodeGraph(); + for (const [id] of nodeMap) fnGraph.addNode(String(id)); + for (const e of edges) { + const src = String(e.source_id); + const tgt = String(e.target_id); + if (src !== tgt && !fnGraph.hasEdge(src, tgt)) fnGraph.addEdge(src, tgt); + } + + const { fanInMap, fanOutMap } = loadFanMaps(db); + // Communities (Louvain) via graph subsystem const communityMap = new Map(); if (nodeMap.size > 0) { @@ -224,48 +318,9 @@ function prepareFunctionLevelData( } } - // Build enriched nodes - const visNodes: VisNode[] = [...nodeMap.values()].map((n) => { - const cx = complexityMap.get(n.id) || null; - const fanIn = fanInMap.get(n.id) || 0; - const fanOut = fanOutMap.get(n.id) || 0; - const community = communityMap.get(n.id) ?? 
null; - const directory = path.dirname(n.file); - const risk: string[] = []; - if (n.role?.startsWith('dead')) risk.push('dead-code'); - if (fanIn >= (cfg.riskThresholds?.highBlastRadius ?? 10)) risk.push('high-blast-radius'); - if (cx && cx.maintainabilityIndex < (cfg.riskThresholds?.lowMI ?? 40)) risk.push('low-mi'); - - const color: string = - cfg.colorBy === 'role' && n.role - ? cfg.roleColors?.[n.role] || - (DEFAULT_ROLE_COLORS as Record)[n.role] || - '#ccc' - : cfg.colorBy === 'community' && community !== null - ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] || '#ccc' - : cfg.nodeColors?.[n.kind] || - (DEFAULT_NODE_COLORS as Record)[n.kind] || - '#ccc'; - - return { - id: n.id, - label: n.name, - title: `${n.file}:${n.line} (${n.kind}${n.role ? `, ${n.role}` : ''})`, - color, - kind: n.kind, - role: n.role || '', - file: n.file, - line: n.line, - community, - cognitive: cx?.cognitive ?? null, - cyclomatic: cx?.cyclomatic ?? null, - maintainabilityIndex: cx?.maintainabilityIndex ?? 
null, - fanIn, - fanOut, - directory, - risk, - }; - }); + const visNodes: VisNode[] = [...nodeMap.values()].map((n) => + buildEnrichedVisNode(n, complexityMap, fanInMap, fanOutMap, communityMap, cfg), + ); const visEdges: VisEdge[] = edges.map((e, i) => ({ id: `e${i}`, @@ -273,18 +328,7 @@ function prepareFunctionLevelData( to: e.target_id, })); - // Seed strategy - let seedNodeIds: (number | string)[]; - if (cfg.seedStrategy === 'top-fanin') { - const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn); - seedNodeIds = sorted.slice(0, cfg.seedCount || 30).map((n) => n.id); - } else if (cfg.seedStrategy === 'entry') { - seedNodeIds = visNodes.filter((n) => n.role === 'entry').map((n) => n.id); - } else { - seedNodeIds = visNodes.map((n) => n.id); - } - - return { nodes: visNodes, edges: visEdges, seedNodeIds }; + return { nodes: visNodes, edges: visEdges, seedNodeIds: selectSeedNodes(visNodes, cfg) }; } interface FileLevelEdge { diff --git a/src/features/structure.ts b/src/features/structure.ts index 58b57666..471fa6be 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -199,14 +199,11 @@ function computeFileMetrics( })(); } -function computeDirectoryMetrics( - db: BetterSqlite3Database, - upsertMetric: SqliteStatement, - getNodeIdStmt: NodeIdStmt, - fileSymbols: Map, +/** Map each directory to the files it transitively contains. */ +function buildDirFilesMap( allDirs: Set, - importEdges: ImportEdge[], -): void { + fileSymbols: Map, +): Map { const dirFiles = new Map(); for (const dir of allDirs) { dirFiles.set(dir, []); @@ -220,7 +217,11 @@ function computeDirectoryMetrics( d = normalizePath(path.dirname(d)); } } + return dirFiles; +} +/** Build reverse map: file -> set of ancestor directories. 
*/ +function buildFileToAncestorDirs(dirFiles: Map): Map> { const fileToAncestorDirs = new Map>(); for (const [dir, files] of dirFiles) { for (const f of files) { @@ -228,7 +229,15 @@ function computeDirectoryMetrics( fileToAncestorDirs.get(f)?.add(dir); } } + return fileToAncestorDirs; +} +/** Count intra-directory, fan-in, and fan-out edges per directory. */ +function countDirectoryEdges( + allDirs: Set, + importEdges: ImportEdge[], + fileToAncestorDirs: Map>, +): Map { const dirEdgeCounts = new Map(); for (const dir of allDirs) { dirEdgeCounts.set(dir, { intra: 0, fanIn: 0, fanOut: 0 }); @@ -258,6 +267,39 @@ function computeDirectoryMetrics( } } } + return dirEdgeCounts; +} + +/** Count unique symbols in a list of files. */ +function countSymbolsInFiles(files: string[], fileSymbols: Map): number { + let symbolCount = 0; + for (const f of files) { + const sym = fileSymbols.get(f); + if (sym) { + const seen = new Set(); + for (const d of sym.definitions) { + const key = `${d.name}|${d.kind}|${d.line}`; + if (!seen.has(key)) { + seen.add(key); + symbolCount++; + } + } + } + } + return symbolCount; +} + +function computeDirectoryMetrics( + db: BetterSqlite3Database, + upsertMetric: SqliteStatement, + getNodeIdStmt: NodeIdStmt, + fileSymbols: Map, + allDirs: Set, + importEdges: ImportEdge[], +): void { + const dirFiles = buildDirFilesMap(allDirs, fileSymbols); + const fileToAncestorDirs = buildFileToAncestorDirs(dirFiles); + const dirEdgeCounts = countDirectoryEdges(allDirs, importEdges, fileToAncestorDirs); db.transaction(() => { for (const [dir, files] of dirFiles) { @@ -265,21 +307,7 @@ function computeDirectoryMetrics( if (!dirRow) continue; const fileCount = files.length; - let symbolCount = 0; - - for (const f of files) { - const sym = fileSymbols.get(f); - if (sym) { - const seen = new Set(); - for (const d of sym.definitions) { - const key = `${d.name}|${d.kind}|${d.line}`; - if (!seen.has(key)) { - seen.add(key); - symbolCount++; - } - } - } - } + const 
symbolCount = countSymbolsInFiles(files, fileSymbols); const counts = dirEdgeCounts.get(dir) || { intra: 0, fanIn: 0, fanOut: 0 }; const totalEdges = counts.intra + counts.fanIn + counts.fanOut; diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts index 1661cab2..4425cbb3 100644 --- a/src/graph/algorithms/leiden/adapter.ts +++ b/src/graph/algorithms/leiden/adapter.ts @@ -50,6 +50,111 @@ function taAdd(a: Float64Array, i: number, v: number): void { a[i] = taGet(a, i) + v; } +/** + * Populate edge arrays for a directed graph. Each edge is stored once in + * outEdges[from] and inEdges[to]. Self-loops are tracked in both the selfLoop + * array and the adjacency lists (partition.ts accounts for this). + */ +function populateDirectedEdges( + graph: CodeGraph, + idToIndex: Map, + linkWeight: (attrs: EdgeAttrs) => number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { + for (const [src, tgt, attrs] of graph.edges()) { + const from = idToIndex.get(src); + const to = idToIndex.get(tgt); + if (from == null || to == null) continue; + const w: number = +linkWeight(attrs) || 0; + if (from === to) { + taAdd(selfLoop, from, w); + // Self-loop is intentionally kept in outEdges/inEdges as well. + // partition.ts's moveNodeToCommunity (directed path) accounts for this + // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid + // triple-counting (see partition.ts moveNodeToCommunity directed block). + } + (outEdges[from] as EdgeEntry[]).push({ to, w }); + (inEdges[to] as InEdgeEntry[]).push({ from, w }); + taAdd(strengthOut, from, w); + taAdd(strengthIn, to, w); + } +} + +/** + * Populate edge arrays for an undirected graph. Reciprocal pairs are + * symmetrized and averaged to produce a single weight per undirected edge. + * Self-loops use single-w convention (matching modularity.ts formulas). 
+ */ +function populateUndirectedEdges( + graph: CodeGraph, + idToIndex: Map, + linkWeight: (attrs: EdgeAttrs) => number, + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { + const pairAgg = new Map(); + + for (const [src, tgt, attrs] of graph.edges()) { + const a = idToIndex.get(src); + const b = idToIndex.get(tgt); + if (a == null || b == null) continue; + const w: number = +linkWeight(attrs) || 0; + if (a === b) { + taAdd(selfLoop, a, w); + continue; + } + const i = a < b ? a : b; + const j = a < b ? b : a; + const key = `${i}:${j}`; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; + } + + for (const [key, rec] of pairAgg.entries()) { + const parts = key.split(':'); + const i = +(parts[0] as string); + const j = +(parts[1] as string); + const dirCount: number = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); + const w: number = dirCount > 0 ? rec.sum / dirCount : 0; + if (w === 0) continue; + (outEdges[i] as EdgeEntry[]).push({ to: j, w }); + (outEdges[j] as EdgeEntry[]).push({ to: i, w }); + (inEdges[i] as InEdgeEntry[]).push({ from: j, w }); + (inEdges[j] as InEdgeEntry[]).push({ from: i, w }); + taAdd(strengthOut, i, w); + taAdd(strengthOut, j, w); + taAdd(strengthIn, i, w); + taAdd(strengthIn, j, w); + } + + // Add self-loops into adjacency and strengths. + // Note: uses single-w convention (not standard 2w) — the modularity formulas in + // modularity.ts are written to match this convention, keeping the system self-consistent. 
+ for (let v = 0; v < n; v++) { + const w: number = taGet(selfLoop, v); + if (w !== 0) { + (outEdges[v] as EdgeEntry[]).push({ to: v, w }); + (inEdges[v] as InEdgeEntry[]).push({ from: v, w }); + taAdd(strengthOut, v, w); + taAdd(strengthIn, v, w); + } + } +} + export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { const linkWeight: (attrs: EdgeAttrs) => number = opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); @@ -92,78 +197,28 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { // Populate from graph if (directed) { - for (const [src, tgt, attrs] of graph.edges()) { - const from = idToIndex.get(src); - const to = idToIndex.get(tgt); - if (from == null || to == null) continue; - const w: number = +linkWeight(attrs) || 0; - if (from === to) { - taAdd(selfLoop, from, w); - // Self-loop is intentionally kept in outEdges/inEdges as well. - // partition.ts's moveNodeToCommunity (directed path) accounts for this - // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid - // triple-counting (see partition.ts moveNodeToCommunity directed block). - } - (outEdges[from] as EdgeEntry[]).push({ to, w }); - (inEdges[to] as InEdgeEntry[]).push({ from, w }); - taAdd(strengthOut, from, w); - taAdd(strengthIn, to, w); - } + populateDirectedEdges( + graph, + idToIndex, + linkWeight, + selfLoop, + outEdges, + inEdges, + strengthOut, + strengthIn, + ); } else { - // Undirected: symmetrize and average reciprocal pairs - const pairAgg = new Map(); - - for (const [src, tgt, attrs] of graph.edges()) { - const a = idToIndex.get(src); - const b = idToIndex.get(tgt); - if (a == null || b == null) continue; - const w: number = +linkWeight(attrs) || 0; - if (a === b) { - taAdd(selfLoop, a, w); - continue; - } - const i = a < b ? a : b; - const j = a < b ? 
b : a; - const key = `${i}:${j}`; - let rec = pairAgg.get(key); - if (!rec) { - rec = { sum: 0, seenAB: 0, seenBA: 0 }; - pairAgg.set(key, rec); - } - rec.sum += w; - if (a === i) rec.seenAB = 1; - else rec.seenBA = 1; - } - - for (const [key, rec] of pairAgg.entries()) { - const parts = key.split(':'); - const i = +(parts[0] as string); - const j = +(parts[1] as string); - const dirCount: number = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); - const w: number = dirCount > 0 ? rec.sum / dirCount : 0; - if (w === 0) continue; - (outEdges[i] as EdgeEntry[]).push({ to: j, w }); - (outEdges[j] as EdgeEntry[]).push({ to: i, w }); - (inEdges[i] as InEdgeEntry[]).push({ from: j, w }); - (inEdges[j] as InEdgeEntry[]).push({ from: i, w }); - taAdd(strengthOut, i, w); - taAdd(strengthOut, j, w); - taAdd(strengthIn, i, w); - taAdd(strengthIn, j, w); - } - - // Add self-loops into adjacency and strengths. - // Note: uses single-w convention (not standard 2w) — the modularity formulas in - // modularity.ts are written to match this convention, keeping the system self-consistent. 
- for (let v = 0; v < n; v++) { - const w: number = taGet(selfLoop, v); - if (w !== 0) { - (outEdges[v] as EdgeEntry[]).push({ to: v, w }); - (inEdges[v] as InEdgeEntry[]).push({ from: v, w }); - taAdd(strengthOut, v, w); - taAdd(strengthIn, v, w); - } - } + populateUndirectedEdges( + graph, + idToIndex, + linkWeight, + n, + selfLoop, + outEdges, + inEdges, + strengthOut, + strengthIn, + ); } // Node sizes diff --git a/src/graph/algorithms/leiden/index.ts b/src/graph/algorithms/leiden/index.ts index fb627951..a69df7ca 100644 --- a/src/graph/algorithms/leiden/index.ts +++ b/src/graph/algorithms/leiden/index.ts @@ -119,34 +119,17 @@ interface OriginalPartition { getInEdgeWeightFromCommunity(c: number): number; } -function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): OriginalPartition { - const n: number = g.n; - let maxC: number = 0; - for (let i = 0; i < n; i++) { - const ci = iget(communityMap, i); - if (ci > maxC) maxC = ci; - } - const cc: number = maxC + 1; - - const nodeCommunity = communityMap; - const internalWeight = new Float64Array(cc); - const totalStr = new Float64Array(cc); - const totalOutStr = new Float64Array(cc); - const totalInStr = new Float64Array(cc); - const totalSize = new Float64Array(cc); - - for (let i = 0; i < n; i++) { - const c: number = iget(communityMap, i); - totalSize[c] = fget(totalSize, c) + fget(g.size, i); - if (g.directed) { - totalOutStr[c] = fget(totalOutStr, c) + fget(g.strengthOut, i); - totalInStr[c] = fget(totalInStr, c) + fget(g.strengthIn, i); - } else { - totalStr[c] = fget(totalStr, c) + fget(g.strengthOut, i); - } - if (fget(g.selfLoop, i)) internalWeight[c] = fget(internalWeight, c) + fget(g.selfLoop, i); - } - +/** + * Accumulate intra-community edge weights for quality evaluation. + * For directed graphs, counts all intra-community non-self edges. + * For undirected, counts each edge once (j > i) to avoid double-counting. 
+ */ +function accumulateInternalEdgeWeights( + g: GraphAdapter, + communityMap: Int32Array, + n: number, + internalWeight: Float64Array, +): void { if (g.directed) { for (let i = 0; i < n; i++) { const ci: number = iget(communityMap, i); @@ -168,6 +151,62 @@ function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): Orig } } } +} + +/** + * Accumulate per-community node-level aggregates (size, strength) from + * the graph adapter and community mapping. + */ +function accumulateNodeAggregates( + g: GraphAdapter, + communityMap: Int32Array, + n: number, + totalSize: Float64Array, + totalStr: Float64Array, + totalOutStr: Float64Array, + totalInStr: Float64Array, + internalWeight: Float64Array, +): void { + for (let i = 0; i < n; i++) { + const c: number = iget(communityMap, i); + totalSize[c] = fget(totalSize, c) + fget(g.size, i); + if (g.directed) { + totalOutStr[c] = fget(totalOutStr, c) + fget(g.strengthOut, i); + totalInStr[c] = fget(totalInStr, c) + fget(g.strengthIn, i); + } else { + totalStr[c] = fget(totalStr, c) + fget(g.strengthOut, i); + } + if (fget(g.selfLoop, i)) internalWeight[c] = fget(internalWeight, c) + fget(g.selfLoop, i); + } +} + +function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): OriginalPartition { + const n: number = g.n; + let maxC: number = 0; + for (let i = 0; i < n; i++) { + const ci = iget(communityMap, i); + if (ci > maxC) maxC = ci; + } + const cc: number = maxC + 1; + + const nodeCommunity = communityMap; + const internalWeight = new Float64Array(cc); + const totalStr = new Float64Array(cc); + const totalOutStr = new Float64Array(cc); + const totalInStr = new Float64Array(cc); + const totalSize = new Float64Array(cc); + + accumulateNodeAggregates( + g, + communityMap, + n, + totalSize, + totalStr, + totalOutStr, + totalInStr, + internalWeight, + ); + accumulateInternalEdgeWeights(g, communityMap, n, internalWeight); return { communityCount: cc, diff --git 
a/src/graph/algorithms/leiden/optimiser.ts b/src/graph/algorithms/leiden/optimiser.ts index d658b895..0a0ba363 100644 --- a/src/graph/algorithms/leiden/optimiser.ts +++ b/src/graph/algorithms/leiden/optimiser.ts @@ -129,83 +129,15 @@ export function runLouvainUndirectedModularity( const nodeIndex: number = order[idx]!; if (level === 0 && fixedNodeMask && fixedNodeMask[nodeIndex]) continue; const candidateCount: number = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); - let bestCommunityId: number = partition.nodeCommunity[nodeIndex]!; - let bestGain: number = 0; - const maxCommunitySize: number = options.maxCommunitySize; - if (strategyCode === CandidateStrategy.All) { - for (let communityId = 0; communityId < partition.communityCount; communityId++) { - if (communityId === partition.nodeCommunity[nodeIndex]!) continue; - if ( - maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! > - maxCommunitySize - ) - continue; - const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); - if (gain > bestGain) { - bestGain = gain; - bestCommunityId = communityId; - } - } - } else if (strategyCode === CandidateStrategy.RandomAny) { - const tries: number = Math.min(10, Math.max(1, partition.communityCount)); - for (let trialIndex = 0; trialIndex < tries; trialIndex++) { - const communityId: number = (random() * partition.communityCount) | 0; - if (communityId === partition.nodeCommunity[nodeIndex]!) continue; - if ( - maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! 
> - maxCommunitySize - ) - continue; - const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); - if (gain > bestGain) { - bestGain = gain; - bestCommunityId = communityId; - } - } - } else if (strategyCode === CandidateStrategy.RandomNeighbor) { - const tries: number = Math.min(10, Math.max(1, candidateCount)); - for (let trialIndex = 0; trialIndex < tries; trialIndex++) { - const communityId: number = partition.getCandidateCommunityAt( - (random() * candidateCount) | 0, - ); - if (communityId === partition.nodeCommunity[nodeIndex]!) continue; - if ( - maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! > - maxCommunitySize - ) - continue; - const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); - if (gain > bestGain) { - bestGain = gain; - bestCommunityId = communityId; - } - } - } else { - for (let trialIndex = 0; trialIndex < candidateCount; trialIndex++) { - const communityId: number = partition.getCandidateCommunityAt(trialIndex); - if (maxCommunitySize < Infinity) { - const nextSize: number = - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]!; - if (nextSize > maxCommunitySize) continue; - } - const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); - if (gain > bestGain) { - bestGain = gain; - bestCommunityId = communityId; - } - } - } - if (options.allowNewCommunity) { - const newCommunityId: number = partition.communityCount; - const gain: number = computeQualityGain(partition, nodeIndex, newCommunityId, options); - if (gain > bestGain) { - bestGain = gain; - bestCommunityId = newCommunityId; - } - } + const { bestCommunityId, bestGain } = findBestCommunityMove( + partition, + graphAdapter, + nodeIndex, + candidateCount, + strategyCode, + options, + random, + ); if (bestCommunityId !== partition.nodeCommunity[nodeIndex]! 
&& bestGain > GAIN_EPSILON) { partition.moveNodeToCommunity(nodeIndex, bestCommunityId); improved = true; @@ -267,6 +199,109 @@ export function runLouvainUndirectedModularity( }; } +/** + * Evaluate all candidate communities for a node and return the best move. + * Encapsulates the four candidate-selection strategies (All, RandomAny, + * RandomNeighbor, Neighbors) and the optional new-community probe. + */ +function findBestCommunityMove( + partition: Partition, + graphAdapter: GraphAdapter, + nodeIndex: number, + candidateCount: number, + strategyCode: CandidateStrategyCode, + options: NormalizedOptions, + random: () => number, +): { bestCommunityId: number; bestGain: number } { + let bestCommunityId: number = partition.nodeCommunity[nodeIndex]!; + let bestGain: number = 0; + const maxCommunitySize: number = options.maxCommunitySize; + + const evaluateCandidate = (communityId: number): void => { + if (communityId === partition.nodeCommunity[nodeIndex]!) return; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! 
> + maxCommunitySize + ) + return; + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + }; + + if (strategyCode === CandidateStrategy.All) { + for (let communityId = 0; communityId < partition.communityCount; communityId++) { + evaluateCandidate(communityId); + } + } else if (strategyCode === CandidateStrategy.RandomAny) { + const tries: number = Math.min(10, Math.max(1, partition.communityCount)); + for (let trialIndex = 0; trialIndex < tries; trialIndex++) { + evaluateCandidate((random() * partition.communityCount) | 0); + } + } else if (strategyCode === CandidateStrategy.RandomNeighbor) { + const tries: number = Math.min(10, Math.max(1, candidateCount)); + for (let trialIndex = 0; trialIndex < tries; trialIndex++) { + evaluateCandidate(partition.getCandidateCommunityAt((random() * candidateCount) | 0)); + } + } else { + for (let trialIndex = 0; trialIndex < candidateCount; trialIndex++) { + evaluateCandidate(partition.getCandidateCommunityAt(trialIndex)); + } + } + + if (options.allowNewCommunity) { + const newCommunityId: number = partition.communityCount; + const gain: number = computeQualityGain(partition, nodeIndex, newCommunityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = newCommunityId; + } + } + + return { bestCommunityId, bestGain }; +} + +/** + * Run a BFS on the subgraph induced by `inCommunity` starting from `start`. + * Returns the list of visited nodes. Works on both directed (weak connectivity + * via both outEdges and inEdges) and undirected graphs. 
+ */ +function bfsComponent( + g: GraphAdapter, + start: number, + inCommunity: Uint8Array, + visited: Uint8Array, +): number[] { + const queue: number[] = [start]; + visited[start] = 1; + let head: number = 0; + while (head < queue.length) { + const v: number = queue[head++]!; + const out: EdgeEntry[] = g.outEdges[v]!; + for (let k = 0; k < out.length; k++) { + const w: number = out[k]!.to; + if (inCommunity[w] && !visited[w]) { + visited[w] = 1; + queue.push(w); + } + } + if (g.directed) { + const inc: InEdgeEntry[] = g.inEdges[v]!; + for (let k = 0; k < inc.length; k++) { + const w: number = inc[k]!.from; + if (inCommunity[w] && !visited[w]) { + visited[w] = 1; + queue.push(w); + } + } + } + } + return queue; +} + // Build a coarse graph where each community becomes a single node. // Self-loops (g.selfLoop[]) don't need separate handling here because they // are already present in g.outEdges (directed path keeps them in both arrays). @@ -450,38 +485,12 @@ function splitDisconnectedCommunities(g: GraphAdapter, partition: Partition): vo if (visited[start]) continue; componentCount++; - // BFS within the community subgraph. - // For directed graphs, traverse both outEdges and inEdges to check - // weak connectivity (reachability ignoring edge direction). 
- const queue: number[] = [start]; - visited[start] = 1; - let head: number = 0; - while (head < queue.length) { - const v: number = queue[head++]!; - const out: EdgeEntry[] = g.outEdges[v]!; - for (let k = 0; k < out.length; k++) { - const w: number = out[k]!.to; - if (inCommunity[w] && !visited[w]) { - visited[w] = 1; - queue.push(w); - } - } - if (g.directed) { - const inc: InEdgeEntry[] = g.inEdges[v]!; - for (let k = 0; k < inc.length; k++) { - const w: number = inc[k]!.from; - if (inCommunity[w] && !visited[w]) { - visited[w] = 1; - queue.push(w); - } - } - } - } + const component: number[] = bfsComponent(g, start, inCommunity, visited); if (componentCount > 1) { // Secondary component — assign new community ID directly. const newC: number = nextC++; - for (let q = 0; q < queue.length; q++) nc[queue[q]!] = newC; + for (let q = 0; q < component.length; q++) nc[component[q]!] = newC; didSplit = true; } } diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index ffa6c46b..2495cb5e 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -56,6 +56,109 @@ function u8get(a: Uint8Array, i: number): number { return a[i] as number; } +/** + * Accumulate per-community node-level totals (size, count, strength) into the + * provided aggregate arrays. Both `initializeAggregates` and `compactCommunityIds` + * share this logic — extracting it eliminates the duplication. 
+ */ +function accumulateNodeAggregates( + graph: GraphAdapter, + nodeCommunity: Int32Array, + n: number, + totalSize: Float64Array, + nodeCount: Int32Array, + internalEdgeWeight: Float64Array, + totalStrength: Float64Array, + totalOutStrength: Float64Array, + totalInStrength: Float64Array, +): void { + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + totalSize[c] = fget(totalSize, c) + fget(graph.size, i); + nodeCount[c] = iget(nodeCount, c) + 1; + if (graph.directed) { + totalOutStrength[c] = fget(totalOutStrength, c) + fget(graph.strengthOut, i); + totalInStrength[c] = fget(totalInStrength, c) + fget(graph.strengthIn, i); + } else { + totalStrength[c] = fget(totalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + internalEdgeWeight[c] = fget(internalEdgeWeight, c) + fget(graph.selfLoop, i); + } +} + +/** + * Accumulate intra-community edge weights. For directed graphs, counts all + * intra-community non-self edges. For undirected, counts each edge once (j > i). 
+ */ +function accumulateInternalEdgeWeights( + graph: GraphAdapter, + nodeCommunity: Int32Array, + n: number, + internalEdgeWeight: Float64Array, +): void { + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; + } + } + } +} + +/** + * Sort community IDs according to the compaction options: preserve original + * order, respect a user-provided label map, or sort by descending size. + * Returns the sorted list of non-empty community IDs. 
+ */ +function buildSortedCommunityIds( + ids: number[], + opts: CompactOptions, + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): void { + if (opts.keepOldOrder) { + ids.sort((a, b) => a - b); + } else if (opts.preserveMap instanceof Map) { + const preserveMap = opts.preserveMap; + ids.sort((a, b) => { + const pa = preserveMap.get(a); + const pb = preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return ( + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b + ); + }); + } else { + ids.sort( + (a, b) => + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b, + ); + } +} + export function makePartition(graph: GraphAdapter): Partition { const n: number = graph.n; const nodeCommunity = new Int32Array(n); @@ -94,44 +197,18 @@ export function makePartition(graph: GraphAdapter): Partition { communityTotalStrength.fill(0); communityTotalOutStrength.fill(0); communityTotalInStrength.fill(0); - for (let i = 0; i < n; i++) { - const c: number = iget(nodeCommunity, i); - communityTotalSize[c] = fget(communityTotalSize, c) + fget(graph.size, i); - communityNodeCount[c] = iget(communityNodeCount, c) + 1; - if (graph.directed) { - communityTotalOutStrength[c] = - fget(communityTotalOutStrength, c) + fget(graph.strengthOut, i); - communityTotalInStrength[c] = fget(communityTotalInStrength, c) + fget(graph.strengthIn, i); - } else { - communityTotalStrength[c] = fget(communityTotalStrength, c) + fget(graph.strengthOut, i); - } - if (fget(graph.selfLoop, i) !== 0) - communityInternalEdgeWeight[c] = - fget(communityInternalEdgeWeight, c) + fget(graph.selfLoop, i); - } - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const neighbors = 
graph.outEdges[i]!; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]!; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === iget(nodeCommunity, j)) - communityInternalEdgeWeight[ci] = fget(communityInternalEdgeWeight, ci) + w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const neighbors = graph.outEdges[i]!; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]!; - if (j <= i) continue; - if (ci === iget(nodeCommunity, j)) - communityInternalEdgeWeight[ci] = fget(communityInternalEdgeWeight, ci) + w; - } - } - } + accumulateNodeAggregates( + graph, + nodeCommunity, + n, + communityTotalSize, + communityNodeCount, + communityInternalEdgeWeight, + communityTotalStrength, + communityTotalOutStrength, + communityTotalInStrength, + ); + accumulateInternalEdgeWeights(graph, nodeCommunity, n, communityInternalEdgeWeight); } function resetScratch(): void { @@ -323,36 +400,15 @@ export function makePartition(graph: GraphAdapter): Partition { function compactCommunityIds(opts: CompactOptions = {}): void { const ids: number[] = []; for (let c = 0; c < communityCount; c++) if (iget(communityNodeCount, c) > 0) ids.push(c); - if (opts.keepOldOrder) { - ids.sort((a, b) => a - b); - } else if (opts.preserveMap instanceof Map) { - const preserveMap = opts.preserveMap; - ids.sort((a, b) => { - const pa = preserveMap.get(a); - const pb = preserveMap.get(b); - if (pa != null && pb != null && pa !== pb) return pa - pb; - if (pa != null && pb == null) return -1; - if (pb != null && pa == null) return 1; - return ( - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b - ); - }); - } else { - ids.sort( - (a, b) => - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b, - ); - } + 
buildSortedCommunityIds(ids, opts, communityTotalSize, communityNodeCount); + const newId = new Int32Array(communityCount).fill(-1); ids.forEach((c, i) => { newId[c] = i; }); for (let i = 0; i < nodeCommunity.length; i++) nodeCommunity[i] = iget(newId, iget(nodeCommunity, i)); + const remappedCount: number = ids.length; const newTotalSize = new Float64Array(remappedCount); const newNodeCount = new Int32Array(remappedCount); @@ -360,42 +416,19 @@ export function makePartition(graph: GraphAdapter): Partition { const newTotalStrength = new Float64Array(remappedCount); const newTotalOutStrength = new Float64Array(remappedCount); const newTotalInStrength = new Float64Array(remappedCount); - for (let i = 0; i < n; i++) { - const c: number = iget(nodeCommunity, i); - newTotalSize[c] = fget(newTotalSize, c) + fget(graph.size, i); - newNodeCount[c] = iget(newNodeCount, c) + 1; - if (graph.directed) { - newTotalOutStrength[c] = fget(newTotalOutStrength, c) + fget(graph.strengthOut, i); - newTotalInStrength[c] = fget(newTotalInStrength, c) + fget(graph.strengthIn, i); - } else { - newTotalStrength[c] = fget(newTotalStrength, c) + fget(graph.strengthOut, i); - } - if (fget(graph.selfLoop, i) !== 0) - newInternalEdgeWeight[c] = fget(newInternalEdgeWeight, c) + fget(graph.selfLoop, i); - } - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const list = graph.outEdges[i]!; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]!; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === iget(nodeCommunity, j)) - newInternalEdgeWeight[ci] = fget(newInternalEdgeWeight, ci) + w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const list = graph.outEdges[i]!; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]!; - if (j <= i) continue; - if (ci === iget(nodeCommunity, j)) - newInternalEdgeWeight[ci] = 
fget(newInternalEdgeWeight, ci) + w; - } - } - } + accumulateNodeAggregates( + graph, + nodeCommunity, + n, + newTotalSize, + newNodeCount, + newInternalEdgeWeight, + newTotalStrength, + newTotalOutStrength, + newTotalInStrength, + ); + accumulateInternalEdgeWeights(graph, nodeCommunity, n, newInternalEdgeWeight); + communityCount = remappedCount; communityTotalSize = newTotalSize; communityNodeCount = newNodeCount; diff --git a/src/graph/model.ts b/src/graph/model.ts index 60ce5bc2..80146e11 100644 --- a/src/graph/model.ts +++ b/src/graph/model.ts @@ -103,15 +103,27 @@ export class CodeGraph { } *edges(): Generator<[string, string, EdgeAttrs]> { - const seen = this._directed ? null : new Set(); + if (this._directed) { + yield* this._directedEdges(); + } else { + yield* this._undirectedEdges(); + } + } + + private *_directedEdges(): Generator<[string, string, EdgeAttrs]> { + for (const [src, targets] of this._successors) { + for (const [tgt, attrs] of targets) yield [src, tgt, attrs]; + } + } + + private *_undirectedEdges(): Generator<[string, string, EdgeAttrs]> { + // \0 is safe as separator — node IDs are file paths/symbols, never contain null bytes + const seen = new Set(); for (const [src, targets] of this._successors) { for (const [tgt, attrs] of targets) { - if (!this._directed) { - // \0 is safe as separator — node IDs are file paths/symbols, never contain null bytes - const key = src < tgt ? `${src}\0${tgt}` : `${tgt}\0${src}`; - if (seen!.has(key)) continue; - seen!.add(key); - } + const key = src < tgt ? `${src}\0${tgt}` : `${tgt}\0${src}`; + if (seen.has(key)) continue; + seen.add(key); yield [src, tgt, attrs]; } } diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 79208c01..5633c18d 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -323,79 +323,70 @@ function resolveWorkspaceEntry(pkgDir: string): string | null { * 2. package.json — `workspaces` field (npm/yarn) * 3. 
lerna.json — `packages` array */ -export function detectWorkspaces(rootDir: string): Map { - const workspaces = new Map(); - const patterns: string[] = []; - - // 1. pnpm-workspace.yaml +/** Read pnpm-workspace.yaml and return workspace glob patterns. */ +function readPnpmWorkspacePatterns(rootDir: string): string[] { const pnpmPath = path.join(rootDir, 'pnpm-workspace.yaml'); - if (fs.existsSync(pnpmPath)) { - try { - const raw = fs.readFileSync(pnpmPath, 'utf-8'); - // Simple YAML parse for `packages:` array — no dependency needed - const packagesMatch = raw.match(/^packages:\s*\n((?:\s+-\s+.+\n?)*)/m); - if (packagesMatch) { - const lines = packagesMatch[1]!.match(/^\s+-\s+['"]?([^'"#\n]+)['"]?\s*$/gm); - if (lines) { - for (const line of lines) { - const m = line.match(/^\s+-\s+['"]?([^'"#\n]+?)['"]?\s*$/); - if (m) patterns.push(m[1]!.trim()); - } - } - } - } catch (e) { - debug(`detectWorkspaces: failed to parse pnpm-workspace.yaml: ${toErrorMessage(e)}`); + if (!fs.existsSync(pnpmPath)) return []; + try { + const raw = fs.readFileSync(pnpmPath, 'utf-8'); + const packagesMatch = raw.match(/^packages:\s*\n((?:\s+-\s+.+\n?)*)/m); + if (!packagesMatch) return []; + const lines = packagesMatch[1]!.match(/^\s+-\s+['"]?([^'"#\n]+)['"]?\s*$/gm); + if (!lines) return []; + const patterns: string[] = []; + for (const line of lines) { + const m = line.match(/^\s+-\s+['"]?([^'"#\n]+?)['"]?\s*$/); + if (m) patterns.push(m[1]!.trim()); } + return patterns; + } catch (e) { + debug(`detectWorkspaces: failed to parse pnpm-workspace.yaml: ${toErrorMessage(e)}`); + return []; } +} - // 2. 
package.json workspaces (npm/yarn) - if (patterns.length === 0) { - const rootPkgPath = path.join(rootDir, 'package.json'); - if (fs.existsSync(rootPkgPath)) { - try { - const raw = fs.readFileSync(rootPkgPath, 'utf-8'); - const pkg = JSON.parse(raw); - const ws = pkg.workspaces; - if (Array.isArray(ws)) { - patterns.push(...ws); - } else if (ws && Array.isArray(ws.packages)) { - // Yarn classic format: { packages: [...], nohoist: [...] } - patterns.push(...ws.packages); - } - } catch (e) { - debug(`detectWorkspaces: failed to parse package.json workspaces: ${toErrorMessage(e)}`); - } - } +/** Read package.json workspaces field (npm/yarn) and return glob patterns. */ +function readNpmWorkspacePatterns(rootDir: string): string[] { + const rootPkgPath = path.join(rootDir, 'package.json'); + if (!fs.existsSync(rootPkgPath)) return []; + try { + const raw = fs.readFileSync(rootPkgPath, 'utf-8'); + const pkg = JSON.parse(raw); + const ws = pkg.workspaces; + if (Array.isArray(ws)) return ws; + if (ws && Array.isArray(ws.packages)) return ws.packages; + return []; + } catch (e) { + debug(`detectWorkspaces: failed to parse package.json workspaces: ${toErrorMessage(e)}`); + return []; } +} - // 3. lerna.json - if (patterns.length === 0) { - const lernaPath = path.join(rootDir, 'lerna.json'); - if (fs.existsSync(lernaPath)) { - try { - const raw = fs.readFileSync(lernaPath, 'utf-8'); - const lerna = JSON.parse(raw); - if (Array.isArray(lerna.packages)) { - patterns.push(...lerna.packages); - } - } catch (e) { - debug(`detectWorkspaces: failed to parse lerna.json: ${toErrorMessage(e)}`); - } - } +/** Read lerna.json packages field and return glob patterns. 
*/ +function readLernaPatterns(rootDir: string): string[] { + const lernaPath = path.join(rootDir, 'lerna.json'); + if (!fs.existsSync(lernaPath)) return []; + try { + const raw = fs.readFileSync(lernaPath, 'utf-8'); + const lerna = JSON.parse(raw); + if (Array.isArray(lerna.packages)) return lerna.packages; + return []; + } catch (e) { + debug(`detectWorkspaces: failed to parse lerna.json: ${toErrorMessage(e)}`); + return []; } +} - if (patterns.length === 0) return workspaces; - - // Expand glob patterns and collect packages +/** Expand workspace patterns into concrete package entries. */ +function expandWorkspacePatterns(patterns: string[], rootDir: string): Map { + const workspaces = new Map(); for (const pattern of patterns) { - // Check if pattern is a direct path (no glob) or a glob if (pattern.includes('*')) { for (const dir of expandWorkspaceGlob(pattern, rootDir)) { const name = readPackageName(dir); if (name) workspaces.set(name, { dir, entry: resolveWorkspaceEntry(dir) }); } } else { - // Direct path like "packages/core" const dir = path.resolve(rootDir, pattern); if (fs.existsSync(path.join(dir, 'package.json'))) { const name = readPackageName(dir); @@ -403,6 +394,17 @@ export function detectWorkspaces(rootDir: string): Map { } } } + return workspaces; +} + +export function detectWorkspaces(rootDir: string): Map { + // Try each package manager in priority order — first match wins + let patterns = readPnpmWorkspacePatterns(rootDir); + if (patterns.length === 0) patterns = readNpmWorkspacePatterns(rootDir); + if (patterns.length === 0) patterns = readLernaPatterns(rootDir); + if (patterns.length === 0) return new Map(); + + const workspaces = expandWorkspacePatterns(patterns, rootDir); if (workspaces.size > 0) { debug(`Detected ${workspaces.size} workspace packages: ${[...workspaces.keys()].join(', ')}`); diff --git a/src/infrastructure/registry.ts b/src/infrastructure/registry.ts index 9b02abc8..76858864 100644 --- a/src/infrastructure/registry.ts +++ 
b/src/infrastructure/registry.ts @@ -56,6 +56,22 @@ export function saveRegistry(registry: Registry, registryPath: string = REGISTRY * pointing to a different path, auto-suffixes (`api` → `api-2`, `api-3`, …). * Re-registering the same path updates in place. Explicit names always overwrite. */ + +/** Find a unique suffixed name when the base name collides with a different path. */ +function findAvailableName( + baseName: string, + absRoot: string, + repos: Record, +): string { + let suffix = 2; + while (repos[`${baseName}-${suffix}`]) { + const entry = repos[`${baseName}-${suffix}`]!; + if (path.resolve(entry.path) === absRoot) return `${baseName}-${suffix}`; + suffix++; + } + return `${baseName}-${suffix}`; +} + export function registerRepo( rootDir: string, name?: string, @@ -71,20 +87,7 @@ export function registerRepo( if (!name) { const existing = registry.repos[baseName]; if (existing && path.resolve(existing.path) !== absRoot) { - // Basename collision with a different path — find next available suffix - let suffix = 2; - while (registry.repos[`${baseName}-${suffix}`]) { - const entry = registry.repos[`${baseName}-${suffix}`]!; - if (path.resolve(entry.path) === absRoot) { - // Already registered under this suffixed name — update in place - repoName = `${baseName}-${suffix}`; - break; - } - suffix++; - } - if (repoName === baseName) { - repoName = `${baseName}-${suffix}`; - } + repoName = findAvailableName(baseName, absRoot, registry.repos); } } diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 5a3501cd..ff1bfe20 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -109,6 +109,51 @@ function validateMultiRepoAccess(multiRepo: boolean, name: string, args: { repo? } } +/** + * Register process-level shutdown and error handlers once per process. + * Ensures graceful cleanup when the MCP client disconnects or the transport + * encounters broken-pipe errors. Uses a globalThis flag to survive + * vi.resetModules() in tests. 
+ */
+function registerShutdownHandlers(): void {
+  const g = globalThis as Record<string, unknown>;
+  if (g.__codegraph_shutdown_installed) return;
+  g.__codegraph_shutdown_installed = true;
+
+  const shutdown = async () => {
+    try {
+      await _activeServer?.close();
+    } catch (_shutdownErr: unknown) {
+      // Ignore close errors during shutdown — the transport may already be gone.
+    }
+    process.exit(0);
+  };
+  const silentExit = (err: Error & { code?: string }) => {
+    // Only suppress broken-pipe errors from closed stdio transport;
+    // let real bugs surface with a non-zero exit code.
+    if (err.code === 'EPIPE' || err.code === 'ERR_STREAM_DESTROYED') {
+      process.exit(0);
+    }
+    process.stderr.write(`Uncaught exception: ${err.stack ?? err.message}\n`);
+    process.exit(1);
+  };
+  const silentReject = (reason: unknown) => {
+    const err = reason instanceof Error ? reason : new Error(String(reason));
+    const code = (err as Error & { code?: string }).code;
+    if (code === 'EPIPE' || code === 'ERR_STREAM_DESTROYED') {
+      process.exit(0);
+    }
+    process.stderr.write(`Unhandled rejection: ${err.stack ?? err.message}\n`);
+    process.exit(1);
+  };
+
+  process.on('SIGINT', shutdown);
+  process.on('SIGTERM', shutdown);
+  process.on('SIGHUP', shutdown);
+  process.on('uncaughtException', silentExit);
+  process.on('unhandledRejection', silentReject);
+}
+
 export async function startMCPServer(
   customDbPath?: string,
   options: MCPServerOptionsInternal = {},
@@ -180,43 +225,7 @@ export async function startMCPServer(
   // the latest instance (matters when tests call startMCPServer repeatedly).
   _activeServer = server;
 
-  // Register handlers once per process to avoid listener accumulation.
-  // Use a process-level flag so it survives vi.resetModules() in tests.
- const g = globalThis as Record; - if (!g.__codegraph_shutdown_installed) { - g.__codegraph_shutdown_installed = true; - - const shutdown = async () => { - try { - await _activeServer?.close(); - } catch {} - process.exit(0); - }; - const silentExit = (err: Error & { code?: string }) => { - // Only suppress broken-pipe errors from closed stdio transport; - // let real bugs surface with a non-zero exit code. - if (err.code === 'EPIPE' || err.code === 'ERR_STREAM_DESTROYED') { - process.exit(0); - } - process.stderr.write(`Uncaught exception: ${err.stack ?? err.message}\n`); - process.exit(1); - }; - const silentReject = (reason: unknown) => { - const err = reason instanceof Error ? reason : new Error(String(reason)); - const code = (err as Error & { code?: string }).code; - if (code === 'EPIPE' || code === 'ERR_STREAM_DESTROYED') { - process.exit(0); - } - process.stderr.write(`Unhandled rejection: ${err.stack ?? err.message}\n`); - process.exit(1); - }; - - process.on('SIGINT', shutdown); - process.on('SIGTERM', shutdown); - process.on('SIGHUP', shutdown); - process.on('uncaughtException', silentExit); - process.on('unhandledRejection', silentReject); - } + registerShutdownHandlers(); try { await server.connect(transport); diff --git a/src/presentation/audit.ts b/src/presentation/audit.ts index ec03818e..f82bc1da 100644 --- a/src/presentation/audit.ts +++ b/src/presentation/audit.ts @@ -15,6 +15,77 @@ interface AuditOpts { config?: unknown; } +/** Render health metrics for a single audit function. 
*/ +function renderHealthMetrics(fn: any): void { + if (fn.health.cognitive == null) return; + console.log(`\n Health:`); + console.log( + ` Cognitive: ${fn.health.cognitive} Cyclomatic: ${fn.health.cyclomatic} Nesting: ${fn.health.maxNesting}`, + ); + console.log(` MI: ${fn.health.maintainabilityIndex}`); + if (fn.health.halstead.volume) { + console.log( + ` Halstead: vol=${fn.health.halstead.volume} diff=${fn.health.halstead.difficulty} effort=${fn.health.halstead.effort} bugs=${fn.health.halstead.bugs}`, + ); + } + if (fn.health.loc) { + console.log( + ` LOC: ${fn.health.loc} SLOC: ${fn.health.sloc} Comments: ${fn.health.commentLines}`, + ); + } +} + +/** Render a single audited function with all its sections. */ +function renderAuditFunction(fn: any): void { + const lineRange = fn.endLine ? `${fn.line}-${fn.endLine}` : `${fn.line}`; + const roleTag = fn.role ? ` [${fn.role}]` : ''; + console.log(`## ${kindIcon(fn.kind)} ${fn.name} (${fn.kind})${roleTag}`); + console.log(` ${fn.file}:${lineRange}${fn.lineCount ? ` (${fn.lineCount} lines)` : ''}`); + if (fn.summary) console.log(` ${fn.summary}`); + if (fn.signature) { + if (fn.signature.params != null) console.log(` Parameters: (${fn.signature.params})`); + if (fn.signature.returnType) console.log(` Returns: ${fn.signature.returnType}`); + } + + renderHealthMetrics(fn); + + if (fn.health.thresholdBreaches.length > 0) { + console.log(`\n Threshold Breaches:`); + for (const b of fn.health.thresholdBreaches) { + const icon = b.level === 'fail' ? 
'FAIL' : 'WARN'; + console.log(` [${icon}] ${b.metric}: ${b.value} >= ${b.threshold}`); + } + } + + console.log(`\n Impact: ${fn.impact.totalDependents} transitive dependent(s)`); + for (const [level, nodes] of Object.entries(fn.impact.levels)) { + console.log( + ` Level ${level}: ${(nodes as Array<{ name: string }>).map((n) => n.name).join(', ')}`, + ); + } + + if (fn.callees.length > 0) { + console.log(`\n Calls (${fn.callees.length}):`); + for (const c of fn.callees) { + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + if (fn.callers.length > 0) { + console.log(`\n Called by (${fn.callers.length}):`); + for (const c of fn.callers) { + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + if (fn.relatedTests.length > 0) { + console.log(`\n Tests (${fn.relatedTests.length}):`); + for (const t of fn.relatedTests) { + console.log(` ${t.file}`); + } + } + + console.log(); +} + export function audit( target: string, customDbPath: string | undefined, @@ -33,72 +104,6 @@ export function audit( console.log(` ${data.functions.length} function(s) analyzed\n`); for (const fn of data.functions) { - const lineRange = fn.endLine ? `${fn.line}-${fn.endLine}` : `${fn.line}`; - const roleTag = fn.role ? ` [${fn.role}]` : ''; - console.log(`## ${kindIcon(fn.kind)} ${fn.name} (${fn.kind})${roleTag}`); - console.log(` ${fn.file}:${lineRange}${fn.lineCount ? 
` (${fn.lineCount} lines)` : ''}`); - if (fn.summary) console.log(` ${fn.summary}`); - if (fn.signature) { - if (fn.signature.params != null) console.log(` Parameters: (${fn.signature.params})`); - if (fn.signature.returnType) console.log(` Returns: ${fn.signature.returnType}`); - } - - // Health metrics - if (fn.health.cognitive != null) { - console.log(`\n Health:`); - console.log( - ` Cognitive: ${fn.health.cognitive} Cyclomatic: ${fn.health.cyclomatic} Nesting: ${fn.health.maxNesting}`, - ); - console.log(` MI: ${fn.health.maintainabilityIndex}`); - if (fn.health.halstead.volume) { - console.log( - ` Halstead: vol=${fn.health.halstead.volume} diff=${fn.health.halstead.difficulty} effort=${fn.health.halstead.effort} bugs=${fn.health.halstead.bugs}`, - ); - } - if (fn.health.loc) { - console.log( - ` LOC: ${fn.health.loc} SLOC: ${fn.health.sloc} Comments: ${fn.health.commentLines}`, - ); - } - } - - // Threshold breaches - if (fn.health.thresholdBreaches.length > 0) { - console.log(`\n Threshold Breaches:`); - for (const b of fn.health.thresholdBreaches) { - const icon = b.level === 'fail' ? 
'FAIL' : 'WARN'; - console.log(` [${icon}] ${b.metric}: ${b.value} >= ${b.threshold}`); - } - } - - // Impact - console.log(`\n Impact: ${fn.impact.totalDependents} transitive dependent(s)`); - for (const [level, nodes] of Object.entries(fn.impact.levels)) { - console.log( - ` Level ${level}: ${(nodes as Array<{ name: string }>).map((n) => n.name).join(', ')}`, - ); - } - - // Call edges - if (fn.callees.length > 0) { - console.log(`\n Calls (${fn.callees.length}):`); - for (const c of fn.callees) { - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } - if (fn.callers.length > 0) { - console.log(`\n Called by (${fn.callers.length}):`); - for (const c of fn.callers) { - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } - if (fn.relatedTests.length > 0) { - console.log(`\n Tests (${fn.relatedTests.length}):`); - for (const t of fn.relatedTests) { - console.log(` ${t.file}`); - } - } - - console.log(); + renderAuditFunction(fn); } } diff --git a/src/presentation/branch-compare.ts b/src/presentation/branch-compare.ts index e2429d7a..9a5ed2c4 100644 --- a/src/presentation/branch-compare.ts +++ b/src/presentation/branch-compare.ts @@ -36,6 +36,57 @@ interface BranchCompareFormatData { summary: BranchCompareSummary; } +/** Format impact annotation for a symbol. */ +function formatImpactLine(impact: unknown[] | undefined): string | null { + if (!impact || impact.length === 0) return null; + return ` ^ ${impact.length} transitive caller${impact.length !== 1 ? 's' : ''} affected`; +} + +/** Format added symbols section. */ +function formatAddedSection(added: BranchCompareSymbol[]): string[] { + if (added.length === 0) return []; + const lines = ['', ` + Added (${added.length} symbol${added.length !== 1 ? 's' : ''}):`]; + for (const sym of added) { + lines.push(` [${kindIcon(sym.kind)}] ${sym.name} -- ${sym.file}:${sym.line}`); + } + return lines; +} + +/** Format removed symbols section. 
*/ +function formatRemovedSection(removed: BranchCompareSymbol[]): string[] { + if (removed.length === 0) return []; + const lines = ['', ` - Removed (${removed.length} symbol${removed.length !== 1 ? 's' : ''}):`]; + for (const sym of removed) { + lines.push(` [${kindIcon(sym.kind)}] ${sym.name} -- ${sym.file}:${sym.line}`); + const impact = formatImpactLine(sym.impact); + if (impact) lines.push(impact); + } + return lines; +} + +/** Format changed symbols section with delta details. */ +function formatChangedSection(changed: BranchCompareSymbol[]): string[] { + if (changed.length === 0) return []; + const lines = ['', ` ~ Changed (${changed.length} symbol${changed.length !== 1 ? 's' : ''}):`]; + for (const sym of changed) { + const parts: string[] = []; + if (sym.changes?.lineCount !== 0) { + parts.push(`lines: ${sym.base?.lineCount} -> ${sym.target?.lineCount}`); + } + if (sym.changes?.fanIn !== 0) { + parts.push(`fan_in: ${sym.base?.fanIn} -> ${sym.target?.fanIn}`); + } + if (sym.changes?.fanOut !== 0) { + parts.push(`fan_out: ${sym.base?.fanOut} -> ${sym.target?.fanOut}`); + } + const detail = parts.length > 0 ? ` (${parts.join(', ')})` : ''; + lines.push(` [${kindIcon(sym.kind)}] ${sym.name} -- ${sym.file}:${sym.base?.line}${detail}`); + const impact = formatImpactLine(sym.impact); + if (impact) lines.push(impact); + } + return lines; +} + function formatText(data: BranchCompareFormatData): string { if (data.error) return `Error: ${data.error}`; @@ -48,56 +99,9 @@ function formatText(data: BranchCompareFormatData): string { lines.push(` Target: ${data.targetRef} (${shortTarget})`); lines.push(` Files changed: ${data.changedFiles.length}`); - if (data.added.length > 0) { - lines.push(''); - lines.push(` + Added (${data.added.length} symbol${data.added.length !== 1 ? 
's' : ''}):`); - for (const sym of data.added) { - lines.push(` [${kindIcon(sym.kind)}] ${sym.name} -- ${sym.file}:${sym.line}`); - } - } - - if (data.removed.length > 0) { - lines.push(''); - lines.push( - ` - Removed (${data.removed.length} symbol${data.removed.length !== 1 ? 's' : ''}):`, - ); - for (const sym of data.removed) { - lines.push(` [${kindIcon(sym.kind)}] ${sym.name} -- ${sym.file}:${sym.line}`); - if (sym.impact && sym.impact.length > 0) { - lines.push( - ` ^ ${sym.impact.length} transitive caller${sym.impact.length !== 1 ? 's' : ''} affected`, - ); - } - } - } - - if (data.changed.length > 0) { - lines.push(''); - lines.push( - ` ~ Changed (${data.changed.length} symbol${data.changed.length !== 1 ? 's' : ''}):`, - ); - for (const sym of data.changed) { - const parts: string[] = []; - if (sym.changes?.lineCount !== 0) { - parts.push(`lines: ${sym.base?.lineCount} -> ${sym.target?.lineCount}`); - } - if (sym.changes?.fanIn !== 0) { - parts.push(`fan_in: ${sym.base?.fanIn} -> ${sym.target?.fanIn}`); - } - if (sym.changes?.fanOut !== 0) { - parts.push(`fan_out: ${sym.base?.fanOut} -> ${sym.target?.fanOut}`); - } - const detail = parts.length > 0 ? ` (${parts.join(', ')})` : ''; - lines.push( - ` [${kindIcon(sym.kind)}] ${sym.name} -- ${sym.file}:${sym.base?.line}${detail}`, - ); - if (sym.impact && sym.impact.length > 0) { - lines.push( - ` ^ ${sym.impact.length} transitive caller${sym.impact.length !== 1 ? 's' : ''} affected`, - ); - } - } - } + lines.push(...formatAddedSection(data.added)); + lines.push(...formatRemovedSection(data.removed)); + lines.push(...formatChangedSection(data.changed)); const s = data.summary; lines.push(''); diff --git a/src/presentation/check.ts b/src/presentation/check.ts index 27520c7c..a9137a43 100644 --- a/src/presentation/check.ts +++ b/src/presentation/check.ts @@ -52,6 +52,39 @@ interface CheckDataResult { }; } +/** Print violation details for a failed predicate (max 10 items). 
*/ +function formatPredicateViolations(pred: CheckPredicate): void { + const MAX_SHOWN = 10; + + if (pred.name === 'cycles' && pred.cycles) { + for (const cycle of pred.cycles.slice(0, MAX_SHOWN)) { + console.log(` ${cycle.join(' -> ')}`); + } + if (pred.cycles.length > MAX_SHOWN) { + console.log(` ... and ${pred.cycles.length - MAX_SHOWN} more`); + } + } + + if (!pred.violations) return; + + const formatViolation = (v: CheckViolation): string => { + if (pred.name === 'blast-radius') { + return `${v.name} (${v.kind}) at ${v.file}:${v.line} — ${v.transitiveCallers} callers (max: ${pred.threshold})`; + } + if (pred.name === 'boundaries') { + return `${v.from} -> ${v.to} (${v.edgeKind})`; + } + return `${v.name} (${v.kind}) at ${v.file}:${v.line}`; + }; + + for (const v of pred.violations.slice(0, MAX_SHOWN)) { + console.log(` ${formatViolation(v)}`); + } + if (pred.violations.length > MAX_SHOWN) { + console.log(` ... and ${pred.violations.length - MAX_SHOWN} more`); + } +} + export function check(customDbPath: string | undefined, opts: CheckCliOpts = {}): void { const data = checkData(customDbPath, { ref: opts.ref, @@ -89,40 +122,7 @@ export function check(customDbPath: string | undefined, opts: CheckCliOpts = {}) console.log(` [${icon}] ${pred.name}`); if (!pred.passed) { - if (pred.name === 'cycles' && pred.cycles) { - for (const cycle of pred.cycles.slice(0, 10)) { - console.log(` ${cycle.join(' -> ')}`); - } - if (pred.cycles.length > 10) { - console.log(` ... and ${pred.cycles.length - 10} more`); - } - } - if (pred.name === 'blast-radius' && pred.violations) { - for (const v of pred.violations.slice(0, 10)) { - console.log( - ` ${v.name} (${v.kind}) at ${v.file}:${v.line} — ${v.transitiveCallers} callers (max: ${pred.threshold})`, - ); - } - if (pred.violations.length > 10) { - console.log(` ... 
and ${pred.violations.length - 10} more`); - } - } - if (pred.name === 'signatures' && pred.violations) { - for (const v of pred.violations.slice(0, 10)) { - console.log(` ${v.name} (${v.kind}) at ${v.file}:${v.line}`); - } - if (pred.violations.length > 10) { - console.log(` ... and ${pred.violations.length - 10} more`); - } - } - if (pred.name === 'boundaries' && pred.violations) { - for (const v of pred.violations.slice(0, 10)) { - console.log(` ${v.from} -> ${v.to} (${v.edgeKind})`); - } - if (pred.violations.length > 10) { - console.log(` ... and ${pred.violations.length - 10} more`); - } - } + formatPredicateViolations(pred); } if (pred.note) { console.log(` ${pred.note}`); diff --git a/src/presentation/complexity.ts b/src/presentation/complexity.ts index e027cdc8..8789d5b3 100644 --- a/src/presentation/complexity.ts +++ b/src/presentation/complexity.ts @@ -48,6 +48,43 @@ interface ComplexityResult { hasGraph: boolean; } +/** Render health-focused table with Halstead + MI columns. */ +function renderHealthTable(functions: ComplexityFunction[]): void { + console.log( + ` ${'Function'.padEnd(35)} ${'File'.padEnd(25)} ${'MI'.padStart(5)} ${'Vol'.padStart(7)} ${'Diff'.padStart(6)} ${'Effort'.padStart(9)} ${'Bugs'.padStart(6)} ${'LOC'.padStart(5)} ${'SLOC'.padStart(5)}`, + ); + console.log( + ` ${'─'.repeat(35)} ${'─'.repeat(25)} ${'─'.repeat(5)} ${'─'.repeat(7)} ${'─'.repeat(6)} ${'─'.repeat(9)} ${'─'.repeat(6)} ${'─'.repeat(5)} ${'─'.repeat(5)}`, + ); + for (const fn of functions) { + const name = fn.name.length > 33 ? `${fn.name.slice(0, 32)}…` : fn.name; + const file = fn.file.length > 23 ? `…${fn.file.slice(-22)}` : fn.file; + const miWarn = fn.exceeds?.includes('maintainabilityIndex') ? '!' 
: ' '; + console.log( + ` ${name.padEnd(35)} ${file.padEnd(25)} ${String(fn.maintainabilityIndex).padStart(5)}${miWarn}${String(fn.halstead.volume).padStart(7)} ${String(fn.halstead.difficulty).padStart(6)} ${String(fn.halstead.effort).padStart(9)} ${String(fn.halstead.bugs).padStart(6)} ${String(fn.loc).padStart(5)} ${String(fn.sloc).padStart(5)}`, + ); + } +} + +/** Render default complexity table with MI column. */ +function renderDefaultTable(functions: ComplexityFunction[]): void { + console.log( + ` ${'Function'.padEnd(40)} ${'File'.padEnd(30)} ${'Cog'.padStart(4)} ${'Cyc'.padStart(4)} ${'Nest'.padStart(5)} ${'MI'.padStart(5)}`, + ); + console.log( + ` ${'─'.repeat(40)} ${'─'.repeat(30)} ${'─'.repeat(4)} ${'─'.repeat(4)} ${'─'.repeat(5)} ${'─'.repeat(5)}`, + ); + for (const fn of functions) { + const name = fn.name.length > 38 ? `${fn.name.slice(0, 37)}…` : fn.name; + const file = fn.file.length > 28 ? `…${fn.file.slice(-27)}` : fn.file; + const warn = fn.exceeds ? ' !' : ''; + const mi = fn.maintainabilityIndex > 0 ? 
String(fn.maintainabilityIndex) : '-'; + console.log( + ` ${name.padEnd(40)} ${file.padEnd(30)} ${String(fn.cognitive).padStart(4)} ${String(fn.cyclomatic).padStart(4)} ${String(fn.maxNesting).padStart(5)} ${mi.padStart(5)}${warn}`, + ); + } +} + export function complexity(customDbPath: string | undefined, opts: ComplexityCliOpts = {}): void { const data = complexityData(customDbPath, opts as any) as unknown as ComplexityResult; @@ -74,40 +111,9 @@ export function complexity(customDbPath: string | undefined, opts: ComplexityCli console.log(`\n# ${header}\n`); if (opts.health) { - // Health-focused view with Halstead + MI columns - console.log( - ` ${'Function'.padEnd(35)} ${'File'.padEnd(25)} ${'MI'.padStart(5)} ${'Vol'.padStart(7)} ${'Diff'.padStart(6)} ${'Effort'.padStart(9)} ${'Bugs'.padStart(6)} ${'LOC'.padStart(5)} ${'SLOC'.padStart(5)}`, - ); - console.log( - ` ${'─'.repeat(35)} ${'─'.repeat(25)} ${'─'.repeat(5)} ${'─'.repeat(7)} ${'─'.repeat(6)} ${'─'.repeat(9)} ${'─'.repeat(6)} ${'─'.repeat(5)} ${'─'.repeat(5)}`, - ); - - for (const fn of data.functions) { - const name = fn.name.length > 33 ? `${fn.name.slice(0, 32)}…` : fn.name; - const file = fn.file.length > 23 ? `…${fn.file.slice(-22)}` : fn.file; - const miWarn = fn.exceeds?.includes('maintainabilityIndex') ? '!' 
: ' '; - console.log( - ` ${name.padEnd(35)} ${file.padEnd(25)} ${String(fn.maintainabilityIndex).padStart(5)}${miWarn}${String(fn.halstead.volume).padStart(7)} ${String(fn.halstead.difficulty).padStart(6)} ${String(fn.halstead.effort).padStart(9)} ${String(fn.halstead.bugs).padStart(6)} ${String(fn.loc).padStart(5)} ${String(fn.sloc).padStart(5)}`, - ); - } + renderHealthTable(data.functions); } else { - // Default view with MI column appended - console.log( - ` ${'Function'.padEnd(40)} ${'File'.padEnd(30)} ${'Cog'.padStart(4)} ${'Cyc'.padStart(4)} ${'Nest'.padStart(5)} ${'MI'.padStart(5)}`, - ); - console.log( - ` ${'─'.repeat(40)} ${'─'.repeat(30)} ${'─'.repeat(4)} ${'─'.repeat(4)} ${'─'.repeat(5)} ${'─'.repeat(5)}`, - ); - - for (const fn of data.functions) { - const name = fn.name.length > 38 ? `${fn.name.slice(0, 37)}…` : fn.name; - const file = fn.file.length > 28 ? `…${fn.file.slice(-27)}` : fn.file; - const warn = fn.exceeds ? ' !' : ''; - const mi = fn.maintainabilityIndex > 0 ? String(fn.maintainabilityIndex) : '-'; - console.log( - ` ${name.padEnd(40)} ${file.padEnd(30)} ${String(fn.cognitive).padStart(4)} ${String(fn.cyclomatic).padStart(4)} ${String(fn.maxNesting).padStart(5)} ${mi.padStart(5)}${warn}`, - ); - } + renderDefaultTable(data.functions); } if (data.summary) { diff --git a/src/presentation/queries-cli/exports.ts b/src/presentation/queries-cli/exports.ts index d8e779a4..433d409b 100644 --- a/src/presentation/queries-cli/exports.ts +++ b/src/presentation/queries-cli/exports.ts @@ -97,6 +97,22 @@ function printReexportedSymbols(reexportedSymbols: ReexportedSymbol[]): void { } } +function printReexportedSection(data: ExportsDataResult, opts: ExportsOpts): void { + const totalReexported = opts.unused + ? (data.totalReexportedUnused ?? data.reexportedSymbols.length) + : (data.totalReexported ?? data.reexportedSymbols.length); + const plural = totalReexported !== 1 ? 's' : ''; + if (data.results.length === 0) { + const label = opts.unused ? 
'unused re-exported' : 're-exported'; + console.log( + `\n# ${data.file} — barrel file (${totalReexported} ${label} symbol${plural} from sub-modules)\n`, + ); + } else { + console.log(`\n Re-exported symbols (${totalReexported} from sub-modules):`); + } + printReexportedSymbols(data.reexportedSymbols); +} + export function fileExports(file: string, customDbPath: string, opts: ExportsOpts = {}): void { const data = exportsData(file, customDbPath, opts) as ExportsDataResult; if (outputResult(data as unknown as Record, 'results', opts)) return; @@ -118,23 +134,7 @@ export function fileExports(file: string, customDbPath: string, opts: ExportsOpt } if (hasReexported) { - const totalReexported = opts.unused - ? (data.totalReexportedUnused ?? data.reexportedSymbols.length) - : (data.totalReexported ?? data.reexportedSymbols.length); - if (data.results.length === 0) { - if (opts.unused) { - console.log( - `\n# ${data.file} — barrel file (${totalReexported} unused re-exported symbol${totalReexported !== 1 ? 's' : ''} from sub-modules)\n`, - ); - } else { - console.log( - `\n# ${data.file} — barrel file (${totalReexported} re-exported symbol${totalReexported !== 1 ? 
's' : ''} from sub-modules)\n`, - ); - } - } else { - console.log(`\n Re-exported symbols (${totalReexported} from sub-modules):`); - } - printReexportedSymbols(data.reexportedSymbols); + printReexportedSection(data, opts); } if (data.reexports.length > 0) { diff --git a/src/presentation/queries-cli/impact.ts b/src/presentation/queries-cli/impact.ts index 37852f4a..96661566 100644 --- a/src/presentation/queries-cli/impact.ts +++ b/src/presentation/queries-cli/impact.ts @@ -151,6 +151,33 @@ export function fileDeps(file: string, customDbPath: string, opts: OutputOpts = } } +function printFnDepsCallees(callees: SymbolRef[]): void { + if (callees.length === 0) return; + console.log(` -> Calls (${callees.length}):`); + for (const c of callees) console.log(` -> ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); +} + +function printFnDepsCallers(callers: CallerRef[]): void { + if (callers.length === 0) return; + console.log(`\n <- Called by (${callers.length}):`); + for (const c of callers) { + const via = c.viaHierarchy ? ` (via ${c.viaHierarchy})` : ''; + console.log(` <- ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); + } +} + +function printFnDepsTransitive(transitiveCallers: Record): void { + for (const [d, fns] of Object.entries(transitiveCallers)) { + const depth = parseInt(d, 10); + console.log(`\n ${'<-'.repeat(depth)} Transitive callers (depth ${d}, ${fns.length}):`); + for (const n of fns.slice(0, 20)) + console.log( + ` ${' '.repeat(depth - 1)}<- ${kindIcon(n.kind)} ${n.name} ${n.file}:${n.line}`, + ); + if (fns.length > 20) console.log(` ... 
and ${fns.length - 20} more`); + } +} + export function fnDeps(name: string, customDbPath: string, opts: OutputOpts = {}): void { const data = fnDepsData(name, customDbPath, opts) as unknown as FnDepsData; if (outputResult(data as unknown as Record, 'results', opts)) return; @@ -162,28 +189,9 @@ export function fnDeps(name: string, customDbPath: string, opts: OutputOpts = {} for (const r of data.results) { console.log(`\n${kindIcon(r.kind)} ${r.name} (${r.kind}) -- ${r.file}:${r.line}\n`); - if (r.callees.length > 0) { - console.log(` -> Calls (${r.callees.length}):`); - for (const c of r.callees) - console.log(` -> ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - if (r.callers.length > 0) { - console.log(`\n <- Called by (${r.callers.length}):`); - for (const c of r.callers) { - const via = c.viaHierarchy ? ` (via ${c.viaHierarchy})` : ''; - console.log(` <- ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); - } - } - for (const [d, fns] of Object.entries(r.transitiveCallers)) { - console.log( - `\n ${'<-'.repeat(parseInt(d, 10))} Transitive callers (depth ${d}, ${fns.length}):`, - ); - for (const n of fns.slice(0, 20)) - console.log( - ` ${' '.repeat(parseInt(d, 10) - 1)}<- ${kindIcon(n.kind)} ${n.name} ${n.file}:${n.line}`, - ); - if (fns.length > 20) console.log(` ... and ${fns.length - 20} more`); - } + printFnDepsCallees(r.callees); + printFnDepsCallers(r.callers); + printFnDepsTransitive(r.transitiveCallers); if (r.callees.length === 0 && r.callers.length === 0) { console.log(` (no call edges found -- may be invoked dynamically or via re-exports)`); }