fix: support repeated --file flag for multi-file scoping#498
fix: support repeated --file flag for multi-file scoping#498carlos-alm merged 10 commits intomainfrom
Conversation
Make --file variadic via Commander accumulator so passing --file a.js --file b.js collects both values into an array. - Add normalizeFileFilter(), buildFileConditionSQL(), and collectFile() helpers to query-builder.js - Update NodeQuery.fileFilter() to accept string | string[] - Update all 16 CLI command definitions to use collectFile accumulator - Update all downstream SQL consumers (complexity, cfg, dataflow, manifesto, ast, roles, generators, nodes) to use buildFileConditionSQL - Update in-memory repository to handle array file filters - Update search layer (keyword, prepare, filters) for array filePattern - Add tests for normalizeFileFilter, buildFileConditionSQL, collectFile, and NodeQuery.fileFilter with arrays
Greptile SummaryThis PR adds repeatable One bug remains:
The unit test suite is solid, though the multi-value Confidence Score: 3/5
Important Files Changed
Flowchart%%{init: {'theme': 'neutral'}}%%
flowchart TD
CLI["CLI --file a.js --file b.js\n(collectFile accumulator)"]
CLI --> NFF["normalizeFileFilter(file)\nstring | string[] → string[]"]
NFF -->|"empty / falsy"| EMPTY["return [] (no-op)"]
NFF -->|"string"| WRAP["return [file]"]
NFF -->|"array"| PASS["return file as-is\n⚠ empty strings not filtered"]
WRAP --> BFC
PASS --> BFC
BFC["buildFileConditionSQL(files, column)\n+ validateColumn\n+ escapeLike"]
BFC -->|"0 files"| NOSQL["{ sql: '', params: [] }"]
BFC -->|"1 file"| SINGLE["AND col LIKE ? ESCAPE '\\'"]
BFC -->|"N files"| MULTI["AND (col LIKE ? ESCAPE '\\'\nOR col LIKE ? ESCAPE '\\')"]
SINGLE --> CONSUMERS
MULTI --> CONSUMERS
CONSUMERS["Query Consumers\n(complexity, roles, nodes,\nfind-nodes, generators…)"]
CONSUMERS --> SQL["Parameterised SQL\nwith ESCAPE clause"]
NFF2["normalizeFileFilter (owners.js)\n+ JS Array.filter"]
NFF2 -->|"each entry"| INCL["f.includes(filter)\n⚠ ''.includes always true"]
CLI --> NFF2
GLOB["FTS / Embed search\n(keyword.js / prepare.js)"]
CLI --> GLOB
GLOB -->|"!isGlob"| BFC2["buildFileConditionSQL → SQL pre-filter"]
GLOB -->|"isGlob"| POSTFILT["applyFilters (per-pattern:\nglobMatch or includes)"]
|
src/domain/search/search/filters.js
Outdated
| const isGlob = | ||
| opts.isGlob !== undefined ? opts.isGlob : opts.filePattern && /[*?[\]]/.test(opts.filePattern); | ||
| opts.isGlob !== undefined | ||
| ? opts.isGlob | ||
| : fpArr.length > 0 && fpArr.some((p) => /[*?[\]]/.test(p)); | ||
| if (isGlob) { | ||
| filtered = filtered.filter((row) => globMatch(row.file, opts.filePattern)); | ||
| filtered = filtered.filter((row) => fpArr.some((p) => globMatch(row.file, p))); |
There was a problem hiding this comment.
Mixed glob + non-glob patterns silently break non-glob substring matching
When a user mixes glob and non-glob patterns (e.g., --file src/shared --file "*.test.js"), isGlob becomes true because at least one pattern contains a glob character. As a result, ALL patterns are routed through globMatch, which performs anchored regex matching (regex is wrapped with ^...$).
This means the non-glob pattern src/shared becomes the regex ^src/shared$, which will not match src/shared/paginate.js. The user expects a substring/LIKE match for src/shared, but gets zero results for that pattern.
The same issue exists symmetrically in keyword.js and prepare.js — when isGlob is true, SQL pre-filtering is skipped for all patterns, and then applyFilters uses globMatch for everything.
A fix would be to apply LIKE filtering for non-glob entries independently of glob filtering:
if (isGlob) {
const globPatterns = fpArr.filter((p) => /[*?[\]]/.test(p));
const likePatterns = fpArr.filter((p) => !/[*?[\]]/.test(p));
filtered = filtered.filter((row) =>
globPatterns.some((p) => globMatch(row.file, p)) ||
likePatterns.some((p) => row.file.includes(p))
);
}There was a problem hiding this comment.
Fixed in aff787c — Changed applyFilters to check isGlob per-pattern instead of for the whole array. Each pattern now individually decides whether to use globMatch (for glob patterns with *?[]) or String.includes() (for plain substring patterns). This prevents non-glob patterns from silently breaking when mixed with glob patterns.
| export function buildFileConditionSQL(file, column = 'file') { | ||
| const files = normalizeFileFilter(file); | ||
| if (files.length === 0) return { sql: '', params: [] }; | ||
| if (files.length === 1) { | ||
| return { | ||
| sql: ` AND ${column} LIKE ? ESCAPE '\\'`, | ||
| params: [`%${escapeLike(files[0])}%`], | ||
| }; | ||
| } | ||
| const clauses = files.map(() => `${column} LIKE ? ESCAPE '\\'`); | ||
| return { | ||
| sql: ` AND (${clauses.join(' OR ')})`, | ||
| params: files.map((f) => `%${escapeLike(f)}%`), | ||
| }; | ||
| } |
There was a problem hiding this comment.
column parameter not validated before SQL interpolation
buildFileConditionSQL interpolates the column argument directly into SQL without calling validateColumn(), unlike the existing testFilterSQL which validates:
export function testFilterSQL(column = 'n.file', enabled = true) {
if (!enabled) return '';
validateColumn(column); // ← validates before interpolation
...
}While all current call sites pass hardcoded strings, adding validation here keeps the security model consistent and guards against future callers passing dynamic column names:
| export function buildFileConditionSQL(file, column = 'file') { | |
| const files = normalizeFileFilter(file); | |
| if (files.length === 0) return { sql: '', params: [] }; | |
| if (files.length === 1) { | |
| return { | |
| sql: ` AND ${column} LIKE ? ESCAPE '\\'`, | |
| params: [`%${escapeLike(files[0])}%`], | |
| }; | |
| } | |
| const clauses = files.map(() => `${column} LIKE ? ESCAPE '\\'`); | |
| return { | |
| sql: ` AND (${clauses.join(' OR ')})`, | |
| params: files.map((f) => `%${escapeLike(f)}%`), | |
| }; | |
| } | |
| export function buildFileConditionSQL(file, column = 'file') { | |
| validateColumn(column); | |
| const files = normalizeFileFilter(file); | |
| if (files.length === 0) return { sql: '', params: [] }; |
There was a problem hiding this comment.
Fixed in f6a0766 — Added validateColumn(column) call at the start of buildFileConditionSQL, matching the validation pattern already used by testFilterSQL. This prevents unvalidated column names from being interpolated into SQL.
src/domain/search/search/keyword.js
Outdated
| if (fpArr.length > 0 && !isGlob) { | ||
| if (fpArr.length === 1) { | ||
| sql += ' AND n.file LIKE ?'; | ||
| params.push(`%${fpArr[0]}%`); | ||
| } else { | ||
| sql += ` AND (${fpArr.map(() => 'n.file LIKE ?').join(' OR ')})`; | ||
| params.push(...fpArr.map((f) => `%${f}%`)); | ||
| } |
There was a problem hiding this comment.
Inline LIKE filter missing
ESCAPE clause and wildcard escaping
This file (and the parallel code in prepare.js lines 49–56) has its own inline LIKE implementation instead of reusing buildFileConditionSQL. The inline version doesn't call escapeLike() and omits the ESCAPE '\\' clause, unlike buildFileConditionSQL:
// keyword.js / prepare.js — no escaping, no ESCAPE clause
sql += ` AND (${fpArr.map(() => 'n.file LIKE ?').join(' OR ')})`;
params.push(...fpArr.map((f) => `%${f}%`));
// buildFileConditionSQL — consistent escaping
sql: ` AND (${clauses.join(' OR ')})`, // uses ESCAPE '\\'
params: files.map((f) => `%${escapeLike(f)}%`),A file pattern like my_utils.js (containing _) would unexpectedly match myXutils.js in the search command but not in other commands. Reusing buildFileConditionSQL here would close the gap:
import { buildFileConditionSQL } from '../../../db/query-builder.js';
// ...
const fc = buildFileConditionSQL(fpArr.length === 1 ? fpArr[0] : fpArr, 'n.file');
if (fc.sql) {
sql += fc.sql;
params.push(...fc.params);
}The same fix applies to src/domain/search/search/prepare.js.
There was a problem hiding this comment.
Fixed in f6edc13 — Refactored the inline LIKE filter in keyword.js to reuse \ instead of hand-rolling LIKE clauses. This gets proper \ and \ handling for free, eliminating the SQL injection risk from unescaped wildcards.
src/cli/commands/search.js
Outdated
| // --file collects into an array; search supports single string filePattern, | ||
| // so pass first element for single value, or join for backward compat. | ||
| const fileArr = opts.file || []; | ||
| const filePattern = | ||
| fileArr.length === 1 ? fileArr[0] : fileArr.length > 1 ? fileArr : undefined; |
There was a problem hiding this comment.
Inaccurate comment — array is passed directly, not joined
The comment says "or join for backward compat", but the code actually passes the raw array when there are multiple values — it never calls .join(). The comment is misleading:
| // --file collects into an array; search supports single string filePattern, | |
| // so pass first element for single value, or join for backward compat. | |
| const fileArr = opts.file || []; | |
| const filePattern = | |
| fileArr.length === 1 ? fileArr[0] : fileArr.length > 1 ? fileArr : undefined; | |
| // --file collects into an array; pass the string directly for a single value, | |
| // or the full array for multi-file filtering. | |
| const fileArr = opts.file || []; | |
| const filePattern = | |
| fileArr.length === 1 ? fileArr[0] : fileArr.length > 1 ? fileArr : undefined; |
There was a problem hiding this comment.
Fixed in bb07f13 — Updated the comment to accurately describe what the code does: pass single element unwrapped for single value, or pass the raw array for multi-file scoping.
Impact: 29 functions changed, 178 affected
When a user mixes glob and non-glob patterns, the previous code used a single isGlob flag for the whole array. This caused plain substring patterns to go through globMatch (anchored regex), silently breaking non-glob matches. Now each pattern individually decides whether to use glob matching or substring matching. Impact: 1 functions changed, 1 affected
Add validateColumn() call at the start of buildFileConditionSQL to prevent unvalidated SQL interpolation, matching the pattern used by testFilterSQL. Impact: 1 functions changed, 1 affected
…ping The inline LIKE filter was missing escapeLike() and the ESCAPE '\' clause. Refactored to reuse buildFileConditionSQL which handles both correctly. Impact: 1 functions changed, 0 affected
The comment said "join for backward compat" but the code actually passes the raw array for multi-file scoping. Updated to match reality. Impact: 1 functions changed, 0 affected
| if (fpArr.length > 0 && !isGlob) { | ||
| if (fpArr.length === 1) { | ||
| conditions.push('n.file LIKE ?'); | ||
| params.push(`%${fpArr[0]}%`); | ||
| } else { | ||
| conditions.push(`(${fpArr.map(() => 'n.file LIKE ?').join(' OR ')})`); | ||
| params.push(...fpArr.map((f) => `%${f}%`)); | ||
| } | ||
| } |
There was a problem hiding this comment.
Missing
escapeLike() and ESCAPE clause (inconsistent with keyword.js)
The inline LIKE filter in prepare.js does not call escapeLike() on the file patterns and omits the ESCAPE '\\' clause. This is exactly the same issue that was fixed for keyword.js in this PR (which now delegates to buildFileConditionSQL), but prepare.js was not given the same treatment.
Concrete impact: if a user passes --file src/my_utils.js, the underscore is a SQL LIKE wildcard and will match src/myXutils.js. Similarly, a % in a path would match anything.
The fix is the same pattern already used in keyword.js:
| if (fpArr.length > 0 && !isGlob) { | |
| if (fpArr.length === 1) { | |
| conditions.push('n.file LIKE ?'); | |
| params.push(`%${fpArr[0]}%`); | |
| } else { | |
| conditions.push(`(${fpArr.map(() => 'n.file LIKE ?').join(' OR ')})`); | |
| params.push(...fpArr.map((f) => `%${f}%`)); | |
| } | |
| } | |
| if (fpArr.length > 0 && !isGlob) { | |
| const fc = buildFileConditionSQL(fpArr, 'n.file'); | |
| conditions.push(fc.sql.replace(/^ AND /, '')); | |
| params.push(...fc.params); | |
| } |
You'll also need to add the import at the top of the file:
import { buildFileConditionSQL } from '../../../db/query-builder.js';There was a problem hiding this comment.
Fixed in 8b98e40 — imported \ from \ and added \ clause to both the single-file and multi-file LIKE conditions in , matching the pattern already used in .
There was a problem hiding this comment.
Already fixed — prepare.js now imports escapeLike from query-builder.js (line 2) and uses it with ESCAPE '\' clauses on lines 55-59. This was addressed in the same batch of fixes as the keyword.js fix.
Impact: 1 functions changed, 7 affected
1 similar comment
Stop passing a pre-computed isGlob boolean to applyFilters — it applied one flag to all patterns, so mixed glob + non-glob patterns (e.g. --file "*.js" --file "utils") would treat non-glob patterns as globs. Now each pattern is individually checked for glob chars. Impact: 3 functions changed, 2 affected
|
Addressed Greptile's outside-diff P1 about mixed glob/non-glob Fixed in 23db20b — stopped passing the pre-computed |
Impact: 225 functions changed, 103 affected
Impact: 124 functions changed, 57 affected
Summary
Closes #483
--filenow collects all values into an array when repeated (e.g.--file a.js --file b.js)collectFileaccumulator,normalizeFileFilter, andbuildFileConditionSQLhelpers insrc/db/query-builder.js--fileto use the accumulatorAND (file LIKE ? OR file LIKE ?)clauses for multi-file filters--filevalue still works as beforeTest plan
codegraph complexity --file src/shared/paginate.js --file src/shared/errors.jsreturns results from both files--filestill works as beforenormalizeFileFilter,buildFileConditionSQL,collectFileNodeQuery.fileFilter()works with arrays