Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,129 @@ jobs:
--head "$BRANCH" \
--title "docs: update embedding benchmarks" \
--body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."

# Regenerates generated/QUERY-BENCHMARKS.md on main and opens a PR when the
# report changed. Runs on manual dispatch, or after the triggering
# workflow_run concluded successfully.
query-benchmark:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
github.event.workflow_run.conclusion == 'success'
permissions:
contents: write
pull-requests: write

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
token: ${{ secrets.GITHUB_TOKEN }}

- uses: actions/setup-node@v4
with:
node-version: "22"

- run: npm install

# NOTE(review): 2>/dev/null discards the benchmark script's stderr (its
# progress/error text) — confirm this is intentional; a non-zero exit code
# presumably still fails the step.
- name: Run query benchmark
run: node scripts/query-benchmark.js 2>/dev/null > query-benchmark-result.json

- name: Update query report
run: node scripts/update-query-report.js query-benchmark-result.json

# Keep the raw JSON result downloadable as a workflow artifact.
- name: Upload query result
uses: actions/upload-artifact@v4
with:
name: query-benchmark-result
path: query-benchmark-result.json

# Only open a PR when the generated report actually changed.
- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
fi

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

BRANCH="benchmark/query-$(date +%Y%m%d-%H%M%S)"
git checkout -b "$BRANCH"
git add generated/QUERY-BENCHMARKS.md
git commit -m "docs: update query benchmarks"
git push origin "$BRANCH"

gh pr create \
--base main \
--head "$BRANCH" \
--title "docs: update query benchmarks" \
--body "Automated query benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."

# Regenerates generated/INCREMENTAL-BENCHMARKS.md on main and opens a PR when
# the report changed. Mirrors the other benchmark jobs in this workflow:
# same triggers, same change-detection, same PR flow.
incremental-benchmark:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
github.event.workflow_run.conclusion == 'success'
permissions:
contents: write
pull-requests: write

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
token: ${{ secrets.GITHUB_TOKEN }}

- uses: actions/setup-node@v4
with:
node-version: "22"

- run: npm install

# NOTE(review): 2>/dev/null discards the benchmark script's stderr (its
# progress/error text) — confirm this is intentional; a non-zero exit code
# presumably still fails the step.
- name: Run incremental benchmark
run: node scripts/incremental-benchmark.js 2>/dev/null > incremental-benchmark-result.json

- name: Update incremental report
run: node scripts/update-incremental-report.js incremental-benchmark-result.json

# Keep the raw JSON result downloadable as a workflow artifact.
- name: Upload incremental result
uses: actions/upload-artifact@v4
with:
name: incremental-benchmark-result
path: incremental-benchmark-result.json

# Only open a PR when the generated report actually changed.
- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
fi

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

BRANCH="benchmark/incremental-$(date +%Y%m%d-%H%M%S)"
git checkout -b "$BRANCH"
git add generated/INCREMENTAL-BENCHMARKS.md
git commit -m "docs: update incremental benchmarks"
git push origin "$BRANCH"

gh pr create \
--base main \
--head "$BRANCH" \
--title "docs: update incremental benchmarks" \
--body "Automated incremental benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
2 changes: 2 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ description.
|-----------|-----------------|-------------|
| `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Changes to `builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, or the native engine |
| `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Changes to `embedder.js` or embedding strategies |
| `node scripts/query-benchmark.js` | Query depth scaling, diff-impact latency | Changes to `queries.js`, `resolve.js`, or `db.js` |
| `node scripts/incremental-benchmark.js` | Incremental build, import resolution throughput | Changes to `builder.js`, `resolve.js`, `parser.js`, or `journal.js` |

### How to report results

Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,20 @@ Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHM

Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.

### Lightweight Footprint

<a href="https://www.npmjs.com/package/@optave/codegraph"><img src="https://img.shields.io/npm/unpacked-size/@optave/codegraph?style=flat-square&label=unpacked%20size" alt="npm unpacked size" /></a>

Only **3 runtime dependencies** — everything else is optional or a devDependency:

| Dependency | What it does | | |
|---|---|---|---|
| [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | Fast, synchronous SQLite driver | ![GitHub stars](https://img.shields.io/github/stars/WiseLibs/better-sqlite3?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/better-sqlite3?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
| [commander](https://github.com/tj/commander.js) | CLI argument parsing | ![GitHub stars](https://img.shields.io/github/stars/tj/commander.js?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/commander?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
| [web-tree-sitter](https://github.com/tree-sitter/tree-sitter) | WASM tree-sitter bindings | ![GitHub stars](https://img.shields.io/github/stars/tree-sitter/tree-sitter?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/web-tree-sitter?style=flat-square&label=%F0%9F%93%A5%2Fwk) |

Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/sdk` (MCP server) — lazy-loaded only when needed.

## 🤖 AI Agent Integration

### MCP Server
Expand Down
202 changes: 202 additions & 0 deletions scripts/incremental-benchmark.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#!/usr/bin/env node

/**
* Incremental build benchmark — measures build tiers and import resolution.
*
* Measures full build, no-op rebuild, and single-file rebuild for both
* native and WASM engines. Also benchmarks import resolution throughput:
* native batch vs JS fallback.
*
* Usage: node scripts/incremental-benchmark.js > result.json
*/

import fs from 'node:fs';
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath, pathToFileURL } from 'node:url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const root = path.resolve(__dirname, '..');

const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
const dbPath = path.join(root, '.codegraph', 'graph.db');

const { buildGraph } = await import(pathToFileURL(path.join(root, 'src', 'builder.js')).href);
const { statsData } = await import(pathToFileURL(path.join(root, 'src', 'queries.js')).href);
const { resolveImportPath, resolveImportsBatch, resolveImportPathJS } = await import(
pathToFileURL(path.join(root, 'src', 'resolve.js')).href
);
const { isNativeAvailable } = await import(
pathToFileURL(path.join(root, 'src', 'native.js')).href
);

// Redirect console.log to stderr so only JSON goes to stdout
// (restored via `origLog` just before the final JSON output below).
const origLog = console.log;
console.log = (...args) => console.error(...args);

// Repetitions per measurement; the median of the runs is reported.
const RUNS = 3;
// File temporarily appended to (and restored) to simulate a 1-file change.
const PROBE_FILE = path.join(root, 'src', 'queries.js');

/**
 * Return the median of a numeric array without mutating it.
 * For even-length input, the average of the two middle values.
 *
 * @param {number[]} arr - Sample values.
 * @returns {number} Median value.
 */
function median(arr) {
  const values = Array.from(arr).sort((x, y) => x - y);
  const half = values.length >> 1;
  if (values.length % 2 === 1) {
    return values[half];
  }
  return (values[half - 1] + values[half]) / 2;
}

/**
 * Round a number to one decimal place.
 *
 * @param {number} n - Value to round.
 * @returns {number} `n` rounded to a single decimal digit.
 */
function round1(n) {
  const scaled = Math.round(n * 10);
  return scaled / 10;
}

/**
 * Benchmark build tiers for a given engine.
 *
 * Measures three tiers, each reported as the median of RUNS runs:
 *  - full build: the DB at `dbPath` is deleted, then `incremental: false`
 *  - no-op rebuild: `incremental: true` with nothing changed
 *  - 1-file rebuild: PROBE_FILE has a probe comment appended, then
 *    `incremental: true`
 *
 * Side effects: deletes `dbPath`, temporarily rewrites PROBE_FILE on disk
 * (restored in the `finally` block), and finishes with one extra
 * incremental build to restore DB state.
 *
 * @param {string} engine - Engine option forwarded to buildGraph
 *   ('wasm' or 'native' as used by the callers below).
 * @returns {Promise<{fullBuildMs: number, noopRebuildMs: number, oneFileRebuildMs: number}>}
 *   Median wall-clock times in whole milliseconds.
 */
async function benchmarkBuildTiers(engine) {
// Full build (delete DB first)
const fullTimings = [];
for (let i = 0; i < RUNS; i++) {
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
const start = performance.now();
await buildGraph(root, { engine, incremental: false });
fullTimings.push(performance.now() - start);
}
const fullBuildMs = Math.round(median(fullTimings));

// No-op rebuild (nothing changed)
const noopTimings = [];
for (let i = 0; i < RUNS; i++) {
const start = performance.now();
await buildGraph(root, { engine, incremental: true });
noopTimings.push(performance.now() - start);
}
const noopRebuildMs = Math.round(median(noopTimings));

// 1-file change rebuild
const original = fs.readFileSync(PROBE_FILE, 'utf8');
let oneFileRebuildMs;
try {
const oneFileTimings = [];
for (let i = 0; i < RUNS; i++) {
// Unique suffix per run so each iteration registers as a real change.
fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`);
const start = performance.now();
await buildGraph(root, { engine, incremental: true });
oneFileTimings.push(performance.now() - start);
}
oneFileRebuildMs = Math.round(median(oneFileTimings));
} finally {
// Always restore the probe file, even if a build run threw.
fs.writeFileSync(PROBE_FILE, original);
// One final incremental build to restore DB state
await buildGraph(root, { engine, incremental: true });
}

return { fullBuildMs, noopRebuildMs, oneFileRebuildMs };
}

/**
 * Collect all import pairs by scanning source files for ES import statements.
 *
 * Scans every `.js` file directly inside `src/` and extracts the module
 * specifier of each `import ... from '...'` statement.
 *
 * @returns {Array<{fromFile: string, importSource: string}>} One entry per
 *   import statement found, with the absolute importing file path.
 */
function collectImportPairs() {
  const srcDir = path.join(root, 'src');
  const importRe = /(?:^|\n)\s*import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;

  const pairs = [];
  const jsFiles = fs.readdirSync(srcDir).filter((name) => name.endsWith('.js'));
  for (const name of jsFiles) {
    const fromFile = path.join(srcDir, name);
    const source = fs.readFileSync(fromFile, 'utf8');
    for (const hit of source.matchAll(importRe)) {
      pairs.push({ fromFile, importSource: hit[1] });
    }
  }
  return pairs;
}

/**
 * Benchmark import resolution: native batch vs JS fallback.
 *
 * @param {Array<{fromFile: string, importSource: string}>} inputs - Import
 *   pairs to resolve (from collectImportPairs).
 * @returns {{imports: number, nativeBatchMs: number|null, jsFallbackMs: number,
 *   perImportNativeMs: number|null, perImportJsMs: number}} Median timings in
 *   ms; native fields stay null when the native engine is unavailable.
 */
function benchmarkResolve(inputs) {
  const aliases = null; // codegraph itself has no path aliases

  // Run `work` RUNS times and return the median wall-clock duration (ms).
  const timeMedian = (work) => {
    const samples = [];
    for (let run = 0; run < RUNS; run++) {
      const t0 = performance.now();
      work();
      samples.push(performance.now() - t0);
    }
    return median(samples);
  };

  // Native batch — only measured when the native addon is loadable.
  let nativeBatchMs = null;
  let perImportNativeMs = null;
  if (isNativeAvailable()) {
    nativeBatchMs = round1(timeMedian(() => resolveImportsBatch(inputs, root, aliases)));
    perImportNativeMs = inputs.length > 0 ? round1(nativeBatchMs / inputs.length) : 0;
  }

  // JS fallback (call the exported JS implementation, one import at a time).
  const jsFallbackMs = round1(
    timeMedian(() => {
      for (const { fromFile, importSource } of inputs) {
        resolveImportPathJS(fromFile, importSource, root, aliases);
      }
    })
  );
  const perImportJsMs = inputs.length > 0 ? round1(jsFallbackMs / inputs.length) : 0;

  return {
    imports: inputs.length,
    nativeBatchMs,
    jsFallbackMs,
    perImportNativeMs,
    perImportJsMs,
  };
}

// ── Run benchmarks ───────────────────────────────────────────────────────

// WASM build tiers first; this also leaves a built graph DB on disk.
console.error('Benchmarking WASM engine...');
const wasm = await benchmarkBuildTiers('wasm');
console.error(` full=${wasm.fullBuildMs}ms noop=${wasm.noopRebuildMs}ms 1-file=${wasm.oneFileRebuildMs}ms`);

// Get file count from the WASM-built graph
const stats = statsData(dbPath);
const files = stats.files.total;

// Native build tiers are optional — skipped when the addon is unavailable.
let native = null;
if (isNativeAvailable()) {
console.error('Benchmarking native engine...');
native = await benchmarkBuildTiers('native');
console.error(` full=${native.fullBuildMs}ms noop=${native.noopRebuildMs}ms 1-file=${native.oneFileRebuildMs}ms`);
} else {
console.error('Native engine not available — skipping native build benchmark');
}

// Import resolution benchmark (uses existing graph)
console.error('Benchmarking import resolution...');
const inputs = collectImportPairs();
console.error(` ${inputs.length} import pairs collected`);
const resolve = benchmarkResolve(inputs);
console.error(` native=${resolve.nativeBatchMs}ms js=${resolve.jsFallbackMs}ms`);

// Restore console.log for JSON output
console.log = origLog;

// Final result object — this JSON is the script's only stdout output and is
// consumed by the report-update script in CI.
const result = {
version: pkg.version,
date: new Date().toISOString().slice(0, 10),
files,
wasm: {
fullBuildMs: wasm.fullBuildMs,
noopRebuildMs: wasm.noopRebuildMs,
oneFileRebuildMs: wasm.oneFileRebuildMs,
},
// null when the native addon could not be loaded on this machine.
native: native
? {
fullBuildMs: native.fullBuildMs,
noopRebuildMs: native.noopRebuildMs,
oneFileRebuildMs: native.oneFileRebuildMs,
}
: null,
resolve,
};

console.log(JSON.stringify(result, null, 2));
Loading