From c21c38786f47c2c9bcbc9a188c0bffd2160f7af0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 02:37:50 -0700 Subject: [PATCH 1/3] chore: configure bge-large as default embedding model Sets Xenova/bge-large-en-v1.5 (1024d) as the default embedding model for codegraph self-analysis, replacing the default minilm. --- .codegraphrc.json | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .codegraphrc.json diff --git a/.codegraphrc.json b/.codegraphrc.json new file mode 100644 index 00000000..98663830 --- /dev/null +++ b/.codegraphrc.json @@ -0,0 +1,3 @@ +{ + "embeddings": { "model": "bge-large" } +} From 77ffffcac566dbb84a95bece1b3d5ae432cf01e1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 09:28:26 -0700 Subject: [PATCH 2/3] fix: make embed command respect config embeddings.model The CLI embed command hardcoded 'minilm' as the default model via Commander, ignoring .codegraphrc.json config entirely. Now the embed command reads config.embeddings.model as the default when no -m flag is passed. Also fixes DEFAULTS.embeddings.model from 'nomic-v1.5' to 'minilm' to match the actual fallback used by the embedder, and updates the models command to show the configured default. --- src/cli.js | 15 ++++++++------- src/config.js | 2 +- tests/unit/config.test.js | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/cli.js b/src/cli.js index 60dbac63..2487ef75 100644 --- a/src/cli.js +++ b/src/cli.js @@ -423,12 +423,13 @@ program .command('models') .description('List available embedding models') .action(() => { + const defaultModel = config.embeddings?.model || 'minilm'; console.log('\nAvailable embedding models:\n'); - for (const [key, config] of Object.entries(MODELS)) { - const def = key === 'minilm' ? ' (default)' : ''; - const ctx = config.contextWindow ? `${config.contextWindow} ctx` : ''; + for (const [key, cfg] of Object.entries(MODELS)) { + const def = key === defaultModel ? ' (default)' : ''; + const ctx = cfg.contextWindow ? `${cfg.contextWindow} ctx` : ''; console.log( - ` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${ctx.padEnd(9)} ${config.desc}${def}`, + ` ${key.padEnd(12)} ${String(cfg.dim).padStart(4)}d ${ctx.padEnd(9)} ${cfg.desc}${def}`, ); } console.log('\nUsage: codegraph embed --model --strategy '); @@ -442,8 +443,7 @@ program ) .option( '-m, --model ', - 'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details', - 'minilm', + 'Embedding model (default from config or minilm). Run `codegraph models` for details', ) .option( '-s, --strategy ', @@ -458,7 +458,8 @@ program process.exit(1); } const root = path.resolve(dir || '.'); - await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy }); + const model = opts.model || config.embeddings?.model || 'minilm'; + await buildEmbeddings(root, model, undefined, { strategy: opts.strategy }); }); program diff --git a/src/config.js b/src/config.js index 5e90e5a1..4bee5a58 100644 --- a/src/config.js +++ b/src/config.js @@ -20,7 +20,7 @@ export const DEFAULTS = { defaultLimit: 20, excludeTests: false, }, - embeddings: { model: 'nomic-v1.5', llmProvider: null }, + embeddings: { model: 'minilm', llmProvider: null }, llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null }, search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 }, ci: { failOnCycles: false, impactThreshold: null }, diff --git a/tests/unit/config.test.js b/tests/unit/config.test.js index e922abe5..c005e6cb 100644 --- a/tests/unit/config.test.js +++ b/tests/unit/config.test.js @@ -55,7 +55,7 @@ describe('DEFAULTS', () => { }); it('has embeddings defaults', () => { - expect(DEFAULTS.embeddings).toEqual({ model: 'nomic-v1.5', llmProvider: null }); + expect(DEFAULTS.embeddings).toEqual({ model: 'minilm', llmProvider: null }); }); it('has llm defaults', () => { From 832fa49578bd51ffe3166cbbdc8dee71a51ef8c2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 09:32:56 -0700 Subject: [PATCH 3/3] fix: use DEFAULT_MODEL as single source of truth for embed default Change DEFAULT_MODEL in embedder.js from 'minilm' to 'nomic-v1.5' to match the intended default. Import DEFAULT_MODEL in cli.js instead of hardcoding strings. The embed command now resolves the model as: CLI flag > config.embeddings.model > DEFAULT_MODEL. Restores config.js DEFAULTS.embeddings.model to 'nomic-v1.5' (was incorrectly changed in prior commit). --- src/cli.js | 12 +++++++++--- src/config.js | 2 +- src/embedder.js | 2 +- tests/unit/config.test.js | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/cli.js b/src/cli.js index 2487ef75..e048ac44 100644 --- a/src/cli.js +++ b/src/cli.js @@ -7,7 +7,13 @@ import { buildGraph } from './builder.js'; import { loadConfig } from './config.js'; import { findCycles, formatCycles } from './cycles.js'; import { openReadonlyOrFail } from './db.js'; -import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js'; +import { + buildEmbeddings, + DEFAULT_MODEL, + EMBEDDING_STRATEGIES, + MODELS, + search, +} from './embedder.js'; import { exportDOT, exportJSON, exportMermaid } from './export.js'; import { setVerbose } from './logger.js'; import { @@ -423,7 +429,7 @@ program .command('models') .description('List available embedding models') .action(() => { - const defaultModel = config.embeddings?.model || 'minilm'; + const defaultModel = config.embeddings?.model || DEFAULT_MODEL; console.log('\nAvailable embedding models:\n'); for (const [key, cfg] of Object.entries(MODELS)) { const def = key === defaultModel ? ' (default)' : ''; @@ -458,7 +464,7 @@ program process.exit(1); } const root = path.resolve(dir || '.'); - const model = opts.model || config.embeddings?.model || 'minilm'; + const model = opts.model || config.embeddings?.model || DEFAULT_MODEL; await buildEmbeddings(root, model, undefined, { strategy: opts.strategy }); }); diff --git a/src/config.js b/src/config.js index 4bee5a58..5e90e5a1 100644 --- a/src/config.js +++ b/src/config.js @@ -20,7 +20,7 @@ export const DEFAULTS = { defaultLimit: 20, excludeTests: false, }, - embeddings: { model: 'minilm', llmProvider: null }, + embeddings: { model: 'nomic-v1.5', llmProvider: null }, llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null }, search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 }, ci: { failOnCycles: false, impactThreshold: null }, diff --git a/src/embedder.js b/src/embedder.js index 938a5976..4aba1e7d 100644 --- a/src/embedder.js +++ b/src/embedder.js @@ -98,7 +98,7 @@ export const MODELS = { export const EMBEDDING_STRATEGIES = ['structured', 'source']; -export const DEFAULT_MODEL = 'minilm'; +export const DEFAULT_MODEL = 'nomic-v1.5'; const BATCH_SIZE_MAP = { minilm: 32, 'jina-small': 16, diff --git a/tests/unit/config.test.js b/tests/unit/config.test.js index c005e6cb..e922abe5 100644 --- a/tests/unit/config.test.js +++ b/tests/unit/config.test.js @@ -55,7 +55,7 @@ describe('DEFAULTS', () => { }); it('has embeddings defaults', () => { - expect(DEFAULTS.embeddings).toEqual({ model: 'minilm', llmProvider: null }); + expect(DEFAULTS.embeddings).toEqual({ model: 'nomic-v1.5', llmProvider: null }); }); it('has llm defaults', () => {