Resolve the embedding model from project config, and change the built-in default.

- .codegraphrc.json: new project config selecting the "bge-large" embedding model.
- src/cli.js: `codegraph models` marks as "(default)" whichever model
  `config.embeddings?.model || DEFAULT_MODEL` resolves to, instead of the
  hard-coded 'minilm'. The loop variable is renamed from `config` to `cfg`
  so it no longer shadows the outer `config` that the new lookup reads.
- src/cli.js: `codegraph embed` no longer passes a commander default for
  --model; it resolves `opts.model || config.embeddings?.model || DEFAULT_MODEL`.
- src/embedder.js: DEFAULT_MODEL changes from 'minilm' to 'nomic-v1.5'.
- The --model help text interpolates DEFAULT_MODEL rather than naming
  'minilm', so it matches the fallback actually used by `embed`.

NOTE(review): indentation and blank context lines in the hunks below were
reconstructed from a whitespace-collapsed copy of this patch; verify them
against the working tree before applying.
NOTE(review): the option flags ('-m, --model ', '-s, --strategy ') and the
usage string look like they lost their <placeholder> argument names in
extraction; they are reproduced as seen - confirm against the real file.

diff --git a/.codegraphrc.json b/.codegraphrc.json
new file mode 100644
index 00000000..98663830
--- /dev/null
+++ b/.codegraphrc.json
@@ -0,0 +1,3 @@
+{
+  "embeddings": { "model": "bge-large" }
+}
diff --git a/src/cli.js b/src/cli.js
index 60dbac63..e048ac44 100644
--- a/src/cli.js
+++ b/src/cli.js
@@ -7,7 +7,13 @@ import { buildGraph } from './builder.js';
 import { loadConfig } from './config.js';
 import { findCycles, formatCycles } from './cycles.js';
 import { openReadonlyOrFail } from './db.js';
-import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
+import {
+  buildEmbeddings,
+  DEFAULT_MODEL,
+  EMBEDDING_STRATEGIES,
+  MODELS,
+  search,
+} from './embedder.js';
 import { exportDOT, exportJSON, exportMermaid } from './export.js';
 import { setVerbose } from './logger.js';
 import {
@@ -423,12 +429,13 @@ program
   .command('models')
   .description('List available embedding models')
   .action(() => {
+    const defaultModel = config.embeddings?.model || DEFAULT_MODEL;
     console.log('\nAvailable embedding models:\n');
-    for (const [key, config] of Object.entries(MODELS)) {
-      const def = key === 'minilm' ? ' (default)' : '';
-      const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
+    for (const [key, cfg] of Object.entries(MODELS)) {
+      const def = key === defaultModel ? ' (default)' : '';
+      const ctx = cfg.contextWindow ? `${cfg.contextWindow} ctx` : '';
       console.log(
-        ` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${ctx.padEnd(9)} ${config.desc}${def}`,
+        ` ${key.padEnd(12)} ${String(cfg.dim).padStart(4)}d ${ctx.padEnd(9)} ${cfg.desc}${def}`,
       );
     }
     console.log('\nUsage: codegraph embed --model --strategy ');
@@ -442,8 +449,7 @@ program
   )
   .option(
     '-m, --model ',
-    'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
-    'minilm',
+    `Embedding model (default from config or ${DEFAULT_MODEL}). Run \`codegraph models\` for details`,
   )
   .option(
     '-s, --strategy ',
@@ -458,7 +464,8 @@ program
       process.exit(1);
     }
     const root = path.resolve(dir || '.');
-    await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
+    const model = opts.model || config.embeddings?.model || DEFAULT_MODEL;
+    await buildEmbeddings(root, model, undefined, { strategy: opts.strategy });
   });
 
 program
diff --git a/src/embedder.js b/src/embedder.js
index 938a5976..4aba1e7d 100644
--- a/src/embedder.js
+++ b/src/embedder.js
@@ -98,7 +98,7 @@ export const MODELS = {
 
 export const EMBEDDING_STRATEGIES = ['structured', 'source'];
 
-export const DEFAULT_MODEL = 'minilm';
+export const DEFAULT_MODEL = 'nomic-v1.5';
 const BATCH_SIZE_MAP = {
   minilm: 32,
   'jina-small': 16,