diff --git a/src/services/api-handlers.ts b/src/services/api-handlers.ts index dc70681..e177bd2 100644 --- a/src/services/api-handlers.ts +++ b/src/services/api-handlers.ts @@ -100,7 +100,10 @@ function getProjectPathFromTag(tag: string): string | undefined { export async function handleListTags(): Promise> { try { - await embeddingService.warmup(); + // Tags are stored as SQLite metadata; embedding model is not needed. + // Calling warmup() here would block on @huggingface/transformers init in + // the worker thread and hang every read API. Only handlers that compute + // similarity (e.g. handleSearch) should warm up the embedding service. const projectShards = shardManager.getAllShards("project", ""); const tagsMap = new Map(); for (const shard of projectShards) { @@ -140,7 +143,8 @@ export async function handleListMemories( includePrompts: boolean = true ): Promise>> { try { - await embeddingService.warmup(); + // Listing only reads SQLite rows; no vector ops happen here. + // See handleListTags comment - keep embedding init out of read paths. let allMemories: any[] = []; if (tag) { const { scope: tagScope, hash } = extractScopeFromTag(tag); @@ -652,7 +656,8 @@ export async function handleStats(): Promise< }> > { try { - await embeddingService.warmup(); + // Stats only counts SQLite rows; no embedding needed. + // See handleListTags comment - keep embedding init out of read paths. const projectShards = shardManager.getAllShards("project", ""); let userCount = 0, projectCount = 0; diff --git a/src/services/embedding.ts b/src/services/embedding.ts index 8473c09..a63a2d0 100644 --- a/src/services/embedding.ts +++ b/src/services/embedding.ts @@ -1,6 +1,7 @@ import { CONFIG } from "../config.js"; import { log } from "./logger.js"; import { join } from "node:path"; +import type { PretrainedModelOptions } from "@huggingface/transformers"; const TIMEOUT_MS = 30000; const GLOBAL_EMBEDDING_KEY = Symbol.for("opencode-mem.embedding.instance"); @@ -17,6 +18,14 @@ async function ensureTransformersLoaded(): Promise 1. + // See https://github.com/xenova/transformers.js/pull/488 + try { + (mod.env as any).backends.onnx.wasm.numThreads = 1; + } catch (e) { + log("Failed to set wasm.numThreads", { error: String(e) }); + } _transformers = mod; return _transformers!; } @@ -56,9 +65,14 @@ export class EmbeddingService { return; } const { pipeline } = await ensureTransformersLoaded(); - this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, { + const pipelineOptions: PretrainedModelOptions = { progress_callback: progressCallback, - }); + // Force quantized ONNX. Default is fp32 model.onnx which transformers v4 + // tries to download from huggingface.co; cache only ships model_quantized.onnx + // and HF is unreachable behind GFW, causing init to fail. + dtype: "q8", + }; + this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, pipelineOptions); this.isWarmedUp = true; } catch (error) { this.initPromise = null;