From f455c8ef4f4a2c91b86e0b6652f043649ac1b1c7 Mon Sep 17 00:00:00 2001 From: Stum Huang Date: Thu, 23 Apr 2026 09:15:49 +0800 Subject: [PATCH 1/3] fix(embedding): prevent pipeline() hang in Node/Bun runtime Two independent issues caused pipeline("feature-extraction", ...) to hang indefinitely (35s+) on first call, poisoning initPromise so every subsequent embed() blocked forever. Symptom: web UI blank, /api/search returned "Empty reply from server". 1. ONNX WASM threading deadlock @huggingface/transformers v4 defaults wasm.numThreads > 1, but Node.js and Bun lack SharedArrayBuffer support, so onnxruntime-web deadlocks during pipeline init. Fixed by forcing numThreads=1 in ensureTransformersLoaded(). Ref xenova/transformers.js#488. 2. dtype default mismatch transformers v4 default dtype tries to load model.onnx (fp32, ~500MB). The cached model directory only ships model_quantized.onnx, so pipeline falls back to a network fetch from huggingface.co. In restricted networks this fails with "Unable to connect". Fixed by passing dtype: "q8" to the pipeline() options so the local quantized model is used unconditionally. After both fixes, pipeline ready in ~2.3s and /api/search returns real results (similarity 0.457, vecLen=768). --- src/services/embedding.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/services/embedding.ts b/src/services/embedding.ts index 8473c09..f99c906 100644 --- a/src/services/embedding.ts +++ b/src/services/embedding.ts @@ -17,6 +17,14 @@ async function ensureTransformersLoaded(): Promise<typeof import("@huggingface/transformers")> { + // Node.js and Bun lack SharedArrayBuffer support, so onnxruntime-web + // deadlocks during pipeline init when wasm.numThreads > 1. 
+ // See https://github.com/xenova/transformers.js/pull/488 + try { + (mod.env as any).backends.onnx.wasm.numThreads = 1; + } catch (e) { + log("Failed to set wasm.numThreads", { error: String(e) }); + } _transformers = mod; return _transformers!; } @@ -58,7 +66,11 @@ export class EmbeddingService { const { pipeline } = await ensureTransformersLoaded(); this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, { progress_callback: progressCallback, - }); + // Force quantized ONNX. Default is fp32 model.onnx which transformers v4 + // tries to download from huggingface.co; cache only ships model_quantized.onnx + // and HF is unreachable behind GFW, causing init to fail. + dtype: "q8", + } as any); this.isWarmedUp = true; } catch (error) { this.initPromise = null; From 20748bec021e9ba744a6105023e1564c5dfab1ff Mon Sep 17 00:00:00 2001 From: Stum Huang Date: Thu, 23 Apr 2026 09:16:18 +0800 Subject: [PATCH 2/3] fix(api): remove embedding warmup from read-only handlers handleListTags, handleListMemories, and handleStats each awaited embeddingService.warmup() before serving. These handlers only read SQLite/sqlite-vec rows and never compute query embeddings, so the coupling was unnecessary. When warmup() stalled (or simply took a few seconds on cold start), the entire web UI went blank because every read endpoint blocked behind the embedding model load. Removed the warmup() calls from the three read paths. handleSearch still warms up because it needs the query vector. Net effect: /api/stats, /api/tags, and /api/memories now respond immediately even when the embedding model has not been loaded yet, so the dashboard can render before the first search query. 
--- src/services/api-handlers.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/services/api-handlers.ts b/src/services/api-handlers.ts index dc70681..e177bd2 100644 --- a/src/services/api-handlers.ts +++ b/src/services/api-handlers.ts @@ -100,7 +100,10 @@ function getProjectPathFromTag(tag: string): string | undefined { export async function handleListTags(): Promise> { try { - await embeddingService.warmup(); + // Tags are stored as SQLite metadata; embedding model is not needed. + // Calling warmup() here would block on @huggingface/transformers init in + // the worker thread and hang every read API. Only handlers that compute + // similarity (e.g. handleSearch) should warm up the embedding service. const projectShards = shardManager.getAllShards("project", ""); const tagsMap = new Map(); for (const shard of projectShards) { @@ -140,7 +143,8 @@ export async function handleListMemories( includePrompts: boolean = true ): Promise>> { try { - await embeddingService.warmup(); + // Listing only reads SQLite rows; no vector ops happen here. + // See handleListTags comment - keep embedding init out of read paths. let allMemories: any[] = []; if (tag) { const { scope: tagScope, hash } = extractScopeFromTag(tag); @@ -652,7 +656,8 @@ export async function handleStats(): Promise< }> > { try { - await embeddingService.warmup(); + // Stats only counts SQLite rows; no embedding needed. + // See handleListTags comment - keep embedding init out of read paths. 
const projectShards = shardManager.getAllShards("project", ""); let userCount = 0, projectCount = 0; From 9e50c2658699f9b6488221f35dcd440e10005265 Mon Sep 17 00:00:00 2001 From: Stum Huang Date: Thu, 23 Apr 2026 09:34:58 +0800 Subject: [PATCH 3/3] fix(embedding): replace 'as any' with PretrainedModelOptions type Per Copilot review on PR #100: the 'as any' cast on pipeline() options silently dropped compile-time validation of the dtype key, which is the exact protection that prevents an unwanted fp32 model.onnx download. Use the official PretrainedModelOptions type so any future typo in dtype or other option keys fails at tsc time instead of at runtime. --- src/services/embedding.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/services/embedding.ts b/src/services/embedding.ts index f99c906..a63a2d0 100644 --- a/src/services/embedding.ts +++ b/src/services/embedding.ts @@ -1,6 +1,7 @@ import { CONFIG } from "../config.js"; import { log } from "./logger.js"; import { join } from "node:path"; +import type { PretrainedModelOptions } from "@huggingface/transformers"; const TIMEOUT_MS = 30000; const GLOBAL_EMBEDDING_KEY = Symbol.for("opencode-mem.embedding.instance"); @@ -64,13 +65,14 @@ export class EmbeddingService { return; } const { pipeline } = await ensureTransformersLoaded(); - this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, { + const pipelineOptions: PretrainedModelOptions = { progress_callback: progressCallback, // Force quantized ONNX. Default is fp32 model.onnx which transformers v4 // tries to download from huggingface.co; cache only ships model_quantized.onnx // and HF is unreachable behind GFW, causing init to fail. dtype: "q8", - } as any); + }; + this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, pipelineOptions); this.isWarmedUp = true; } catch (error) { this.initPromise = null;