Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions src/services/api-handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ function getProjectPathFromTag(tag: string): string | undefined {

export async function handleListTags(): Promise<ApiResponse<{ project: TagInfo[] }>> {
try {
await embeddingService.warmup();
// Tags are stored as SQLite metadata; embedding model is not needed.
// Calling warmup() here would block on @huggingface/transformers init in
// the worker thread and hang every read API. Only handlers that compute
// similarity (e.g. handleSearch) should warm up the embedding service.
const projectShards = shardManager.getAllShards("project", "");
const tagsMap = new Map<string, TagInfo>();
for (const shard of projectShards) {
Expand Down Expand Up @@ -140,7 +143,8 @@ export async function handleListMemories(
includePrompts: boolean = true
): Promise<ApiResponse<PaginatedResponse<Memory | any>>> {
try {
await embeddingService.warmup();
// Listing only reads SQLite rows; no vector ops happen here.
// See handleListTags comment - keep embedding init out of read paths.
let allMemories: any[] = [];
if (tag) {
const { scope: tagScope, hash } = extractScopeFromTag(tag);
Expand Down Expand Up @@ -652,7 +656,8 @@ export async function handleStats(): Promise<
}>
> {
try {
await embeddingService.warmup();
// Stats only counts SQLite rows; no embedding needed.
// See handleListTags comment - keep embedding init out of read paths.
const projectShards = shardManager.getAllShards("project", "");
let userCount = 0,
projectCount = 0;
Expand Down
18 changes: 16 additions & 2 deletions src/services/embedding.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { CONFIG } from "../config.js";
import { log } from "./logger.js";
import { join } from "node:path";
import type { PretrainedModelOptions } from "@huggingface/transformers";

const TIMEOUT_MS = 30000;
const GLOBAL_EMBEDDING_KEY = Symbol.for("opencode-mem.embedding.instance");
Expand All @@ -17,6 +18,14 @@ async function ensureTransformersLoaded(): Promise<NonNullable<typeof _transform
mod.env.allowLocalModels = true;
mod.env.allowRemoteModels = true;
mod.env.cacheDir = join(CONFIG.storagePath, ".cache");
// CRITICAL: Disable WASM multi-threading. In Node.js/Bun (no SharedArrayBuffer),
// ONNX runtime hangs indefinitely during pipeline() init when threads > 1.
// See https://github.com/xenova/transformers.js/pull/488
try {
(mod.env as any).backends.onnx.wasm.numThreads = 1;
} catch (e) {
log("Failed to set wasm.numThreads", { error: String(e) });
Comment on lines +25 to +27
Copy link

Copilot AI Apr 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The numThreads override is described as critical to avoid an indefinite hang, but failures are currently swallowed (only logged) and init continues. If this assignment fails at runtime, pipeline() can still hang and the root issue returns; consider failing fast (throw) or explicitly validating/creating the nested env.backends.onnx.wasm object and asserting the final value before proceeding.

Suggested change
(mod.env as any).backends.onnx.wasm.numThreads = 1;
} catch (e) {
log("Failed to set wasm.numThreads", { error: String(e) });
const envWithBackends = mod.env as any;
envWithBackends.backends ??= {};
envWithBackends.backends.onnx ??= {};
envWithBackends.backends.onnx.wasm ??= {};
envWithBackends.backends.onnx.wasm.numThreads = 1;
if (envWithBackends.backends.onnx.wasm.numThreads !== 1) {
throw new Error("Failed to verify wasm.numThreads override");
}
} catch (e) {
log("Failed to set wasm.numThreads", { error: String(e) });
throw new Error(`Failed to initialize transformers WASM threading configuration: ${String(e)}`);

Copilot uses AI. Check for mistakes.
}
_transformers = mod;
return _transformers!;
}
Expand Down Expand Up @@ -56,9 +65,14 @@ export class EmbeddingService {
return;
}
const { pipeline } = await ensureTransformersLoaded();
this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, {
const pipelineOptions: PretrainedModelOptions = {
progress_callback: progressCallback,
});
// Force quantized ONNX. Default is fp32 model.onnx which transformers v4
// tries to download from huggingface.co; cache only ships model_quantized.onnx
// and HF is unreachable behind GFW, causing init to fail.
dtype: "q8",
};
this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, pipelineOptions);
this.isWarmedUp = true;
} catch (error) {
this.initPromise = null;
Expand Down