diff --git a/src/services/api-handlers.ts b/src/services/api-handlers.ts
index dc70681..e177bd2 100644
--- a/src/services/api-handlers.ts
+++ b/src/services/api-handlers.ts
@@ -100,7 +100,10 @@ function getProjectPathFromTag(tag: string): string | undefined {
 
 export async function handleListTags(): Promise<ApiResponse<{ project: TagInfo[] }>> {
   try {
-    await embeddingService.warmup();
+    // Tags are stored as SQLite metadata; the embedding model is not needed.
+    // Calling warmup() here would block on @huggingface/transformers init in
+    // the worker thread and hang every read API. Only handlers that compute
+    // similarity (e.g. handleSearch) should warm up the embedding service.
     const projectShards = shardManager.getAllShards("project", "");
     const tagsMap = new Map<string, TagInfo>();
     for (const shard of projectShards) {
@@ -140,7 +143,8 @@ export async function handleListMemories(
   includePrompts: boolean = true
 ): Promise<ApiResponse<PaginatedResponse<Memory | any>>> {
   try {
-    await embeddingService.warmup();
+    // Listing only reads SQLite rows; no vector ops happen here.
+    // See handleListTags comment - keep embedding init out of read paths.
     let allMemories: any[] = [];
     if (tag) {
       const { scope: tagScope, hash } = extractScopeFromTag(tag);
@@ -652,7 +656,8 @@ export async function handleStats(): Promise<
   }>
 > {
   try {
-    await embeddingService.warmup();
+    // Stats only counts SQLite rows; no embedding needed.
+    // See handleListTags comment - keep embedding init out of read paths.
     const projectShards = shardManager.getAllShards("project", "");
     let userCount = 0,
       projectCount = 0;
diff --git a/src/services/embedding.ts b/src/services/embedding.ts
index 8473c09..a63a2d0 100644
--- a/src/services/embedding.ts
+++ b/src/services/embedding.ts
@@ -1,6 +1,7 @@
 import { CONFIG } from "../config.js";
 import { log } from "./logger.js";
 import { join } from "node:path";
+import type { PretrainedModelOptions } from "@huggingface/transformers";
 
 const TIMEOUT_MS = 30000;
 const GLOBAL_EMBEDDING_KEY = Symbol.for("opencode-mem.embedding.instance");
@@ -17,6 +18,14 @@ async function ensureTransformersLoaded(): Promise<NonNullable<typeof _transform
   mod.env.allowLocalModels = true;
   mod.env.allowRemoteModels = true;
   mod.env.cacheDir = join(CONFIG.storagePath, ".cache");
+  // CRITICAL: Disable WASM multi-threading. In Node.js/Bun (no SharedArrayBuffer),
+  // ONNX runtime hangs indefinitely during pipeline() init when threads > 1.
+  // See https://github.com/xenova/transformers.js/pull/488
+  try {
+    (mod.env as any).backends.onnx.wasm.numThreads = 1;
+  } catch (e) {
+    log("Failed to set wasm.numThreads", { error: String(e) });
+  }
   _transformers = mod;
   return _transformers!;
 }
@@ -56,9 +65,14 @@ export class EmbeddingService {
         return;
       }
       const { pipeline } = await ensureTransformersLoaded();
-      this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, {
+      const pipelineOptions: PretrainedModelOptions = {
         progress_callback: progressCallback,
-      });
+        // Force the quantized ONNX weights. The default is the fp32 model.onnx, which
+        // transformers v4 tries to download from huggingface.co; the cache only ships
+        // model_quantized.onnx and HF is unreachable behind the Great Firewall (GFW), so init fails.
+        dtype: "q8",
+      };
+      this.pipe = await pipeline("feature-extraction", CONFIG.embeddingModel, pipelineOptions);
       this.isWarmedUp = true;
     } catch (error) {
       this.initPromise = null;