Complete reference for all exports from `context-compression-engine`.
```ts
// Primary
export { compress, defaultTokenCounter, bestSentenceScore } from './compress.js';
export { uncompress } from './expand.js';
export type { StoreLookup } from './expand.js';

// Helpers (LLM integration)
export { createSummarizer, createEscalatingSummarizer } from './summarizer.js';
export { createClassifier, createEscalatingClassifier } from './classifier.js';

// Entity extraction & quality metrics
export {
  extractEntities,
  collectMessageEntities,
  computeEntityRetention,
  computeStructuralIntegrity,
  computeReferenceCoherence,
  computeQualityScore,
} from './entities.js';

// ML token classifier
export {
  compressWithTokenClassifier,
  compressWithTokenClassifierSync,
  whitespaceTokenize,
  createMockTokenClassifier,
} from './ml-classifier.js';

// Discourse decomposition (EDU-lite)
export { segmentEDUs, scoreEDUs, selectEDUs, summarizeWithEDUs } from './discourse.js';
export type { EDU } from './discourse.js';

// Semantic clustering
export { clusterMessages, summarizeCluster } from './cluster.js';
export type { MessageCluster } from './cluster.js';

// Cross-message coreference
export {
  buildCoreferenceMap,
  findOrphanedReferences,
  generateInlineDefinitions,
} from './coreference.js';
export type { EntityDefinition } from './coreference.js';

// Conversation flow detection
export { detectFlowChains, summarizeChain } from './flow.js';
export type { FlowChain } from './flow.js';

// Entropy scoring utilities
export { splitSentences, normalizeScores, combineScores } from './entropy.js';

// Importance scoring
export {
  computeImportance,
  scoreContentSignals,
  DEFAULT_IMPORTANCE_THRESHOLD,
} from './importance.js';
export type { ImportanceMap } from './importance.js';

// Contradiction detection
export { analyzeContradictions } from './contradiction.js';
export type { ContradictionAnnotation } from './contradiction.js';

// Types
export type {
  Classifier,
  ClassifierResult,
  CompressOptions,
  CompressResult,
  CreateClassifierOptions,
  CreateSummarizerOptions,
  Message,
  MLTokenClassifier,
  TokenClassification,
  Summarizer,
  UncompressOptions,
  UncompressResult,
  VerbatimMap,
} from './types.js';
```

Deterministic compression by default. Returns a `Promise` when a summarizer or classifier is provided.
```ts
function compress(messages: Message[], options?: CompressOptions): CompressResult;
function compress(
  messages: Message[],
  options: CompressOptions & { summarizer: Summarizer },
): Promise<CompressResult>;
function compress(
  messages: Message[],
  options: CompressOptions & { classifier: Classifier },
): Promise<CompressResult>;
```

| Parameter | Type | Description |
|---|---|---|
| `messages` | `Message[]` | Messages to compress |
| `options` | `CompressOptions` | Compression options (see below) |
| Option | Type | Default | Description |
|---|---|---|---|
| `preserve` | `string[]` | `['system']` | Roles to never compress |
| `recencyWindow` | `number` | `4` | Protect the last N messages from compression |
| `sourceVersion` | `number` | `0` | Version tag for provenance tracking |
| `summarizer` | `Summarizer` | - | LLM-powered summarizer. When provided, `compress()` returns a `Promise`. See LLM integration |
| `tokenBudget` | `number` | - | Target token count. Binary-searches `recencyWindow` to fit. See Token budget |
| `minRecencyWindow` | `number` | `0` | Floor for `recencyWindow` when using `tokenBudget` |
| `dedup` | `boolean` | `true` | Replace earlier exact-duplicate messages with a compact reference. See Deduplication |
| `fuzzyDedup` | `boolean` | `false` | Detect near-duplicate messages using line-level similarity. See Deduplication |
| `fuzzyThreshold` | `number` | `0.85` | Similarity threshold for fuzzy dedup (0–1) |
| `embedSummaryId` | `boolean` | `false` | Embed `summary_id` in compressed content for downstream reference. See Provenance |
| `forceConverge` | `boolean` | `false` | Hard-truncate non-recency messages when binary search bottoms out. See Token budget |
| `preservePatterns` | `Array<{ re: RegExp; label: string }>` | - | Custom regex patterns that force hard T0 preservation. See Preservation rules |
| `classifier` | `Classifier` | - | LLM-powered classifier. When provided, `compress()` returns a `Promise`. See LLM integration |
| `classifierMode` | `'hybrid' \| 'full'` | `'hybrid'` | Classification mode. `'hybrid'`: heuristics first, LLM for prose. `'full'`: LLM for all eligible messages. Ignored without `classifier` |
| `tokenCounter` | `(msg: Message) => number` | `defaultTokenCounter` | Custom token counter per message. See Token budget |
| `importanceScoring` | `boolean` | `false` | Score messages by forward-reference density, decision/correction content, and recency. High-importance messages are preserved outside the recency window; `forceConverge` truncates low-importance messages first. Note: preserving extra messages reduces the compression ratio, which may make `tokenBudget` harder to meet |
| `importanceThreshold` | `number` | `0.65` | Importance score threshold for preservation (0–1). Only used when `importanceScoring: true` |
| `contradictionDetection` | `boolean` | `false` | Detect later messages that correct or override earlier ones. Superseded messages are compressed with a provenance annotation |
| `contradictionTopicThreshold` | `number` | `0.15` | IDF-weighted Dice similarity threshold for topic overlap in contradiction detection (0–1) |
| `relevanceThreshold` | `number` | - | Sentence score threshold. Messages whose best sentence score falls below this are replaced with a stub. See V2 features |
| `budgetStrategy` | `'binary-search' \| 'tiered'` | `'binary-search'` | Budget strategy when `tokenBudget` is set. `'tiered'` keeps the recency window fixed and progressively compresses older content. See V2 features |
| `entropyScorer` | `(sentences: string[]) => number[]` | - | External self-information scorer. Can be sync or async. See V2 features |
| `entropyScorerMode` | `'replace' \| 'augment'` | `'augment'` | How to combine entropy and heuristic scores. `'augment'` = weighted average, `'replace'` = entropy only |
| `conversationFlow` | `boolean` | `false` | Group Q&A, request→action, correction, and acknowledgment chains into compression units. See V2 features |
| `discourseAware` | `boolean` | `false` | Experimental. EDU decomposition with dependency-aware selection. Reduces ratio 8–28% without a custom ML scorer; use `segmentEDUs`/`scoreEDUs`/`selectEDUs` directly instead. See V2 features |
| `coreference` | `boolean` | `false` | Inline entity definitions into compressed summaries when references would be orphaned. See V2 features |
| `semanticClustering` | `boolean` | `false` | Group messages by topic using TF-IDF + entity overlap and compress them as units. See V2 features |
| `clusterThreshold` | `number` | `0.15` | Similarity threshold for semantic clustering (0–1). Lower = larger clusters |
| `compressionDepth` | `'gentle' \| 'moderate' \| 'aggressive' \| 'auto'` | `'gentle'` | Controls summarization aggressiveness. `'auto'` tries each level until `tokenBudget` fits. See V2 features |
| `mlTokenClassifier` | `MLTokenClassifier` | - | Per-token keep/remove classifier. T0 rules still override for code/structured content. See V2 features |
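Taken together, the budget options let a single call drive compression toward a target size. A minimal sketch (the budget and window values are illustrative; `fits` and `tokenCount` are described in the result table below):

```ts
import { compress } from 'context-compression-engine';

// Fit the conversation into ~8,000 tokens. The engine binary-searches
// recencyWindow down to minRecencyWindow, then hard-truncates if forced.
const result = compress(messages, {
  tokenBudget: 8000,
  minRecencyWindow: 2,
  forceConverge: true,
  compressionDepth: 'auto',
});

if (result.fits === false) {
  console.warn(`still ~${result.tokenCount} tokens after compression`);
}
```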
| Field | Type | Description |
|---|---|---|
| `messages` | `Message[]` | Compressed message array |
| `verbatim` | `VerbatimMap` | Original messages keyed by ID. Must be persisted atomically with `messages` |
| `compression.original_version` | `number` | Mirrors `sourceVersion` |
| `compression.ratio` | `number` | Character-based compression ratio. >1 means savings |
| `compression.token_ratio` | `number` | Token-based compression ratio. >1 means savings |
| `compression.messages_compressed` | `number` | Messages that were compressed |
| `compression.messages_preserved` | `number` | Messages kept as-is |
| `compression.messages_deduped` | `number \| undefined` | Exact duplicates replaced (when `dedup: true`) |
| `compression.messages_fuzzy_deduped` | `number \| undefined` | Near-duplicates replaced (when `fuzzyDedup: true`) |
| `compression.messages_pattern_preserved` | `number \| undefined` | Messages preserved by `preservePatterns` (when patterns are provided) |
| `compression.messages_llm_classified` | `number \| undefined` | Messages classified by the LLM (when `classifier` is provided) |
| `compression.messages_llm_preserved` | `number \| undefined` | Messages the LLM decided to preserve (when `classifier` is provided) |
| `compression.messages_contradicted` | `number \| undefined` | Messages superseded by a later correction (when `contradictionDetection: true`) |
| `compression.messages_importance_preserved` | `number \| undefined` | Messages preserved due to a high importance score (when `importanceScoring: true`) |
| `compression.messages_relevance_dropped` | `number \| undefined` | Messages replaced with stubs (when `relevanceThreshold` is set) |
| `compression.entity_retention` | `number \| undefined` | Fraction of technical identifiers preserved (0–1). Present when compression occurs |
| `compression.structural_integrity` | `number \| undefined` | Fraction of structural elements preserved (0–1). Present when compression occurs |
| `compression.reference_coherence` | `number \| undefined` | Fraction of entity references with surviving sources (0–1) |
| `compression.quality_score` | `number \| undefined` | Composite quality: 0.4×entity + 0.4×structural + 0.2×coherence |
| `fits` | `boolean \| undefined` | Whether the result fits within `tokenBudget`. Present when `tokenBudget` is set |
| `tokenCount` | `number \| undefined` | Estimated token count. Present when `tokenBudget` is set |
| `recencyWindow` | `number \| undefined` | The `recencyWindow` the binary search settled on. Present when `tokenBudget` is set |
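The quality metrics make it possible to gate persistence on compression fidelity. A sketch using only fields from the table above (the 0.8 cutoff and the `persistAtomically` helper are illustrative):

```ts
const { messages: compressed, verbatim, compression } = compress(history);

// quality_score is only present when compression actually occurred.
if (compression.quality_score === undefined || compression.quality_score >= 0.8) {
  // verbatim must be persisted atomically with the compressed messages.
  await persistAtomically(compressed, verbatim); // hypothetical storage helper
} else {
  await persistAtomically(history, {}); // too lossy for this use case: keep originals
}
```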
Basic usage:

```ts
import { compress } from 'context-compression-engine';

// Sync
const result = compress(messages, {
  preserve: ['system'],
  recencyWindow: 4,
  sourceVersion: 1,
});

// Async (with LLM summarizer)
const summarized = await compress(messages, {
  summarizer: async (text) => myLlm.summarize(text),
});
```
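Custom preservation follows the same shape; for instance, forcing ticket references to survive compression (the regex and label are illustrative):

```ts
const result = compress(messages, {
  preservePatterns: [{ re: /\bTICKET-\d+\b/, label: 'ticket-reference' }],
});

// Count of messages preserved by the patterns (see the result table above).
console.log(result.compression.messages_pattern_preserved);
```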
Restore originals from the verbatim store. Always synchronous. See Round-trip for full details.

```ts
function uncompress(
  messages: Message[],
  store: StoreLookup,
  options?: UncompressOptions,
): UncompressResult;
```

| Parameter | Type | Description |
|---|---|---|
| `messages` | `Message[]` | Compressed messages to expand |
| `store` | `StoreLookup` | A `VerbatimMap` object or a `(id: string) => Message \| undefined` function |
| `options` | `UncompressOptions` | Expansion options (see below) |
| Option | Type | Default | Description |
|---|---|---|---|
| `recursive` | `boolean` | `false` | Recursively expand messages whose originals are also compressed (up to 10 levels) |
| Field | Type | Description |
|---|---|---|
| `messages` | `Message[]` | Expanded messages |
| `messages_expanded` | `number` | How many compressed messages were restored |
| `messages_passthrough` | `number` | How many messages passed through unchanged |
| `missing_ids` | `string[]` | IDs looked up but not found. Non-empty means data loss |
```ts
import { uncompress } from 'context-compression-engine';

const { messages, missing_ids } = uncompress(compressed, verbatim);

// Recursive expansion
const deep = uncompress(compressed, verbatim, { recursive: true });

// Function store (database-backed)
const result = uncompress(compressed, (id) => db.getMessageById(id));
```
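Because a non-empty `missing_ids` means data loss, callers may want to fail fast. A minimal guard (the error text is illustrative):

```ts
const { messages: restored, missing_ids } = uncompress(compressed, verbatim);

if (missing_ids.length > 0) {
  throw new Error(`verbatim store is missing ${missing_ids.length} message(s)`);
}
```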
The built-in token estimator used when no custom `tokenCounter` is provided.

```ts
function defaultTokenCounter(msg: Message): number;
```

Equivalent to:

```ts
Math.ceil(msg.content.length / 3.5);
```

The 3.5 chars/token ratio is the empirical average for GPT-family BPE tokenizers (cl100k_base, o200k_base) on mixed English text. It sits at the lower end of the typical range (~3.2–4.5) so that budget estimates stay conservative: over-counting tokens is safer than under-counting. For accurate budgeting, replace it with a real tokenizer. See Token budget.
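For example, a replacement counter can wrap a real BPE tokenizer. A sketch assuming the third-party `js-tiktoken` package (its `getEncoding` API is external to this engine):

```ts
import { getEncoding } from 'js-tiktoken';
import { compress, type Message } from 'context-compression-engine';

const enc = getEncoding('cl100k_base');

// Count actual BPE tokens instead of estimating from character length.
const tokenCounter = (msg: Message): number => enc.encode(msg.content ?? '').length;

const result = compress(messages, { tokenBudget: 8000, tokenCounter });
```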
Creates an LLM-powered summarizer with an optimized prompt template. See LLM integration for provider examples.
```ts
function createSummarizer(
  callLlm: (prompt: string) => string | Promise<string>,
  options?: CreateSummarizerOptions,
): Summarizer;
```

| Option | Type | Default | Description |
|---|---|---|---|
| `maxResponseTokens` | `number` | `300` | Hint for the maximum number of tokens in the LLM response |
| `systemPrompt` | `string` | - | Domain-specific instructions prepended to the built-in rules |
| `mode` | `'normal' \| 'aggressive'` | `'normal'` | `'aggressive'` produces terse bullet points at half the token budget |
| `preserveTerms` | `string[]` | - | Domain-specific terms appended to the built-in preserve list |
The prompt always preserves: code references, file paths, function/variable names, URLs, API keys, error messages, numbers, and technical decisions. Add domain terms via `preserveTerms`.
```ts
import { createSummarizer, compress } from 'context-compression-engine';

const summarizer = createSummarizer(async (prompt) => myLlm.complete(prompt), {
  maxResponseTokens: 300,
  systemPrompt: 'This is a legal contract. Preserve all clause numbers.',
  preserveTerms: ['clause numbers', 'party names'],
});

const result = await compress(messages, { summarizer });
```
Three-level escalation summarizer. See LLM integration and Compression pipeline for how the fallback chain works.

```ts
function createEscalatingSummarizer(
  callLlm: (prompt: string) => string | Promise<string>,
  options?: Omit<CreateSummarizerOptions, 'mode'>,
): Summarizer;
```

- Level 1: Normal - a concise prose summary via the LLM
- Level 2: Aggressive - terse bullet points at half the token budget (if Level 1 fails or returns longer text)
- Level 3: Deterministic - sentence-extraction fallback via the compression pipeline (handled by `withFallback` in `compress`)
Same as `CreateSummarizerOptions` but without `mode` (managed internally).
| Option | Type | Default | Description |
|---|---|---|---|
| `maxResponseTokens` | `number` | `300` | Hint for the maximum number of tokens in the LLM response |
| `systemPrompt` | `string` | - | Domain-specific instructions prepended to the built-in rules |
| `preserveTerms` | `string[]` | - | Domain-specific terms appended to the built-in preserve list |
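Usage mirrors `createSummarizer`; a minimal sketch (`myLlm` stands in for your provider client):

```ts
import { createEscalatingSummarizer, compress } from 'context-compression-engine';

const summarizer = createEscalatingSummarizer(async (prompt) => myLlm.complete(prompt), {
  preserveTerms: ['invoice numbers'],
});

// Escalates level by level if the LLM fails or returns longer text.
const result = await compress(messages, { summarizer });
```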
Creates an LLM-powered classifier that decides whether messages should be preserved or compressed. See LLM integration for domain examples.
```ts
function createClassifier(
  callLlm: (prompt: string) => string | Promise<string>,
  options?: CreateClassifierOptions,
): Classifier;
```

| Option | Type | Default | Description |
|---|---|---|---|
| `maxResponseTokens` | `number` | `100` | Hint for the maximum number of tokens in the LLM response |
| `systemPrompt` | `string` | - | Domain-specific instructions prepended to the classification prompt |
| `alwaysPreserve` | `string[]` | - | Content types to always preserve, injected as bullet points |
| `alwaysCompress` | `string[]` | - | Content types that are always safe to compress, injected as bullet points |
```ts
import { createClassifier, compress } from 'context-compression-engine';

const classifier = createClassifier(async (prompt) => myLlm.complete(prompt), {
  systemPrompt: 'You are classifying content from legal documents.',
  alwaysPreserve: ['clause references', 'defined terms', 'party names'],
  alwaysCompress: ['boilerplate acknowledgments', 'scheduling correspondence'],
});

const result = await compress(messages, { classifier });
```
Two-level escalation classifier. Tries the LLM first and falls back to the heuristic `classifyMessage()` on failure.

```ts
function createEscalatingClassifier(
  callLlm: (prompt: string) => string | Promise<string>,
  options?: CreateClassifierOptions,
): Classifier;
```

- Level 1: LLM - send the content to the LLM and parse the structured JSON response
- Level 2: Heuristic - if the LLM throws, returns unparseable output, or reports `confidence: 0`, fall back to `classifyMessage()`. Hard T0 heuristic results map to `preserve`; everything else maps to `compress`.
Same as `CreateClassifierOptions`.
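As with the summarizer helpers, the result plugs straight into `compress`. A short sketch (`myLlm` stands in for your provider client):

```ts
import { createEscalatingClassifier, compress } from 'context-compression-engine';

const classifier = createEscalatingClassifier(async (prompt) => myLlm.complete(prompt));

// 'hybrid' (the default) lets heuristics handle structured content
// and sends only prose to the LLM.
const result = await compress(messages, { classifier, classifierMode: 'hybrid' });
```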
```ts
type Message = {
  id: string;
  index: number;
  role?: string;
  content?: string;
  metadata?: Record<string, unknown>;
  tool_calls?: unknown[];
  [key: string]: unknown;
};

type Summarizer = (text: string) => string | Promise<string>;

type VerbatimMap = Record<string, Message>;

type Classifier = (content: string) => ClassifierResult | Promise<ClassifierResult>;

type ClassifierResult = {
  decision: 'preserve' | 'compress';
  confidence: number;
  reason: string;
};

type MLTokenClassifier = (
  content: string,
) => TokenClassification[] | Promise<TokenClassification[]>;

type TokenClassification = {
  token: string;
  keep: boolean;
  confidence: number;
};

type StoreLookup = VerbatimMap | ((id: string) => Message | undefined);
```
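A custom `MLTokenClassifier` only has to satisfy the type above. A heuristic sketch that keeps code-like tokens (the regex and the fixed confidence are illustrative, not the library's built-in behavior; the exported `whitespaceTokenize` helper could replace the manual split):

```ts
import { compress, type TokenClassification } from 'context-compression-engine';

// Keep tokens that look like identifiers, paths, or numbers;
// mark plain prose words as removable.
const mlTokenClassifier = (content: string): TokenClassification[] =>
  content
    .split(/\s+/)
    .filter(Boolean)
    .map((token) => ({
      token,
      keep: /[/_.(){}\[\]]|\d|[A-Z]{2,}/.test(token),
      confidence: 0.9,
    }));

const result = compress(messages, { mlTokenClassifier });
```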
- V2 features - quality metrics, flow detection, clustering, depth, ML classifier
- Compression pipeline - how the engine processes messages
- Token budget - budget-driven compression
- LLM integration - provider examples
- Round-trip - lossless compress/uncompress
- Provenance - metadata tracking