/**
 * Preserves ZPT navigation context and metadata in formatted outputs.
 *
 * `encode` runs four stages: extract metadata from the processing context,
 * trim it to a compression level, optionally strip privacy-sensitive values,
 * then embed the result in the content with one of the registered encoding
 * strategies. `decode` reverses the embedding step for the strategies that
 * leave a recognisable marker in the output.
 */
export default class MetadataEncoder {
  /**
   * @param {Object} [options] - Encoder defaults; `encode` merges per-call options over them.
   * @param {string} [options.encoding='structured'] - Key of an encoding strategy.
   * @param {string} [options.compressionLevel='medium'] - Key of a compression level.
   * @param {boolean} [options.preservePrivacy=false] - Apply privacy filters before encoding.
   */
  constructor(options = {}) {
    const supplied = options ?? {};
    this.config = {
      // Spread first so the normalised values below win over an explicit
      // `undefined` (e.g. `{ encoding: undefined }`), which previously erased
      // the default.
      ...supplied,
      encoding: supplied.encoding || 'structured',
      // NOTE(review): the four include* flags are stored here but no method in
      // this class reads them — confirm whether callers rely on them.
      includeNavigation: supplied.includeNavigation !== false,
      includeTimestamps: supplied.includeTimestamps !== false,
      includeProvenance: supplied.includeProvenance !== false,
      includeMetrics: supplied.includeMetrics !== false,
      compressionLevel: supplied.compressionLevel || 'medium',
      preservePrivacy: supplied.preservePrivacy || false
    };
    this.initializeEncodingStrategies();
    this.initializeMetadataSchemas();
    this.initializeCompressionLevels();
  }

  /**
   * Register the encoding strategies. Each entry carries a bound `handler`
   * with the signature `(content, metadata, options)` plus descriptive fields.
   */
  initializeEncodingStrategies() {
    this.encodingStrategies = {
      structured: {
        name: 'Structured Metadata',
        handler: this.encodeStructured.bind(this),
        description: 'Comprehensive structured metadata with full context',
        overhead: 'high',
        readability: 'high'
      },
      compact: {
        name: 'Compact Encoding',
        handler: this.encodeCompact.bind(this),
        description: 'Compressed metadata with essential information only',
        overhead: 'low',
        readability: 'medium'
      },
      inline: {
        name: 'Inline Markers',
        handler: this.encodeInline.bind(this),
        description: 'Embedded metadata markers within content',
        overhead: 'minimal',
        readability: 'low'
      },
      header: {
        name: 'Header Metadata',
        handler: this.encodeHeader.bind(this),
        description: 'Metadata in document header/frontmatter',
        overhead: 'medium',
        readability: 'high'
      },
      footer: {
        name: 'Footer Metadata',
        handler: this.encodeFooter.bind(this),
        description: 'Metadata appended at document end',
        overhead: 'medium',
        readability: 'medium'
      },
      distributed: {
        name: 'Distributed Encoding',
        handler: this.encodeDistributed.bind(this),
        description: 'Metadata distributed throughout content sections',
        overhead: 'medium',
        readability: 'high'
      }
    };
  }

  /**
   * Register the metadata schemas. The schema names double as the section
   * names of the object built by `extractMetadata`; `required` is what
   * `validateMetadata` checks.
   */
  initializeMetadataSchemas() {
    this.schemas = {
      navigation: {
        fields: ['zoom', 'pan', 'tilt', 'timestamp', 'sessionId'],
        required: ['zoom', 'tilt'],
        description: 'ZPT navigation parameters and context'
      },
      provenance: {
        fields: ['source', 'method', 'filters', 'transformations', 'timestamp'],
        required: ['source', 'method'],
        description: 'Data origin and processing history'
      },
      performance: {
        fields: ['processingTime', 'tokenCount', 'chunkCount', 'cacheHits'],
        required: ['processingTime'],
        description: 'Performance and processing metrics'
      },
      quality: {
        fields: ['confidence', 'completeness', 'relevance', 'coherence'],
        required: ['confidence'],
        description: 'Content quality indicators'
      },
      technical: {
        fields: ['version', 'model', 'tokenizer', 'parameters'],
        required: ['version'],
        description: 'Technical implementation details'
      }
    };
  }

  /**
   * Register the compression levels. `includeFields` is either `'all'` or a
   * list of field names that `compressMetadata` resolves against the metadata.
   */
  initializeCompressionLevels() {
    this.compressionLevels = {
      minimal: {
        includeFields: ['zoom', 'tilt', 'timestamp'],
        omitDetails: true,
        abbreviate: true,
        description: 'Absolute minimum metadata'
      },
      low: {
        includeFields: ['zoom', 'pan', 'tilt', 'timestamp', 'processingTime'],
        omitDetails: true,
        abbreviate: false,
        description: 'Essential navigation and timing info'
      },
      medium: {
        includeFields: ['zoom', 'pan', 'tilt', 'timestamp', 'processingTime', 'source', 'confidence'],
        omitDetails: false,
        abbreviate: false,
        description: 'Balanced metadata with key quality indicators'
      },
      high: {
        includeFields: 'all',
        omitDetails: false,
        abbreviate: false,
        description: 'Complete metadata with full context'
      },
      full: {
        includeFields: 'all',
        omitDetails: false,
        abbreviate: false,
        includeDebug: true,
        description: 'Full metadata including debug information'
      }
    };
  }

  /**
   * Main encoding method - embeds metadata into formatted content.
   * @param {Object|string} formattedContent - Content from PromptFormatter
   * @param {Object} fullContext - Complete processing context
   * @param {Object} [options] - Per-call overrides of the constructor options
   * @returns {Promise<Object>} `{ content, metadata: { encoding, preserved, original } }`
   * @throws {Error} "Metadata encoding failed: ..." wrapping any failure; the
   *   underlying error is attached as `cause`.
   */
  async encode(formattedContent, fullContext, options = {}) {
    const startTime = Date.now();
    const opts = { ...this.config, ...options };
    // An explicit `undefined` in the per-call options must not switch off the
    // configured strategy or compression level.
    opts.encoding = opts.encoding || this.config.encoding;
    opts.compressionLevel = opts.compressionLevel || this.config.compressionLevel;
    try {
      const extractedMetadata = this.extractMetadata(fullContext, opts);
      const compressedMetadata = this.compressMetadata(extractedMetadata, opts);
      const filteredMetadata = this.applyPrivacyFilters(compressedMetadata, opts);

      const strategy = opts.encoding;
      // Own-property lookup so names such as "constructor" are rejected
      // instead of resolving to an inherited Object.prototype member.
      const encoder = Object.prototype.hasOwnProperty.call(this.encodingStrategies, strategy)
        ? this.encodingStrategies[strategy]
        : undefined;
      if (!encoder) {
        throw new Error(`Unknown encoding strategy: ${strategy}`);
      }

      const encodedContent = await encoder.handler(formattedContent, filteredMetadata, opts);

      return {
        content: encodedContent,
        metadata: {
          encoding: {
            strategy,
            compressionLevel: opts.compressionLevel,
            overhead: this.calculateOverhead(encodedContent, formattedContent),
            processingTime: Date.now() - startTime
          },
          preserved: filteredMetadata,
          // NOTE(review): `original` is the unfiltered metadata even when
          // preservePrivacy is set — confirm callers do not serialise it.
          original: extractedMetadata
        }
      };
    } catch (error) {
      throw new Error(`Metadata encoding failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Extract comprehensive metadata from the processing context.
   * @param {Object} fullContext - Processing context; a nullish value is treated as `{}`.
   * @param {Object} options - Reads `sessionId`, `sequenceNumber`, `sessionContext`.
   * @returns {Object} Sections `navigation`, `provenance`, `performance`,
   *   `quality`, `technical`, plus `timestamp`, `version` and, when a
   *   `sessionId` is given, `session`.
   */
  extractMetadata(fullContext, options) {
    const context = fullContext ?? {};
    const opts = options ?? {};
    const metadata = {
      navigation: this.extractNavigationMetadata(context),
      provenance: this.extractProvenanceMetadata(context),
      performance: this.extractPerformanceMetadata(context),
      quality: this.extractQualityMetadata(context),
      technical: this.extractTechnicalMetadata(context),
      timestamp: new Date().toISOString(),
      version: '1.0.0'
    };
    if (opts.sessionId) {
      metadata.session = {
        id: opts.sessionId,
        sequence: opts.sequenceNumber || 1,
        context: opts.sessionContext || 'standalone'
      };
    }
    return metadata;
  }

  /**
   * Extract navigation-specific metadata. Prefers the parameters recorded on
   * the selection and falls back to `context.navigation`.
   * @param {Object} context - Processing context
   * @returns {Object} `{ zoom, pan, tilt, parameters }`; `parameters.original`
   *   holds the raw navigation object.
   */
  extractNavigationMetadata(context) {
    const navigation = context.selection?.metadata?.parameters || context.navigation || {};
    return {
      zoom: navigation.zoom?.level || navigation.zoom,
      pan: {
        filters: this.summarizeFilters(navigation.pan),
        complexity: navigation._metadata?.complexity,
        hasFilters: navigation._metadata?.hasFilters
      },
      tilt: navigation.tilt?.representation || navigation.tilt,
      parameters: {
        original: navigation,
        normalized: !!navigation._metadata
      }
    };
  }

  /**
   * Extract data provenance metadata.
   * @param {Object} context - Processing context
   * @returns {Object} `{ source, method, filters, transformations }`
   */
  extractProvenanceMetadata(context) {
    return {
      source: {
        corpus: context.corpus?.name || 'unknown',
        selection: context.selection?.metadata?.zoomLevel,
        projection: context.projection?.representation
      },
      method: {
        // Despite the name this is the number of primary selection criteria.
        selectionStrategy: context.selection?.metadata?.criteria?.primary?.length || 0,
        projectionType: context.projection?.outputType,
        transformationFormat: context.formatting?.format
      },
      filters: this.extractFilterSummary(context),
      transformations: this.extractTransformationChain(context)
    };
  }

  /**
   * Extract performance metadata.
   * @param {Object} context - Processing context
   * @returns {Object} `{ processingTime, tokenCount, chunkCount, cacheHits, resultCount }`
   */
  extractPerformanceMetadata(context) {
    const selection = context.selection?.metadata || {};
    const projection = context.projection?.metadata || {};
    const formatting = context.formatting?.metadata || {};
    const selectionTime = selection.selectionTime || 0;
    const projectionTime = projection.processingTime || 0;
    const formattingTime = formatting.processingTime || 0;
    const maxTokens = context.navigation?.transform?.maxTokens;
    return {
      processingTime: {
        selection: selectionTime,
        projection: projectionTime,
        formatting: formattingTime,
        total: selectionTime + projectionTime + formattingTime
      },
      tokenCount: {
        estimated: formatting.tokenEstimate || 0,
        budget: maxTokens || 0,
        // Utilisation falls back to a 4000-token budget when none is set.
        utilization: formatting.tokenEstimate ? formatting.tokenEstimate / (maxTokens || 4000) : 0
      },
      chunkCount: context.chunking?.metadata?.totalChunks || 0,
      cacheHits: selection.fromCache ? 1 : 0,
      resultCount: selection.resultCount || 0
    };
  }

  /**
   * Extract quality indicators.
   * @param {Object} context - Processing context
   * @returns {Object} `{ confidence, completeness, relevance, coherence, coverage }`
   */
  extractQualityMetadata(context) {
    const selection = context.selection?.metadata || {};
    const projection = context.projection?.data || {};
    return {
      confidence: this.calculateConfidence(context),
      completeness: this.calculateCompleteness(context),
      relevance: this.calculateRelevance(context),
      coherence: this.calculateCoherence(projection),
      coverage: selection.criteria?.estimatedSelectivity || 0
    };
  }

  /**
   * Extract technical implementation details.
   * @param {Object} context - Processing context
   * @returns {Object} `{ version, model, tokenizer, parameters, environment }`
   */
  extractTechnicalMetadata(context) {
    const navigation = context.navigation;
    return {
      version: {
        zpt: '1.0.0',
        semem: '1.0.0',
        ragno: '1.0.0'
      },
      model: context.projection?.metadata?.model || 'unknown',
      tokenizer: navigation?.transform?.tokenizer || 'cl100k_base',
      parameters: {
        zoomGranularity: navigation?.zoom?.granularity,
        tiltOutputFormat: navigation?.tilt?.outputFormat,
        transformStrategy: navigation?.transform?.chunkStrategy
      },
      environment: {
        timestamp: new Date().toISOString(),
        timezone: Intl.DateTimeFormat().resolvedOptions().timeZone
      }
    };
  }

  /**
   * Compress metadata based on compression level.
   *
   * Field names in a level's `includeFields` are resolved with
   * `resolveFieldPath`, so a leaf name such as `zoom` keeps
   * `metadata.navigation.zoom` at its original path. Fields that cannot be
   * found are skipped rather than stored as `undefined`.
   *
   * @param {Object} metadata - Output of `extractMetadata`
   * @param {Object} options - Reads `compressionLevel`
   * @returns {Object} The trimmed metadata, or `metadata` itself when the level is unknown.
   */
  compressMetadata(metadata, options) {
    const level = this.compressionLevels[options?.compressionLevel];
    if (!level) {
      return metadata; // No compression
    }

    // `let`: abbreviation below replaces the object (was a const reassignment
    // that threw for the "minimal" level).
    let compressed = {};
    if (level.includeFields === 'all') {
      Object.assign(compressed, metadata);
    } else {
      for (const field of level.includeFields) {
        const path = this.resolveFieldPath(metadata, field);
        if (path !== null) {
          this.setNestedField(compressed, path, this.getNestedField(metadata, path));
        }
      }
    }

    if (level.omitDetails) {
      if (compressed.provenance) {
        compressed.provenance = { source: compressed.provenance.source?.corpus };
      }
      if (compressed.technical) {
        compressed.technical = { version: compressed.technical.version?.zpt };
      }
    }

    if (level.abbreviate) {
      compressed = this.abbreviateFields(compressed);
    }
    return compressed;
  }

  /**
   * Locate a field inside the metadata object.
   * @param {Object} metadata - Metadata to search
   * @param {string} field - Dotted path or bare field name
   * @returns {string|null} `field` when it exists as given, otherwise
   *   `<section>.<field>` for the first schema section that defines it, or
   *   `null` when it is not present anywhere.
   */
  resolveFieldPath(metadata, field) {
    if (this.hasNestedField(metadata, field)) {
      return field;
    }
    for (const section of Object.keys(this.schemas)) {
      const candidate = `${section}.${field}`;
      if (this.hasNestedField(metadata, candidate)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Apply privacy filters to metadata.
   * @param {Object} metadata - Metadata (possibly compressed/abbreviated)
   * @param {Object} options - Reads `preservePrivacy`
   * @returns {Object} `metadata` unchanged when privacy is off, otherwise a
   *   filtered deep copy (the input is never mutated).
   */
  applyPrivacyFilters(metadata, options) {
    if (!options?.preservePrivacy) {
      return metadata;
    }
    const filtered = JSON.parse(JSON.stringify(metadata)); // Deep copy
    // Sections may sit under their abbreviated keys after compression.
    const navigation = filtered.navigation ?? filtered.nav;
    const technical = filtered.technical ?? filtered.tech;

    if (filtered.session) {
      delete filtered.session.id;
    }
    if (technical?.environment) {
      delete technical.environment.timezone;
    }
    if (filtered.provenance?.source) {
      filtered.provenance.source = {
        type: 'corpus'
      };
    }
    if (navigation?.pan?.filters) {
      const types = Object.keys(navigation.pan.filters);
      navigation.pan.filters = {
        count: types.length,
        types
      };
    }
    // The raw navigation object repeats the filter values anonymised above,
    // so it must not survive the privacy pass.
    if (navigation?.parameters) {
      delete navigation.parameters.original;
    }
    return filtered;
  }

  /**
   * Return the payload of a formatter result: its `content` property when one
   * is set, otherwise the value itself. Tolerates null/undefined input.
   * @param {*} content - String or `{ content }` wrapper
   * @returns {*} The unwrapped payload
   */
  unwrapContent(content) {
    return content?.content ?? content;
  }

  /**
   * Structured strategy: wrap metadata and content in one object.
   * @returns {Promise<Object|string>} Pretty-printed JSON when `content` is a
   *   string, otherwise the object itself.
   */
  async encodeStructured(content, metadata, options) {
    const structured = {
      zpt_metadata: metadata,
      content: this.unwrapContent(content),
      encoding_info: {
        strategy: 'structured',
        timestamp: new Date().toISOString(),
        version: '1.0.0'
      }
    };
    return typeof content === 'string' ? `${JSON.stringify(structured, null, 2)}` : structured;
  }

  /**
   * Compact strategy: a short summary of the metadata.
   * @returns {Promise<Object|string>} The content prefixed with a
   *   `<!-- ZPT:{json} -->` comment when `content` is a string, otherwise
   *   `{ zpt, data }`.
   */
  async encodeCompact(content, metadata, options) {
    const compact = this.createCompactRepresentation(metadata);
    const payload = this.unwrapContent(content);
    if (typeof content === 'string') {
      return `<!-- ZPT:${JSON.stringify(compact)} -->\n${payload}`;
    }
    return {
      zpt: compact,
      data: payload
    };
  }

  /**
   * Inline strategy: start/end markers around the text and a context marker
   * in place of every blank-line paragraph break.
   * @throws {Error} When the payload is not a string.
   */
  async encodeInline(content, metadata, options) {
    const contentStr = this.unwrapContent(content);
    if (typeof contentStr !== 'string') {
      throw new Error('Inline encoding requires string content');
    }
    const markers = this.createInlineMarkers(metadata);
    const body = contentStr.replace(/\n\n/g, `\n${markers.section}\n`);
    // Replacement runs on the body only, so the markers themselves are never rewritten.
    return `${markers.start}${body}${markers.end}`;
  }

  /**
   * Header strategy: YAML-style front matter before string content, or
   * `{ metadata, content }` for non-string content.
   */
  async encodeHeader(content, metadata, options) {
    const contentStr = this.unwrapContent(content);
    if (typeof contentStr !== 'string') {
      return {
        metadata: metadata,
        content: contentStr
      };
    }
    const header = this.createHeaderMetadata(metadata);
    return `---\n${header}\n---\n\n${contentStr}`;
  }

  /**
   * Footer strategy: a Markdown metadata section after string content, or
   * `{ content, metadata }` for non-string content.
   */
  async encodeFooter(content, metadata, options) {
    const contentStr = this.unwrapContent(content);
    if (typeof contentStr !== 'string') {
      return {
        content: contentStr,
        metadata: metadata
      };
    }
    const footer = this.createFooterMetadata(metadata);
    return `${contentStr}\n\n---\n\n${footer}`;
  }

  /**
   * Distributed strategy: split the text into paragraphs and follow the
   * leading ones with an HTML comment carrying a slice of the metadata.
   * Paragraphs beyond the last metadata slice keep a blank-line separator, and
   * empty content yields the metadata comment on its own.
   * @throws {Error} When the payload is not a string.
   */
  async encodeDistributed(content, metadata, options) {
    const contentStr = this.unwrapContent(content);
    if (typeof contentStr !== 'string') {
      throw new Error('Distributed encoding requires string content');
    }
    const sections = this.splitIntoSections(contentStr);
    const metadataParts = this.distributeMetadata(metadata, sections.length);
    const toComment = (part) => `<!-- ${JSON.stringify(part)} -->`;

    if (sections.length === 0) {
      return metadataParts.map(toComment).join('\n');
    }

    const lastIndex = sections.length - 1;
    return sections
      .map((section, index) => {
        if (index < metadataParts.length) {
          return `${section}\n${toComment(metadataParts[index])}\n`;
        }
        // Previously these sections were concatenated with no separator.
        return index < lastIndex ? `${section}\n\n` : section;
      })
      .join('');
  }

  /**
   * Build the short-key summary used by the compact strategy.
   * Accepts metadata with either full or abbreviated top-level keys.
   */
  createCompactRepresentation(metadata) {
    const meta = this.expandFields(metadata);
    const filters = meta.navigation?.pan?.filters;
    return {
      z: meta.navigation?.zoom,
      t: meta.navigation?.tilt,
      f: filters ? Object.keys(filters).length : 0,
      ts: meta.timestamp,
      pt: meta.performance?.processingTime?.total,
      rc: meta.performance?.resultCount
    };
  }

  /**
   * Build the three HTML comment markers used by the inline strategy.
   * Accepts metadata with either full or abbreviated top-level keys.
   */
  createInlineMarkers(metadata) {
    const meta = this.expandFields(metadata);
    const nav = meta.navigation || {};
    return {
      start: `<!-- ZPT-START: ${nav.zoom}/${nav.tilt} -->`,
      section: `<!-- ZPT-CTX: ${meta.timestamp} -->`,
      end: `<!-- ZPT-END: ${meta.performance?.processingTime?.total}ms -->`
    };
  }

  /**
   * Build the YAML-style lines used by the header strategy.
   * Accepts metadata with either full or abbreviated top-level keys.
   */
  createHeaderMetadata(metadata) {
    const meta = this.expandFields(metadata);
    const yaml = [
      'zpt_navigation:',
      ` zoom: ${meta.navigation?.zoom || 'unknown'}`,
      ` tilt: ${meta.navigation?.tilt || 'unknown'}`,
      `timestamp: ${meta.timestamp}`
    ];
    const total = meta.performance?.processingTime?.total;
    if (total) {
      yaml.push(`processing_time: ${total}ms`);
    }
    const confidence = meta.quality?.confidence;
    // Guard the type: toFixed only exists on numbers.
    if (Number.isFinite(confidence) && confidence !== 0) {
      yaml.push(`confidence: ${confidence.toFixed(3)}`);
    }
    return yaml.join('\n');
  }

  /**
   * Build the Markdown section used by the footer strategy.
   * Accepts metadata with either full or abbreviated top-level keys.
   */
  createFooterMetadata(metadata) {
    const meta = this.expandFields(metadata);
    const footer = [
      '## ZPT Metadata',
      '',
      `**Navigation:** ${meta.navigation?.zoom}/${meta.navigation?.tilt}`,
      `**Generated:** ${meta.timestamp}`
    ];
    const total = meta.performance?.processingTime?.total;
    if (total) {
      footer.push(`**Processing Time:** ${total}ms`);
    }
    return footer.join('\n');
  }

  /**
   * Split text on blank lines and drop whitespace-only pieces.
   * @param {string} content
   * @returns {string[]}
   */
  splitIntoSections(content) {
    return content.split(/\n\s*\n/).filter((section) => section.trim().length > 0);
  }

  /**
   * Partition the top-level metadata keys into at most `sectionCount`
   * consecutive groups. Empty groups are omitted, so the result may be
   * shorter than `sectionCount`.
   * @param {Object} metadata
   * @param {number} sectionCount - Values below 1 (or non-numeric) are treated as 1.
   * @returns {Object[]}
   */
  distributeMetadata(metadata, sectionCount) {
    const source = metadata ?? {};
    const keys = Object.keys(source);
    // At least one bucket: a zero count used to divide by zero and drop everything.
    const buckets = Math.max(1, Math.floor(sectionCount) || 1);
    const keysPerSection = Math.ceil(keys.length / buckets);
    const parts = [];
    for (let i = 0; i < buckets; i++) {
      const sectionKeys = keys.slice(i * keysPerSection, (i + 1) * keysPerSection);
      if (sectionKeys.length === 0) {
        continue;
      }
      const part = {};
      for (const key of sectionKeys) {
        part[key] = source[key];
      }
      parts.push(part);
    }
    return parts;
  }

  /**
   * Heuristic confidence in [0.5, 1.0]: a 0.5 base plus bonuses when the
   * selection and projection report timings and the result is not cached.
   */
  calculateConfidence(context) {
    let confidence = 0.5; // Base confidence
    if (context.selection?.metadata?.selectionTime) confidence += 0.2;
    if (context.projection?.metadata?.processingTime) confidence += 0.2;
    if (!context.selection?.metadata?.fromCache) confidence += 0.1;
    return Math.min(1.0, confidence);
  }

  /**
   * Ratio of returned results to a target (one per 50 budget tokens, or 100
   * when no budget is set), capped at 1.0.
   */
  calculateCompleteness(context) {
    const maxTokens = context.navigation?.transform?.maxTokens;
    // Floor at 1: budgets under 50 tokens used to give a zero target and NaN.
    const targetResults = maxTokens ? Math.max(1, Math.floor(maxTokens / 50)) : 100;
    const actualResults = context.selection?.metadata?.resultCount || 0;
    return Math.min(1.0, actualResults / targetResults);
  }

  /**
   * Heuristic relevance: 0.5 without pan filters, otherwise 0.8 plus a
   * complexity bonus, capped at 1.0 like the other indicators.
   */
  calculateRelevance(context) {
    const pan = context.navigation?.pan;
    const hasFilters = pan && Object.keys(pan).length > 0;
    const complexity = context.navigation?._metadata?.complexity || 0;
    return hasFilters ? Math.min(1.0, 0.8 + complexity * 0.2) : 0.5;
  }

  /**
   * Coherence taken from the projection data: `aggregateStats.avgSimilarity`,
   * else `stats.coverageScore`, else 0.5.
   */
  calculateCoherence(projectionData) {
    return (
      projectionData?.aggregateStats?.avgSimilarity ||
      projectionData?.stats?.coverageScore ||
      0.5
    );
  }

  /**
   * Reduce the pan parameters to a small summary object.
   * @param {Object} [pan]
   * @returns {Object} Any of `topic`, `entity` (a count), `temporal`, `geographic`.
   */
  summarizeFilters(pan) {
    if (!pan) return {};
    const summary = {};
    if (pan.topic) summary.topic = pan.topic.value || pan.topic;
    if (pan.entity) summary.entity = pan.entity.count || (Array.isArray(pan.entity) ? pan.entity.length : 1);
    if (pan.temporal) summary.temporal = true;
    if (pan.geographic) summary.geographic = true;
    return summary;
  }

  /**
   * Count and list the pan filter keys.
   * NOTE(review): prefers `context.navigation` over the selection parameters,
   * the opposite order to `extractNavigationMetadata` — confirm this is intended.
   */
  extractFilterSummary(context) {
    const navigation = context.navigation || context.selection?.metadata?.parameters || {};
    const types = navigation.pan ? Object.keys(navigation.pan) : [];
    return {
      count: types.length,
      types,
      complexity: navigation._metadata?.complexity || 0
    };
  }

  /**
   * List the pipeline steps present on the context, in order:
   * selection, projection, formatting.
   * @returns {Array<{step: string, method: *, timestamp: *}>}
   */
  extractTransformationChain(context) {
    const chain = [];
    if (context.selection) {
      chain.push({
        step: 'selection',
        method: 'corpuscle_selector',
        timestamp: context.selection.metadata?.timestamp
      });
    }
    if (context.projection) {
      chain.push({
        step: 'projection',
        method: context.projection.representation,
        timestamp: context.projection.metadata?.timestamp
      });
    }
    if (context.formatting) {
      chain.push({
        step: 'formatting',
        method: context.formatting.format,
        timestamp: context.formatting.metadata?.timestamp
      });
    }
    return chain;
  }

  /**
   * Size difference between encoded and original content. Sizes are string
   * lengths (UTF-16 code units); non-strings are measured by their JSON form.
   * @returns {{bytes: number, percentage: string}} `percentage` has one decimal.
   */
  calculateOverhead(encodedContent, originalContent) {
    // JSON.stringify(undefined) is undefined, hence the `?? ''`.
    const sizeOf = (value) =>
      (typeof value === 'string' ? value : (JSON.stringify(value) ?? '')).length;
    const encodedSize = sizeOf(encodedContent);
    const originalSize = sizeOf(originalContent);
    const bytes = encodedSize - originalSize;
    // 0 / 0 would render as "NaN"; report no overhead instead.
    const percentage =
      originalSize === 0 && bytes === 0 ? '0.0' : ((bytes / originalSize) * 100).toFixed(1);
    return { bytes, percentage };
  }

  /**
   * Long-key to short-key table used by `abbreviateFields` / `expandFields`.
   * A Map, so lookups can never hit inherited Object.prototype members.
   * @returns {Map<string, string>}
   */
  getFieldAbbreviations() {
    return new Map([
      ['timestamp', 'ts'],
      ['processingTime', 'pt'],
      ['confidence', 'conf'],
      ['navigation', 'nav'],
      ['performance', 'perf'],
      ['technical', 'tech']
    ]);
  }

  /**
   * Return a shallow copy of `obj` with known top-level keys shortened.
   * @param {Object} obj
   * @returns {Object}
   */
  abbreviateFields(obj) {
    const abbreviations = this.getFieldAbbreviations();
    const abbreviated = {};
    for (const [key, value] of Object.entries(obj ?? {})) {
      abbreviated[abbreviations.get(key) ?? key] = value;
    }
    return abbreviated;
  }

  /**
   * Inverse of `abbreviateFields`: return a shallow copy of `obj` with
   * abbreviated top-level keys restored to their long names.
   * @param {Object} obj
   * @returns {Object}
   */
  expandFields(obj) {
    const expansions = new Map(
      [...this.getFieldAbbreviations()].map(([longKey, shortKey]) => [shortKey, longKey])
    );
    const expanded = {};
    for (const [key, value] of Object.entries(obj ?? {})) {
      expanded[expansions.get(key) ?? key] = value;
    }
    return expanded;
  }

  /**
   * @param {Object} obj
   * @param {string} field - Dotted path
   * @returns {boolean} True when the path resolves to anything other than `undefined`.
   */
  hasNestedField(obj, field) {
    return this.getNestedField(obj, field) !== undefined;
  }

  /**
   * Read a dotted path; yields `undefined` as soon as a segment is missing.
   * @param {Object} obj
   * @param {string} field - Dotted path
   * @returns {*}
   */
  getNestedField(obj, field) {
    return field
      .split('.')
      .reduce((current, key) => (current == null ? undefined : current[key]), obj);
  }

  /**
   * Write a dotted path, creating intermediate objects where a segment is
   * missing or falsy.
   * @param {Object} obj - Mutated in place
   * @param {string} field - Dotted path
   * @param {*} value
   */
  setNestedField(obj, field, value) {
    const keys = field.split('.');
    const lastKey = keys.pop();
    let target = obj;
    for (const key of keys) {
      if (!target[key]) target[key] = {};
      target = target[key];
    }
    target[lastKey] = value;
  }

  /**
   * Retrieve metadata from encoded content.
   * @param {Object|string} encodedContent
   * @param {string|null} [strategy] - Detected from the content when omitted.
   * @returns {Promise<*>} Strategy-specific result, or null when nothing is found.
   * @throws {Error} When no decoder exists for the strategy.
   */
  async decode(encodedContent, strategy = null) {
    const resolved = strategy || this.detectEncodingStrategy(encodedContent);
    const decoder = this.getDecoder(resolved);
    if (!decoder) {
      throw new Error(`No decoder available for strategy: ${resolved}`);
    }
    return decoder(encodedContent);
  }

  /**
   * Parse the JSON string produced by the structured strategy.
   * @param {*} content
   * @returns {Object|null} The parsed document when it is JSON carrying a
   *   `zpt_metadata` member, otherwise null.
   */
  parseStructuredString(content) {
    if (typeof content !== 'string' || !content.trimStart().startsWith('{')) {
      return null;
    }
    try {
      const parsed = JSON.parse(content);
      return parsed !== null && typeof parsed === 'object' && parsed.zpt_metadata ? parsed : null;
    } catch {
      // This is a probe: text that is not valid JSON simply is not structured output.
      return null;
    }
  }

  /**
   * Merge the `<!-- {json} -->` comments written by the distributed strategy.
   * @param {*} content
   * @returns {Object|null} The merged metadata, or null when no such comment parses.
   */
  collectDistributedParts(content) {
    if (typeof content !== 'string') {
      return null;
    }
    const merged = {};
    let found = false;
    for (const [, json] of content.matchAll(/<!-- (\{.*?\}) -->/g)) {
      try {
        Object.assign(merged, JSON.parse(json));
        found = true;
      } catch {
        // A brace-delimited comment that is not JSON is not ours; skip it.
      }
    }
    return found ? merged : null;
  }

  /**
   * Guess which strategy produced `content`.
   * @param {*} content
   * @returns {string} A strategy key, or `'unknown'`.
   */
  detectEncodingStrategy(content) {
    // typeof null is 'object', so rule it out before reading a property.
    if (content !== null && typeof content === 'object' && content.zpt_metadata) {
      return 'structured';
    }
    if (typeof content === 'string') {
      if (this.parseStructuredString(content)) return 'structured';
      if (content.includes('<!-- ZPT:')) return 'compact';
      if (content.includes('<!-- ZPT-START:')) return 'inline';
      if (content.startsWith('---\n')) return 'header';
      if (content.includes('\n---\n\n## ZPT Metadata')) return 'footer';
      if (this.collectDistributedParts(content)) return 'distributed';
    }
    return 'unknown';
  }

  /**
   * Look up the decoder for a strategy.
   * @param {string} strategy
   * @returns {Function|undefined} `(content) => result`, or undefined for an
   *   unknown strategy. Text decoders return null for non-string content.
   */
  getDecoder(strategy) {
    const fromText = (parse) => (content) => (typeof content === 'string' ? parse(content) : null);
    const footerMarker = '\n---\n\n## ZPT Metadata';
    const decoders = {
      structured: (content) => {
        const doc = typeof content === 'string' ? this.parseStructuredString(content) : content;
        return doc?.zpt_metadata ?? null;
      },
      compact: fromText((content) => {
        const match = content.match(/<!-- ZPT:(.+?) -->/);
        return match ? JSON.parse(match[1]) : null;
      }),
      inline: fromText((content) => {
        const matches = content.match(/<!-- ZPT-.*? -->/g);
        return matches ? { markers: matches } : null;
      }),
      header: fromText((content) => {
        const match = content.match(/^---\n([\s\S]*?)\n---/);
        return match ? { yaml: match[1] } : null;
      }),
      footer: fromText((content) => {
        // Anchor on the last ZPT footer so "---" rules inside the body are not
        // mistaken for the footer separator.
        const at = content.lastIndexOf(footerMarker);
        return at === -1 ? null : { footer: content.slice(at + '\n---\n\n'.length) };
      }),
      distributed: (content) => this.collectDistributedParts(content)
    };
    return Object.prototype.hasOwnProperty.call(decoders, strategy) ? decoders[strategy] : undefined;
  }

  /** @returns {string[]} Registered strategy keys. */
  getAvailableStrategies() {
    return Object.keys(this.encodingStrategies);
  }

  /** @returns {Object|null} Shallow copy of a strategy entry, or null when unknown. */
  getStrategyInfo(strategy) {
    const info = this.encodingStrategies[strategy];
    return info ? { ...info } : null;
  }

  /** @returns {string[]} Registered compression level keys. */
  getCompressionLevels() {
    return Object.keys(this.compressionLevels);
  }

  /** @returns {Object|null} Shallow copy of a compression level, or null when unknown. */
  getCompressionInfo(level) {
    const info = this.compressionLevels[level];
    return info ? { ...info } : null;
  }

  /** @returns {string[]} Registered schema keys. */
  getSchemas() {
    return Object.keys(this.schemas);
  }

  /** @returns {Object|null} Shallow copy of a schema, or null when unknown. */
  getSchemaInfo(schema) {
    const info = this.schemas[schema];
    return info ? { ...info } : null;
  }

  /**
   * Check that every required field of a schema is present.
   * @param {Object} metadata - The object to check (a single section, e.g. navigation)
   * @param {string} [schema='navigation']
   * @returns {{valid: boolean, issues: string[], schema: string}}
   * @throws {Error} When the schema is unknown.
   */
  validateMetadata(metadata, schema = 'navigation') {
    const schemaInfo = this.schemas[schema];
    if (!schemaInfo) {
      throw new Error(`Unknown schema: ${schema}`);
    }
    const issues = schemaInfo.required
      .filter((field) => !this.hasNestedField(metadata, field))
      .map((field) => `Missing required field: ${field}`);
    return {
      valid: issues.length === 0,
      issues,
      schema
    };
  }

  /**
   * Check that content carries a recognisable encoding, optionally a specific one.
   * @param {*} content
   * @param {string|null} [expectedStrategy]
   * @returns {Object} `{ valid, detected, message }`, plus `expected` on a mismatch.
   */
  validateEncodedContent(content, expectedStrategy = null) {
    const detectedStrategy = this.detectEncodingStrategy(content);
    if (expectedStrategy && detectedStrategy !== expectedStrategy) {
      return {
        valid: false,
        detected: detectedStrategy,
        expected: expectedStrategy,
        message: `Strategy mismatch: expected ${expectedStrategy}, detected ${detectedStrategy}`
      };
    }
    const recognised = detectedStrategy !== 'unknown';
    return {
      valid: recognised,
      detected: detectedStrategy,
      message: recognised ? 'Valid encoding detected' : 'No encoding detected'
    };
  }
}