/**
* Main orchestrator for parameter-based corpuscle selection from Ragno corpus
*/
import ParameterValidator from '../parameters/ParameterValidator.js';
import ParameterNormalizer from '../parameters/ParameterNormalizer.js';
import FilterBuilder from '../parameters/FilterBuilder.js';
import SelectionCriteria from '../parameters/SelectionCriteria.js';
import { logger } from '../../Utils.js';
export default class CorpuscleSelector {
constructor(ragnoCorpus, options = {}) {
this.corpus = ragnoCorpus;
this.sparqlStore = options.sparqlStore;
this.embeddingHandler = options.embeddingHandler;
// Initialize parameter processing components
this.validator = new ParameterValidator();
this.normalizer = new ParameterNormalizer();
this.filterBuilder = new FilterBuilder(options);
this.criteriaBuilder = new SelectionCriteria(options);
// Selection configuration
this.config = {
maxResults: options.maxResults || 1000,
timeoutMs: options.timeoutMs || 30000,
enableCaching: options.enableCaching !== false,
debugMode: options.debugMode || false,
...options
};
// Performance tracking
this.metrics = {
totalSelections: 0,
avgSelectionTime: 0,
cacheHits: 0,
cacheMisses: 0
};
// Result cache
this.cache = new Map();
this.cacheExpiry = options.cacheExpiry || 3600000; // 1 hour
}
/**
* Main selection method - selects corpuscles based on ZPT parameters
* @param {Object} params - Raw ZPT navigation parameters
* @returns {Promise<Object>} Selection results with corpuscles and metadata
*/
async select(params) {
const startTime = Date.now();
this.metrics.totalSelections++;
try {
logger.info('Starting corpuscle selection', { params });
// Phase 1: Validate parameters
const validationResult = this.validator.validate(params);
if (!validationResult.valid) {
throw new Error(`Parameter validation failed: ${validationResult.message}`);
}
// Phase 2: Normalize parameters
const normalizedParams = this.normalizer.normalize(params);
logger.debug('Parameters normalized', { normalizedParams });
// Phase 3: Check cache
const cacheKey = this.normalizer.createParameterHash(normalizedParams);
if (this.config.enableCaching) {
const cachedResult = this.getCachedResult(cacheKey);
if (cachedResult) {
this.metrics.cacheHits++;
logger.debug('Cache hit', { cacheKey });
return this.enrichCachedResult(cachedResult, normalizedParams);
}
this.metrics.cacheMisses++;
}
// Phase 4: Build selection criteria
const selectionCriteria = this.criteriaBuilder.buildCriteria(normalizedParams);
logger.debug('Selection criteria built', {
criteria: this.criteriaBuilder.getSummary(selectionCriteria)
});
// Phase 5: Execute selection based on tilt type
let corpuscles;
switch (normalizedParams.tilt.representation) {
case 'embedding':
corpuscles = await this.selectByEmbedding(normalizedParams, selectionCriteria);
break;
case 'keywords':
corpuscles = await this.selectByKeywords(normalizedParams, selectionCriteria);
break;
case 'graph':
corpuscles = await this.selectByGraph(normalizedParams, selectionCriteria);
break;
case 'temporal':
corpuscles = await this.selectByTemporal(normalizedParams, selectionCriteria);
break;
default:
throw new Error(`Unsupported tilt representation: ${normalizedParams.tilt.representation}`);
}
// Phase 6: Apply post-processing
const processedCorpuscles = await this.postProcessCorpuscles(
corpuscles,
normalizedParams,
selectionCriteria
);
// Phase 7: Build result object
const result = this.buildSelectionResult(
processedCorpuscles,
normalizedParams,
selectionCriteria,
Date.now() - startTime
);
// Phase 8: Cache result
if (this.config.enableCaching) {
this.cacheResult(cacheKey, result);
}
// Update metrics
this.updateMetrics(Date.now() - startTime);
logger.info('Corpuscle selection completed', {
resultCount: result.corpuscles.length,
selectionTime: result.metadata.selectionTime,
cacheKey
});
return result;
} catch (error) {
logger.error('Corpuscle selection failed', { error, params });
throw new Error(`Selection failed: ${error.message}`);
}
}
/**
* Select corpuscles using embedding similarity
*/
async selectByEmbedding(normalizedParams, selectionCriteria) {
if (!this.embeddingHandler) {
throw new Error('EmbeddingHandler required for embedding-based selection');
}
// Build SPARQL query for embedding search
const queryConfig = this.filterBuilder.buildQuery(normalizedParams);
// Execute base query to get candidates
const candidates = await this.executeQuery(queryConfig);
// If we have a topic, generate query embedding for similarity
if (normalizedParams.pan.topic) {
const queryEmbedding = await this.embeddingHandler.generateEmbedding(
normalizedParams.pan.topic.value
);
// Calculate similarities and rank
return this.rankBySimilarity(candidates, queryEmbedding, selectionCriteria);
}
// Otherwise, return candidates filtered by selection criteria
return this.filterCorpuscles(candidates, selectionCriteria);
}
/**
* Select corpuscles using keyword matching
*/
async selectByKeywords(normalizedParams, selectionCriteria) {
const queryConfig = this.filterBuilder.buildQuery(normalizedParams);
const candidates = await this.executeQuery(queryConfig);
// Apply keyword-based scoring
return this.scoreByKeywords(candidates, normalizedParams, selectionCriteria);
}
/**
* Select corpuscles using graph structure
*/
async selectByGraph(normalizedParams, selectionCriteria) {
const queryConfig = this.filterBuilder.buildQuery(normalizedParams);
const candidates = await this.executeQuery(queryConfig);
// Apply graph-based scoring (connectivity, centrality)
return this.scoreByGraph(candidates, normalizedParams, selectionCriteria);
}
/**
* Select corpuscles using temporal ordering
*/
async selectByTemporal(normalizedParams, selectionCriteria) {
const queryConfig = this.filterBuilder.buildQuery(normalizedParams);
queryConfig.query = queryConfig.query.replace(
'ORDER BY ?uri',
'ORDER BY DESC(?created) DESC(?modified)'
);
const candidates = await this.executeQuery(queryConfig);
return this.filterCorpuscles(candidates, selectionCriteria);
}
/**
* Execute SPARQL query against the corpus
*/
async executeQuery(queryConfig) {
if (!this.sparqlStore) {
throw new Error('SPARQLStore required for corpus queries');
}
try {
logger.debug('Executing SPARQL query', {
query: queryConfig.query.substring(0, 200) + '...'
});
const result = await this.sparqlStore._executeSparqlQuery(
queryConfig.query,
this.sparqlStore.endpoint.query
);
return this.parseQueryResults(result, queryConfig);
} catch (error) {
logger.error('SPARQL query execution failed', { error, queryConfig });
throw new Error(`Query execution failed: ${error.message}`);
}
}
/**
* Parse SPARQL query results into corpuscle objects
*/
parseQueryResults(sparqlResult, queryConfig) {
if (!sparqlResult.results || !sparqlResult.results.bindings) {
return [];
}
return sparqlResult.results.bindings.map(binding => {
const corpuscle = {
uri: binding.uri?.value,
type: this.determineCorpuscleType(binding, queryConfig.zoomLevel),
content: this.extractContent(binding),
metadata: this.extractMetadata(binding),
score: 0, // Will be calculated later
binding // Keep original binding for debugging
};
return corpuscle;
});
}
/**
* Determine corpuscle type from SPARQL binding
*/
determineCorpuscleType(binding, zoomLevel) {
if (binding.type?.value) {
const rdfType = binding.type.value;
if (rdfType.includes('Entity')) return 'entity';
if (rdfType.includes('SemanticUnit') || rdfType.includes('Unit')) return 'unit';
if (rdfType.includes('TextElement') || rdfType.includes('Text')) return 'text';
if (rdfType.includes('Community')) return 'community';
if (rdfType.includes('Corpus')) return 'corpus';
}
return zoomLevel; // Fallback to zoom level
}
/**
* Extract content from SPARQL binding
*/
extractContent(binding) {
const content = {};
if (binding.label?.value) content.label = binding.label.value;
if (binding.prefLabel?.value) content.prefLabel = binding.prefLabel.value;
if (binding.text?.value) content.text = binding.text.value;
if (binding.content?.value) content.content = binding.content.value;
if (binding.description?.value) content.description = binding.description.value;
return content;
}
/**
* Extract metadata from SPARQL binding
*/
extractMetadata(binding) {
const metadata = {};
if (binding.created?.value) metadata.created = binding.created.value;
if (binding.modified?.value) metadata.modified = binding.modified.value;
if (binding.source?.value) metadata.source = binding.source.value;
if (binding.position?.value) metadata.position = binding.position.value;
if (binding.embedding?.value) {
try {
metadata.embedding = JSON.parse(binding.embedding.value);
} catch (e) {
logger.warn('Failed to parse embedding', { embedding: binding.embedding.value });
}
}
return metadata;
}
/**
* Rank corpuscles by embedding similarity
*/
async rankBySimilarity(corpuscles, queryEmbedding, selectionCriteria) {
const scoredCorpuscles = corpuscles.map(corpuscle => {
let similarity = 0;
if (corpuscle.metadata.embedding) {
similarity = this.calculateCosineSimilarity(
queryEmbedding,
corpuscle.metadata.embedding
);
}
return {
...corpuscle,
score: similarity,
similarity
};
});
// Sort by similarity and apply selection criteria
scoredCorpuscles.sort((a, b) => b.similarity - a.similarity);
return this.filterCorpuscles(scoredCorpuscles, selectionCriteria);
}
/**
* Score corpuscles by keyword relevance
*/
scoreByKeywords(corpuscles, normalizedParams, selectionCriteria) {
const topicValue = normalizedParams.pan.topic?.value;
if (!topicValue) {
return this.filterCorpuscles(corpuscles, selectionCriteria);
}
const keywords = topicValue.toLowerCase().split(/\s+/);
const scoredCorpuscles = corpuscles.map(corpuscle => {
const text = [
corpuscle.content.label,
corpuscle.content.prefLabel,
corpuscle.content.text,
corpuscle.content.content,
corpuscle.content.description
].filter(Boolean).join(' ').toLowerCase();
let score = 0;
keywords.forEach(keyword => {
const matches = (text.match(new RegExp(keyword, 'g')) || []).length;
score += matches;
});
return {
...corpuscle,
score: score / keywords.length,
keywordScore: score
};
});
scoredCorpuscles.sort((a, b) => b.score - a.score);
return this.filterCorpuscles(scoredCorpuscles, selectionCriteria);
}
/**
* Score corpuscles by graph connectivity
*/
scoreByGraph(corpuscles, normalizedParams, selectionCriteria) {
// For now, use a simple connectivity heuristic
// In a full implementation, this would use graph metrics
const scoredCorpuscles = corpuscles.map(corpuscle => {
let connectivityScore = 0;
// Count relationships/connections (simplified)
if (corpuscle.binding.entity) connectivityScore += 1;
if (corpuscle.binding.unit) connectivityScore += 1;
if (corpuscle.binding.members) connectivityScore += 2;
return {
...corpuscle,
score: connectivityScore,
connectivityScore
};
});
scoredCorpuscles.sort((a, b) => b.score - a.score);
return this.filterCorpuscles(scoredCorpuscles, selectionCriteria);
}
/**
* Apply selection criteria to filter corpuscles
*/
filterCorpuscles(corpuscles, selectionCriteria) {
let filtered = [...corpuscles];
// Apply constraints
if (selectionCriteria.constraints) {
const resultLimit = selectionCriteria.constraints.find(c => c.type === 'result_count')?.limit;
if (resultLimit) {
filtered = filtered.slice(0, resultLimit);
}
}
return filtered;
}
/**
* Post-process selected corpuscles
*/
async postProcessCorpuscles(corpuscles, normalizedParams, selectionCriteria) {
// Apply diversity filtering if needed
if (selectionCriteria.scoring.components.some(c => c.name === 'diversity')) {
corpuscles = this.applyDiversityFilter(corpuscles, normalizedParams);
}
// Sort by final score
corpuscles.sort((a, b) => b.score - a.score);
return corpuscles;
}
/**
* Apply diversity filtering to reduce redundancy
*/
applyDiversityFilter(corpuscles, normalizedParams) {
const diversityThreshold = 0.8;
const filtered = [];
for (const corpuscle of corpuscles) {
let isDiverse = true;
for (const existing of filtered) {
if (this.calculateContentSimilarity(corpuscle, existing) > diversityThreshold) {
isDiverse = false;
break;
}
}
if (isDiverse) {
filtered.push(corpuscle);
}
}
return filtered;
}
/**
* Calculate cosine similarity between embeddings
*/
calculateCosineSimilarity(embedding1, embedding2) {
if (!embedding1 || !embedding2 || embedding1.length !== embedding2.length) {
return 0;
}
let dotProduct = 0;
let norm1 = 0;
let norm2 = 0;
for (let i = 0; i < embedding1.length; i++) {
dotProduct += embedding1[i] * embedding2[i];
norm1 += embedding1[i] * embedding1[i];
norm2 += embedding2[i] * embedding2[i];
}
return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
}
/**
* Calculate content similarity between corpuscles
*/
calculateContentSimilarity(corpuscle1, corpuscle2) {
const text1 = Object.values(corpuscle1.content).join(' ').toLowerCase();
const text2 = Object.values(corpuscle2.content).join(' ').toLowerCase();
// Simple Jaccard similarity
const words1 = new Set(text1.split(/\s+/));
const words2 = new Set(text2.split(/\s+/));
const intersection = new Set([...words1].filter(w => words2.has(w)));
const union = new Set([...words1, ...words2]);
return intersection.size / union.size;
}
/**
* Build final selection result object
*/
buildSelectionResult(corpuscles, normalizedParams, selectionCriteria, selectionTime) {
return {
corpuscles,
metadata: {
selectionTime,
parameters: normalizedParams,
criteria: this.criteriaBuilder.getSummary(selectionCriteria),
resultCount: corpuscles.length,
zoomLevel: normalizedParams.zoom.level,
tiltRepresentation: normalizedParams.tilt.representation,
hasFilters: normalizedParams._metadata.hasFilters,
complexity: normalizedParams._metadata.complexity,
timestamp: new Date().toISOString()
},
navigation: {
zoom: normalizedParams.zoom.level,
pan: normalizedParams.pan,
tilt: normalizedParams.tilt.representation
}
};
}
/**
* Cache management methods
*/
getCachedResult(cacheKey) {
if (!this.cache.has(cacheKey)) return null;
const cached = this.cache.get(cacheKey);
if (Date.now() - cached.timestamp > this.cacheExpiry) {
this.cache.delete(cacheKey);
return null;
}
return cached.result;
}
cacheResult(cacheKey, result) {
this.cache.set(cacheKey, {
result: JSON.parse(JSON.stringify(result)), // Deep copy
timestamp: Date.now()
});
// Cleanup old cache entries
if (this.cache.size > 100) {
const oldestKey = this.cache.keys().next().value;
this.cache.delete(oldestKey);
}
}
enrichCachedResult(cachedResult, normalizedParams) {
return {
...cachedResult,
metadata: {
...cachedResult.metadata,
fromCache: true,
parameters: normalizedParams,
timestamp: new Date().toISOString()
}
};
}
/**
* Update performance metrics
*/
updateMetrics(selectionTime) {
this.metrics.avgSelectionTime =
(this.metrics.avgSelectionTime * (this.metrics.totalSelections - 1) + selectionTime) /
this.metrics.totalSelections;
}
/**
* Get selector statistics
*/
getMetrics() {
return {
...this.metrics,
cacheSize: this.cache.size,
cacheHitRate: this.metrics.cacheHits / (this.metrics.cacheHits + this.metrics.cacheMisses)
};
}
/**
* Clear cache and reset metrics
*/
reset() {
this.cache.clear();
this.metrics = {
totalSelections: 0,
avgSelectionTime: 0,
cacheHits: 0,
cacheMisses: 0
};
}
/**
* Dispose of resources
*/
dispose() {
this.cache.clear();
logger.info('CorpuscleSelector disposed');
}
}