Source: zpt/parameters/FilterBuilder.js

/**
 * Builds SPARQL queries and filters from normalized ZPT parameters
 */
export default class FilterBuilder {
    /**
     * @param {Object} [options]
     * @param {string} [options.graphName] - IRI of the named graph every query is scoped to;
     *   falls back to 'http://example.org/ragno/memory' when omitted or falsy
     */
    constructor(options = {}) {
        this.graphName = options.graphName || 'http://example.org/ragno/memory';
        this.initializeNamespaces();
        this.initializeQueryTemplates();
    }

    /**
     * Initialize namespace prefixes for SPARQL queries.
     * The resulting block is stored on `this.prefixes` and prepended to every generated query.
     */
    initializeNamespaces() {
        this.prefixes = `
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            PREFIX prov: <http://www.w3.org/ns/prov#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            PREFIX ragno: <http://purl.org/stuff/ragno/>
            PREFIX semem: <http://purl.org/stuff/semem/>
        `;
    }

    /**
     * Initialize SPARQL query templates for different zoom levels.
     *
     * Each template contains the placeholders {{FILTERS}}, {{ORDER_BY}} and {{LIMIT}},
     * which buildQuery() substitutes. Also records, per zoom level, which query
     * variables a topic filter should be matched against.
     */
    initializeQueryTemplates() {
        // Variables a topic filter is tested against. `unit` and `text` templates never
        // bind ?label / ?prefLabel, so they are matched on the text they do bind.
        this.topicVariables = {
            entity: ['?label', '?prefLabel'],
            unit: ['?content'],
            text: ['?text'],
            community: ['?label', '?prefLabel'],
            corpus: ['?label', '?prefLabel']
        };

        this.templates = {
            entity: `
                SELECT DISTINCT ?uri ?label ?type ?prefLabel ?embedding ?metadata
                WHERE {
                    GRAPH <${this.graphName}> {
                        ?uri a ragno:Entity ;
                             rdfs:label ?label ;
                             rdf:type ?type .
                        OPTIONAL { ?uri skos:prefLabel ?prefLabel }
                        OPTIONAL { ?uri semem:embedding ?embedding }
                        OPTIONAL { ?uri semem:metadata ?metadata }
                        {{FILTERS}}
                    }
                }
                {{ORDER_BY}}
                {{LIMIT}}
            `,
            unit: `
                SELECT DISTINCT ?uri ?content ?entity ?unit ?embedding ?metadata
                WHERE {
                    GRAPH <${this.graphName}> {
                        ?uri a ragno:SemanticUnit ;
                             ragno:hasContent ?content .
                        OPTIONAL { ?uri ragno:relatedTo ?entity }
                        OPTIONAL { ?uri ragno:partOf ?unit }
                        OPTIONAL { ?uri semem:embedding ?embedding }
                        OPTIONAL { ?uri semem:metadata ?metadata }
                        {{FILTERS}}
                    }
                }
                {{ORDER_BY}}
                {{LIMIT}}
            `,
            text: `
                SELECT DISTINCT ?uri ?text ?source ?position ?embedding ?metadata
                WHERE {
                    GRAPH <${this.graphName}> {
                        ?uri a ragno:TextElement ;
                             ragno:hasText ?text .
                        OPTIONAL { ?uri ragno:sourceDocument ?source }
                        OPTIONAL { ?uri ragno:position ?position }
                        OPTIONAL { ?uri semem:embedding ?embedding }
                        OPTIONAL { ?uri semem:metadata ?metadata }
                        {{FILTERS}}
                    }
                }
                {{ORDER_BY}}
                {{LIMIT}}
            `,
            community: `
                SELECT DISTINCT ?uri ?label ?description ?members ?metadata
                WHERE {
                    GRAPH <${this.graphName}> {
                        ?uri a ragno:Community ;
                             rdfs:label ?label .
                        OPTIONAL { ?uri rdfs:comment ?description }
                        OPTIONAL { ?uri ragno:hasMember ?members }
                        OPTIONAL { ?uri semem:metadata ?metadata }
                        {{FILTERS}}
                    }
                }
                {{ORDER_BY}}
                {{LIMIT}}
            `,
            corpus: `
                SELECT DISTINCT ?uri ?label ?description ?created ?modified
                WHERE {
                    GRAPH <${this.graphName}> {
                        ?uri a ragno:Corpus ;
                             rdfs:label ?label .
                        OPTIONAL { ?uri rdfs:comment ?description }
                        OPTIONAL { ?uri dcterms:created ?created }
                        OPTIONAL { ?uri dcterms:modified ?modified }
                        {{FILTERS}}
                    }
                }
                {{ORDER_BY}}
                {{LIMIT}}
            `
        };
    }

    /**
     * Build complete SPARQL query from normalized parameters
     * @param {Object} normalizedParams - Normalized ZPT parameters; reads `zoom.level`,
     *   `pan`, `tilt`, `transform` and (optionally) `_metadata.complexity`
     * @returns {Object} Query configuration: `{ query, zoomLevel, filters, tilt, metadata }`
     * @throws {Error} If `zoom.level` has no template
     */
    buildQuery(normalizedParams) {
        const zoomLevel = normalizedParams.zoom.level;
        // Own-property check so inherited names such as "constructor" are not
        // mistaken for templates.
        const template = Object.prototype.hasOwnProperty.call(this.templates, zoomLevel)
            ? this.templates[zoomLevel]
            : undefined;

        if (typeof template !== 'string') {
            throw new Error(`Unsupported zoom level: ${zoomLevel}`);
        }

        const substitutions = {
            FILTERS: this.buildFilters(normalizedParams.pan, zoomLevel),
            ORDER_BY: this.buildOrderBy(normalizedParams.tilt),
            LIMIT: this.buildLimit(normalizedParams.transform)
        };

        // Single pass with a function replacer: a string replacement would expand
        // "$&", "$1", "$$" found in user-derived filter text, and sequential
        // replaces could match placeholder-like text inside an inserted filter.
        const query = this.prefixes + template.replace(
            /\{\{(FILTERS|ORDER_BY|LIMIT)\}\}/g,
            (match, key) => substitutions[key]
        );

        return {
            query,
            zoomLevel,
            filters: normalizedParams.pan,
            tilt: normalizedParams.tilt,
            metadata: {
                complexity: normalizedParams._metadata?.complexity,
                estimatedResults: this.estimateResults(normalizedParams),
                cacheKey: this.buildCacheKey(normalizedParams)
            }
        };
    }

    /**
     * Build filter clauses from pan parameters
     * @param {Object} [panParams] - May contain `topic`, `entity`, `temporal`, `geographic`
     * @param {string} [zoomLevel] - When given, selects which variables the topic filter tests
     * @returns {string} Space-joined graph-pattern fragments, or '' when no filter is set
     */
    buildFilters(panParams, zoomLevel) {
        const pan = panParams ?? {};
        const filterClauses = [];

        if (pan.topic) {
            filterClauses.push(this.buildTopicFilter(pan.topic, this.getTopicVariables(zoomLevel)));
        }
        if (pan.entity) {
            filterClauses.push(this.buildEntityFilter(pan.entity));
        }
        if (pan.temporal) {
            filterClauses.push(this.buildTemporalFilter(pan.temporal));
        }
        if (pan.geographic) {
            filterClauses.push(this.buildGeographicFilter(pan.geographic));
        }

        return filterClauses.join(' ');
    }

    /**
     * Look up the query variables a topic filter should test for a zoom level.
     * @param {string} [zoomLevel]
     * @returns {string[]} Variable names; `['?label', '?prefLabel']` for unknown levels
     */
    getTopicVariables(zoomLevel) {
        const table = this.topicVariables || {};
        const found = Object.prototype.hasOwnProperty.call(table, zoomLevel)
            ? table[zoomLevel]
            : undefined;
        return Array.isArray(found) && found.length > 0 ? found : ['?label', '?prefLabel'];
    }

    /**
     * Build topic filter clause
     * @param {Object} topicFilter - `{ value, pattern }`; `pattern === 'wildcard'` treats
     *   `*` in `value` as "any characters", anything else is a case-insensitive substring match
     * @param {string[]} [variables] - Query variables to test (trusted, inserted verbatim)
     * @returns {string} A FILTER clause OR-ing one test per variable
     */
    buildTopicFilter(topicFilter, variables = ['?label', '?prefLabel']) {
        const { value, pattern } = topicFilter;
        const text = String(value ?? '');
        let tests;

        if (pattern === 'wildcard') {
            // Escape every regex metacharacter except '*', then expand '*' to '.*',
            // so input such as "c++*" is a valid pattern and '.' stays literal.
            const regexSource = text
                .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
                .replace(/\*/g, '.*');
            const literal = this.escapeStringLiteral(regexSource);
            tests = variables.map(variable => `REGEX(STR(${variable}), "${literal}", "i")`);
        } else {
            // The left-hand side is lower-cased in the query, so the needle must be too.
            const literal = this.escapeStringLiteral(text.toLowerCase());
            tests = variables.map(variable => `CONTAINS(LCASE(STR(${variable})), "${literal}")`);
        }

        return `
                FILTER (${tests.join(' || ')})
            `;
    }

    /**
     * Build entity filter clause
     * @param {Object} entityFilter - `{ values, type }`; `type === 'single'` uses only the
     *   first value, any other type produces an IN list
     * @returns {string} FILTER clause matching `?uri` or `?entity`
     * @throws {Error} If a value is not usable as an IRI
     */
    buildEntityFilter(entityFilter) {
        const { values, type } = entityFilter;
        // Tolerate a bare string; indexing it would otherwise yield single characters.
        const iris = Array.isArray(values) ? values : [values];

        if (type === 'single') {
            const iri = this.toIriRef(iris[0]);
            return `FILTER (?uri = ${iri} || ?entity = ${iri})`;
        }

        const uriList = iris.map(iri => this.toIriRef(iri)).join(', ');
        return `FILTER (?uri IN (${uriList}) || ?entity IN (${uriList}))`;
    }

    /**
     * Build temporal filter clause
     * @param {Object} temporalFilter - `{ start, end }`, each an xsd:dateTime string or a Date
     * @returns {string} Triple pattern binding `?created` plus range FILTERs, joined by ' . ';
     *   '' when neither bound is set
     */
    buildTemporalFilter(temporalFilter) {
        const { start, end } = temporalFilter;
        const filterClauses = [];

        if (start || end) {
            filterClauses.push(`?uri dcterms:created ?created`);
        }
        if (start) {
            filterClauses.push(`FILTER (?created >= ${this.toDateTimeLiteral(start)})`);
        }
        if (end) {
            filterClauses.push(`FILTER (?created <= ${this.toDateTimeLiteral(end)})`);
        }

        return filterClauses.join(' . ');
    }

    /**
     * Build geographic filter clause
     * @param {Object} geographicFilter - May contain `bbox` ({ minLon, minLat, maxLon, maxLat })
     *   and/or `center` ({ lat, lon }) together with `radius`
     * @returns {string} OPTIONAL location pattern followed by any bounding FILTERs
     * @throws {Error} If a coordinate or the radius is not a finite number
     */
    buildGeographicFilter(geographicFilter) {
        const filterClauses = [];

        // Bind ?lat / ?lon for the FILTERs below.
        filterClauses.push(`
            OPTIONAL { 
                ?uri ragno:hasLocation ?location .
                ?location ragno:latitude ?lat ;
                         ragno:longitude ?lon 
            }
        `);

        if (geographicFilter.bbox) {
            const { minLon, minLat, maxLon, maxLat } = geographicFilter.bbox;
            const south = this.toNumericLiteral(minLat, 'bbox.minLat');
            const north = this.toNumericLiteral(maxLat, 'bbox.maxLat');
            const west = this.toNumericLiteral(minLon, 'bbox.minLon');
            const east = this.toNumericLiteral(maxLon, 'bbox.maxLon');
            filterClauses.push(`
                FILTER (?lat >= ${south} && ?lat <= ${north} && 
                        ?lon >= ${west} && ?lon <= ${east})
            `);
        }

        if (geographicFilter.center && geographicFilter.radius) {
            const lat = this.toNumericLiteral(geographicFilter.center.lat, 'center.lat');
            const lon = this.toNumericLiteral(geographicFilter.center.lon, 'center.lon');
            const radius = Number(this.toNumericLiteral(geographicFilter.radius, 'radius'));
            // Approximate distance filter (not precise, but sufficient for basic filtering):
            // the radius is divided by 111 and used as a degree tolerance on both axes.
            // NOTE(review): presumably radius is in kilometres (~111 km per degree) - confirm.
            const delta = radius / 111;
            filterClauses.push(`
                FILTER (ABS(?lat - ${lat}) <= ${delta} && 
                        ABS(?lon - ${lon}) <= ${delta})
            `);
        }

        return filterClauses.join(' ');
    }

    /**
     * Build ORDER BY clause based on tilt representation
     * @param {Object} [tiltParams] - `{ representation }`
     * @returns {string} ORDER BY clause; unknown representations order by `?uri`
     */
    buildOrderBy(tiltParams) {
        switch (tiltParams?.representation) {
            case 'temporal':
                return 'ORDER BY DESC(?created)';
            case 'embedding':
                return 'ORDER BY ?uri'; // Will be overridden by similarity search
            case 'keywords':
                return 'ORDER BY ?label';
            case 'graph':
                return 'ORDER BY DESC(?uri)'; // Graph structure ordering
            default:
                return 'ORDER BY ?uri';
        }
    }

    /**
     * Build LIMIT clause based on transform parameters
     * @param {Object} [transformParams] - Reads `tokenBudget.content`
     * @returns {string} `LIMIT n` with n = floor(budget / 50) clamped to [0, 1000];
     *   `LIMIT 100` when no usable budget is present
     */
    buildLimit(transformParams) {
        const estimatedTokensPerItem = 50; // Conservative estimate
        const hardLimit = 1000;
        const defaultLimit = 100; // Same fallback extractLimit() uses

        const raw = transformParams?.tokenBudget?.content;
        const tokenBudget = (raw === null || raw === undefined || raw === '') ? NaN : Number(raw);
        if (!Number.isFinite(tokenBudget)) {
            // Never emit "LIMIT NaN"; that is not valid SPARQL.
            return `LIMIT ${defaultLimit}`;
        }

        const maxResults = Math.min(
            Math.max(Math.floor(tokenBudget / estimatedTokensPerItem), 0),
            hardLimit
        );

        return `LIMIT ${maxResults}`;
    }

    /**
     * Build similarity search query for embedding-based tilt
     * @param {Object} normalizedParams - Normalized ZPT parameters with `tilt.representation === 'embedding'`
     * @param {*} queryEmbedding - Converted with String() and embedded as a string literal
     * @returns {Object} The buildQuery() result with `query` replaced, plus `similarity` and `threshold`
     * @throws {Error} If the tilt representation is not 'embedding'
     */
    buildSimilarityQuery(normalizedParams, queryEmbedding) {
        if (normalizedParams.tilt.representation !== 'embedding') {
            throw new Error('Similarity query only supported for embedding tilt');
        }

        const threshold = 0.1;
        const baseQuery = this.buildQuery(normalizedParams);

        // Strip the prefix block and narrow the projection of the inner query.
        // [\s\S] is required because SELECT and WHERE sit on different lines and
        // '.' does not match a newline.
        const candidateQuery = baseQuery.query
            .replace(this.prefixes, () => '')
            .replace(/SELECT[\s\S]*?WHERE/, 'SELECT DISTINCT ?uri ?label ?embedding ?metadata WHERE');

        const embeddingLiteral = this.escapeStringLiteral(queryEmbedding);

        // NOTE(review): semem:cosineSimilarity is presumably an extension function
        // provided by the target endpoint - confirm it is available there.
        const similarityQuery = `
            ${this.prefixes}
            SELECT ?uri ?label ?similarity ?embedding ?metadata
            WHERE {
                {
                    ${candidateQuery}
                }
                BIND(semem:cosineSimilarity(?embedding, "${embeddingLiteral}") AS ?similarity)
                FILTER(?similarity > ${threshold})
            }
            ORDER BY DESC(?similarity)
            LIMIT ${this.extractLimit(baseQuery.query)}
        `;

        return {
            ...baseQuery,
            query: similarityQuery,
            similarity: true,
            threshold
        };
    }

    /**
     * Build aggregation query for community-level zoom
     * @param {Object} normalizedParams - Normalized ZPT parameters
     * @returns {Object} For 'community' zoom: `{ query, zoomLevel, aggregated, filters }`;
     *   for any other zoom level, the plain buildQuery() result
     */
    buildAggregationQuery(normalizedParams) {
        if (normalizedParams.zoom.level !== 'community') {
            return this.buildQuery(normalizedParams);
        }

        // NOTE(review): the pan filters are placed inside the sub-select, where only
        // ?community, ?member and ?similarity are bound; filters that reference
        // ?uri / ?label / ?entity may therefore never match - confirm intended scope.
        const aggregationQuery = `
            ${this.prefixes}
            SELECT ?community ?label ?memberCount ?avgSimilarity
            WHERE {
                GRAPH <${this.graphName}> {
                    ?community a ragno:Community ;
                               rdfs:label ?label .
                    {
                        SELECT ?community (COUNT(?member) AS ?memberCount) 
                               (AVG(?similarity) AS ?avgSimilarity)
                        WHERE {
                            ?community ragno:hasMember ?member .
                            OPTIONAL { ?member semem:similarity ?similarity }
                            ${this.buildFilters(normalizedParams.pan)}
                        }
                        GROUP BY ?community
                    }
                }
            }
            ORDER BY DESC(?memberCount)
            ${this.buildLimit(normalizedParams.transform)}
        `;

        return {
            query: aggregationQuery,
            zoomLevel: 'community',
            aggregated: true,
            filters: normalizedParams.pan
        };
    }

    /**
     * Extract LIMIT value from query string
     * @param {string} query
     * @returns {string} Digits of the first `LIMIT n` found, or '100' when there is none
     */
    extractLimit(query) {
        const limitMatch = query.match(/LIMIT\s+(\d+)/i);
        return limitMatch ? limitMatch[1] : '100';
    }

    /**
     * Estimate number of results from the number of active pan filters
     * @param {Object} normalizedParams - Reads `pan`
     * @returns {number} 1000 reduced by 30% per active filter, never below 0
     */
    estimateResults(normalizedParams) {
        const baseEstimate = 1000;
        const reductionPercentPerFilter = 30;

        // Count only filters buildFilters() would actually apply (truthy values).
        const activeFilters = Object.values(normalizedParams.pan ?? {}).filter(Boolean).length;

        // Integer percentages avoid float artefacts (1 - 0.9 is 0.0999...), and the
        // clamp stops four or more filters from producing a negative estimate.
        const remainingPercent = 100 - reductionPercentPerFilter * activeFilters;
        return Math.max(0, Math.floor((baseEstimate * remainingPercent) / 100));
    }

    /**
     * Build cache key for query results
     * @param {Object} normalizedParams - Reads `zoom.level`, `pan`, `tilt.representation`,
     *   `transform.maxTokens` and `transform.tokenBudget.content`
     * @returns {string} Base-36 hash of those values
     */
    buildCacheKey(normalizedParams) {
        const keyData = {
            zoom: normalizedParams.zoom.level,
            pan: normalizedParams.pan,
            tilt: normalizedParams.tilt.representation,
            tokenBudget: normalizedParams.transform.maxTokens,
            // buildLimit() derives LIMIT from this value, so it must affect the key.
            contentBudget: normalizedParams.transform.tokenBudget?.content
        };

        return this.hashObject(keyData);
    }

    /**
     * Return a copy of `value` in which every plain object has its keys in sorted order,
     * so that JSON.stringify() output does not depend on property insertion order.
     * @param {*} value
     * @returns {*}
     */
    canonicalize(value) {
        if (Array.isArray(value)) {
            return value.map(item => this.canonicalize(item));
        }
        // Objects with toJSON (e.g. Date) are left for JSON.stringify to serialize.
        if (value !== null && typeof value === 'object' && typeof value.toJSON !== 'function') {
            // Null prototype so a key named "__proto__" becomes an ordinary property.
            const sorted = Object.create(null);
            for (const key of Object.keys(value).sort()) {
                sorted[key] = this.canonicalize(value[key]);
            }
            return sorted;
        }
        return value;
    }

    /**
     * Simple hash function for cache keys
     * @param {*} obj - JSON-serializable value
     * @returns {string} Base-36 string of a 32-bit rolling hash over the canonical JSON
     */
    hashObject(obj) {
        // An array replacer passed to JSON.stringify is a key allowlist applied at every
        // depth and would silently drop nested properties, so keys are sorted recursively.
        const str = JSON.stringify(this.canonicalize(obj)) ?? '';
        let hash = 0;
        for (let i = 0; i < str.length; i++) {
            hash = ((hash << 5) - hash) + str.charCodeAt(i);
            hash |= 0; // keep within 32-bit integer range
        }
        return Math.abs(hash).toString(36);
    }

    /**
     * Escape a value for use between double quotes in a SPARQL string literal.
     * @param {*} value - Converted with String()
     * @returns {string}
     */
    escapeStringLiteral(value) {
        return String(value)
            .replace(/\\/g, '\\\\')
            .replace(/"/g, '\\"')
            .replace(/\n/g, '\\n')
            .replace(/\r/g, '\\r')
            .replace(/\t/g, '\\t');
    }

    /**
     * Wrap a value in angle brackets after checking it can appear inside an IRI reference.
     * @param {*} value
     * @returns {string} `<value>`
     * @throws {Error} If `value` is not a non-empty string or contains a character
     *   that is not permitted inside `<...>`
     */
    toIriRef(value) {
        const isUsable = typeof value === 'string'
            && value.length > 0
            && !/[\u0000-\u0020<>"{}|^`\\]/.test(value);
        if (!isUsable) {
            throw new Error(`Invalid IRI: ${String(value)}`);
        }
        return `<${value}>`;
    }

    /**
     * Render a Date or string as an xsd:dateTime typed literal.
     * @param {Date|string} value - Dates are serialized with toISOString()
     * @returns {string} `"..."^^xsd:dateTime`
     */
    toDateTimeLiteral(value) {
        const lexical = value instanceof Date ? value.toISOString() : String(value);
        return `"${this.escapeStringLiteral(lexical)}"^^xsd:dateTime`;
    }

    /**
     * Render a finite number (or numeric string) for direct inclusion in a query.
     * @param {number|string} value
     * @param {string} label - Name used in the error message
     * @returns {string}
     * @throws {Error} If `value` is not a finite number
     */
    toNumericLiteral(value, label) {
        let numeric = NaN;
        if (typeof value === 'number') {
            numeric = value;
        } else if (typeof value === 'string' && value.trim() !== '') {
            numeric = Number(value);
        }
        if (!Number.isFinite(numeric)) {
            throw new Error(`Invalid numeric value for ${label}: ${String(value)}`);
        }
        return String(numeric);
    }

    /**
     * Validate that query can be executed
     * @param {Object} queryConfig - Result of one of the build*Query() methods
     * @returns {boolean} Always `true` when validation passes
     * @throws {Error} If the query string or zoom level is missing, or the query
     *   lacks a SELECT or WHERE keyword
     */
    validateQuery(queryConfig) {
        if (!queryConfig || !queryConfig.query || typeof queryConfig.query !== 'string') {
            throw new Error('Invalid query configuration: missing query string');
        }

        if (!queryConfig.zoomLevel) {
            throw new Error('Invalid query configuration: missing zoom level');
        }

        // Basic SPARQL syntax validation
        if (!queryConfig.query.includes('SELECT') || !queryConfig.query.includes('WHERE')) {
            throw new Error('Invalid SPARQL query: missing SELECT or WHERE clause');
        }

        return true;
    }

    /**
     * Get query statistics for optimization
     * @param {Object} queryConfig - Reads `query`, `filters` and `metadata`
     * @returns {Object} `{ hasFilters, hasOptionals, hasRegex, estimatedComplexity, estimatedResults }`;
     *   note `hasOptionals` is the number of OPTIONAL occurrences, not a boolean
     */
    getQueryStats(queryConfig) {
        const query = queryConfig.query;

        return {
            hasFilters: Object.keys(queryConfig.filters || {}).length > 0,
            hasOptionals: (query.match(/OPTIONAL/g) || []).length,
            hasRegex: query.includes('REGEX'),
            // ?? rather than || so that a legitimate 0 is reported as 0.
            estimatedComplexity: queryConfig.metadata?.complexity ?? 1,
            estimatedResults: queryConfig.metadata?.estimatedResults ?? 100
        };
    }
}