JSDoc: Source: aux/ResearchService.js

Source: aux/ResearchService.js
/**
 * ResearchService - Unified interface for Wikipedia and Wikidata research
 * 
 * This service provides a simplified API for conducting research using both
 * Wikipedia and Wikidata sources. It can be used independently or as part
 * of the HTTP API through ResearchAPI.
 * 
 * Features:
 * - Concept extraction and research
 * - Entity discovery and lookup
 * - Wikipedia article search
 * - Combined research workflows
 * - Knowledge graph storage
 */

import WikidataResearcher from './wikidata/WikidataResearcher.js';
import WikipediaSearch from './wikipedia/Search.js';
import WikidataSearch from './wikidata/WikidataSearch.js';
import WikidataConnector from './wikidata/WikidataConnector.js';
import logger from 'loglevel';

/**
 * Return a shallow copy of `source` without the keys whose value is `undefined`.
 *
 * Used before merging caller options over defaults so that an option passed
 * explicitly as `undefined` does not overwrite a default. `null` and other
 * falsy values are kept as deliberate overrides.
 *
 * @param {Object} [source] - Options object; null/undefined is treated as empty
 * @returns {Object} New object holding only the defined entries
 */
function omitUndefined(source) {
    return Object.fromEntries(
        Object.entries(source ?? {}).filter(([, value]) => value !== undefined)
    );
}

/**
 * Facade over the Wikidata and Wikipedia helpers imported by this module.
 *
 * Holds the merged configuration (`this.options`), one instance of each helper,
 * and a set of usage counters (`this.stats`).
 */
export default class ResearchService {
    /**
     * Build the service, its helper instances and zeroed statistics.
     *
     * @param {Object} [options] - Configuration overrides; any key that is
     *   present with a value other than `undefined` replaces the default
     * @param {string} [options.sparqlEndpoint] - SPARQL update endpoint
     * @param {Object} [options.sparqlAuth] - Credentials `{ user, password }`
     * @param {string} [options.defaultGraphURI] - Graph URI handed to WikipediaSearch
     * @param {number} [options.maxEntitiesPerConcept=3]
     * @param {number} [options.maxSearchResults=15]
     * @param {number} [options.minConfidence=0.4]
     * @param {number} [options.timeout=30000]
     */
    constructor(options = {}) {
        // NOTE(review): the fallback endpoint and credentials below are hard-coded
        // in source; deployments should pass sparqlEndpoint/sparqlAuth explicitly.
        this.options = {
            sparqlEndpoint: 'http://localhost:3030/semem/update',
            sparqlAuth: { user: 'admin', password: 'admin123' },
            defaultGraphURI: 'http://purl.org/stuff/research',
            maxEntitiesPerConcept: 3,
            maxSearchResults: 15,
            minConfidence: 0.4,
            timeout: 30000,
            // Defined caller values win; explicit `undefined` keeps the default.
            ...omitUndefined(options)
        };

        // Initialize service instances
        this.wikidataResearcher = new WikidataResearcher();
        this.wikipediaSearch = new WikipediaSearch({
            sparqlEndpoint: this.options.sparqlEndpoint,
            sparqlAuth: this.options.sparqlAuth,
            graphURI: this.options.defaultGraphURI,
            timeout: this.options.timeout
        });
        this.wikidataSearch = new WikidataSearch();
        this.wikidataConnector = new WikidataConnector();

        // Statistics start from the same zeroed shape resetStats() produces.
        this.resetStats();

        logger.info('ResearchService initialized');
    }

    /**
     * Research concepts using Wikidata.
     *
     * Increments `totalResearches` and `wikidataQueries`, delegates to
     * `wikidataResearcher.executeResearch`, then adds the reported
     * `statistics.entitiesFound` to `entitiesDiscovered`.
     *
     * @param {Object} input - Research input
     * @param {string} input.question - Question to research
     * @param {Array<string>} input.concepts - Pre-extracted concepts (optional)
     * @param {Object} resources - Required resources (llmHandler, sparqlHelper, config)
     * @param {Object} options - Research options; every defined key is forwarded
     *   to the researcher and overrides the derived defaults below
     * @param {number} [options.maxEntitiesPerConcept] - Defaults to the service setting
     * @param {number} [options.maxSearchResults] - Forwarded as `maxWikidataSearchResults`
     * @param {number} [options.minConfidence] - Forwarded as `minEntityConfidence`; 0 is honoured
     * @param {boolean} [options.enableHierarchySearch=true]
     * @param {boolean} [options.storeResults=true]
     * @returns {Promise<Object>} Whatever `executeResearch` resolves with
     */
    async researchConcepts(input, resources, options = {}) {
        this.stats.totalResearches++;
        this.stats.wikidataQueries++;

        const supplied = omitUndefined(options);
        const researchOptions = {
            maxEntitiesPerConcept: this.options.maxEntitiesPerConcept,
            // A zero result cap is treated as "not set", as in the original fallback.
            maxWikidataSearchResults: supplied.maxSearchResults || this.options.maxSearchResults,
            // `??` rather than `||`: a confidence threshold of 0 is a valid request.
            minEntityConfidence: supplied.minConfidence ?? this.options.minConfidence,
            enableHierarchySearch: true,
            storeResults: true,
            ...supplied
        };

        logger.info(`Researching concepts for: "${input.question || 'provided concepts'}"`);

        const result = await this.wikidataResearcher.executeResearch(input, resources, researchOptions);

        this.stats.entitiesDiscovered += result?.statistics?.entitiesFound || 0;

        return result;
    }

    /**
     * Search Wikipedia articles.
     *
     * Increments `wikipediaQueries` and delegates to `wikipediaSearch.search`.
     *
     * @param {string} query - Search query
     * @param {Object} options - Search options; every defined key is forwarded
     * @param {number} [options.limit=10]
     * @param {number} [options.offset=0]
     * @param {string} [options.namespace='0']
     * @param {string} [options.format='json']
     * @param {boolean} [options.ingestResults=true]
     * @returns {Promise<Object>} Whatever `wikipediaSearch.search` resolves with
     */
    async searchWikipedia(query, options = {}) {
        this.stats.wikipediaQueries++;

        const searchOptions = {
            limit: 10,
            offset: 0,
            namespace: '0',
            format: 'json',
            ingestResults: true,
            ...omitUndefined(options)
        };

        logger.info(`Searching Wikipedia for: "${query}"`);

        return await this.wikipediaSearch.search(query, searchOptions);
    }

    /**
     * Combined research using both Wikidata and Wikipedia.
     *
     * Each source is queried in its own try/catch: a failure is logged and
     * recorded as `{ error: message }` for that source instead of rejecting.
     * Note that the delegated `researchConcepts` call also increments
     * `totalResearches`, so one combined run adds two to that counter.
     *
     * @param {string} question - Research question
     * @param {Object} resources - Required resources (llmHandler, sparqlHelper, config)
     * @param {Object} options - Research options, forwarded to `researchConcepts`
     * @param {boolean} [options.storeWikidataResults=true] - Becomes `storeResults` for Wikidata
     * @param {boolean} [options.storeWikipediaResults=true] - Becomes `ingestResults` for Wikipedia
     * @param {number} [options.wikipediaLimit=5] - Wikipedia result limit
     * @returns {Promise<Object>} `{ wikidata, wikipedia, summary }`
     */
    async combinedResearch(question, resources, options = {}) {
        this.stats.totalResearches++;

        logger.info(`Starting combined research for: "${question}"`);

        const results = {};

        // Step 1: Wikidata research
        try {
            results.wikidata = await this.researchConcepts(
                { question },
                resources,
                {
                    ...options,
                    storeResults: options.storeWikidataResults !== false
                }
            );
        } catch (error) {
            logger.warn('Wikidata research failed:', error.message);
            results.wikidata = { error: error.message };
        }

        // Step 2: Wikipedia search
        try {
            results.wikipedia = await this.searchWikipedia(question, {
                limit: options.wikipediaLimit || 5,
                ingestResults: options.storeWikipediaResults !== false
            });
        } catch (error) {
            logger.warn('Wikipedia search failed:', error.message);
            results.wikipedia = { error: error.message };
        }

        // Generate summary; a failed source contributes zero to the counts.
        const entitiesFound = results.wikidata?.statistics?.entitiesFound || 0;
        const wikipediaArticles = results.wikipedia?.results?.length || 0;
        results.summary = {
            entitiesFound,
            wikipediaArticles,
            totalSources: entitiesFound + wikipediaArticles,
            hasWikidataResults: !results.wikidata?.error,
            hasWikipediaResults: !results.wikipedia?.error
        };

        return results;
    }

    /**
     * Look up a specific entity by ID or name.
     *
     * With `entityId` the details are fetched directly. Otherwise the name is
     * searched with `limit: 1` and the first hit's `id` is used.
     *
     * @param {Object} params - Lookup parameters
     * @param {string} params.entityId - Wikidata entity ID (e.g., Q42)
     * @param {string} params.entityName - Entity name to search for
     * @param {string} params.language - Language code (default: 'en')
     * @returns {Promise<Object>} Whatever `wikidataConnector.getEntityDetails` resolves with
     * @throws {Error} When neither identifier is given, or the name search
     *   yields no usable hit
     */
    async lookupEntity({ entityId, entityName, language = 'en' } = {}) {
        if (!entityId && !entityName) {
            throw new Error('Either entityId or entityName must be provided');
        }

        logger.info(`Looking up entity: ${entityId || entityName}`);

        if (entityId) {
            // Direct lookup by ID
            return await this.wikidataConnector.getEntityDetails(entityId, { language });
        }

        // Search by name first
        const searchResults = await this.wikidataSearch.searchEntities(entityName, {
            language,
            limit: 1
        });

        // Guard against a null/non-array reply as well as an empty list.
        const foundEntityId = Array.isArray(searchResults) ? searchResults[0]?.id : undefined;
        if (!foundEntityId) {
            throw new Error(`No entity found with name: ${entityName}`);
        }

        return await this.wikidataConnector.getEntityDetails(foundEntityId, { language });
    }

    /**
     * Extract concepts from text and optionally research them.
     *
     * Calls `resources.llmHandler.extractConcepts(text)`, adds the count to
     * `conceptsExtracted`, and - unless `options.searchWikidata` is `false` or
     * nothing was extracted - runs `researchConcepts` on the concepts. A
     * research failure is logged and stored as `wikidataResults.error`.
     *
     * @param {string} text - Text to analyze
     * @param {Object} resources - Required resources (llmHandler)
     * @param {Object} options - Processing options
     * @param {boolean} [options.searchWikidata=true]
     * @param {number} [options.maxEntitiesPerConcept=2]
     * @param {boolean} [options.storeResults=true]
     * @returns {Promise<Object>} `{ concepts, summary, wikidataResults? }`
     * @throws {Error} When `resources.llmHandler` is missing
     */
    async extractAndResearchConcepts(text, resources, options = {}) {
        // Tolerate a missing `resources` so the descriptive error below is raised.
        const { llmHandler } = resources ?? {};

        if (!llmHandler) {
            throw new Error('LLM handler is required for concept extraction');
        }

        logger.info(`Extracting concepts from text (${text.length} chars)`);

        // Extract concepts; anything other than an array counts as "none found".
        const extracted = await llmHandler.extractConcepts(text);
        const concepts = Array.isArray(extracted) ? extracted : [];
        this.stats.conceptsExtracted += concepts.length;

        const result = {
            concepts,
            summary: {
                conceptsExtracted: concepts.length
            }
        };

        const shouldResearch = options.searchWikidata !== false && concepts.length > 0;
        if (!shouldResearch) {
            return result;
        }

        // Optionally research concepts using Wikidata
        try {
            result.wikidataResults = await this.researchConcepts(
                { concepts },
                resources,
                {
                    maxEntitiesPerConcept: options.maxEntitiesPerConcept || 2,
                    storeResults: options.storeResults !== false
                }
            );

            result.summary.entitiesFound = result.wikidataResults?.statistics?.entitiesFound || 0;
        } catch (error) {
            logger.warn('Concept research failed:', error.message);
            result.wikidataResults = { error: error.message };
        }

        return result;
    }

    /**
     * Batch research multiple questions.
     *
     * Runs `combinedResearch` for every question, either in consecutive groups
     * of `batchSize` awaited together (default) or strictly one at a time.
     *
     * @param {Array<string>} questions - Questions to research
     * @param {Object} resources - Required resources
     * @param {Object} options - Research options, also forwarded to `combinedResearch`
     * @param {number} [options.batchSize=3] - Group size; values that are not
     *   at least 1 after flooring fall back to 3
     * @param {boolean} [options.parallel=true] - `false` selects sequential mode
     * @param {number} [options.batchDelayMs=1000] - Pause between groups in
     *   parallel mode; 0 disables the pause, invalid values fall back to 1000
     * @returns {Promise<Array>} One `{ index, question, success, result | error }`
     *   entry per question, in input order
     * @throws {TypeError} When `questions` is not an array
     */
    async batchResearch(questions, resources, options = {}) {
        if (!Array.isArray(questions)) {
            throw new TypeError('questions must be an array');
        }

        logger.info(`Starting batch research for ${questions.length} questions`);

        // A non-positive step would never advance the batching loop below.
        const requestedSize = Math.floor(Number(options.batchSize));
        const batchSize = requestedSize >= 1 ? requestedSize : 3;
        const requestedDelay = Number(options.batchDelayMs ?? 1000);
        const batchDelayMs = Number.isFinite(requestedDelay) && requestedDelay >= 0
            ? requestedDelay
            : 1000;
        const useParallel = options.parallel !== false;

        // Research one question and wrap the outcome; never rejects.
        const researchOne = async (question, index, modeLabel) => {
            try {
                const result = await this.combinedResearch(question, resources, options);
                return { index, question, success: true, result };
            } catch (error) {
                logger.warn(`${modeLabel} research failed for question ${index}:`, error.message);
                return { index, question, success: false, error: error.message };
            }
        };

        const results = [];

        if (useParallel) {
            // Process in parallel batches
            for (let start = 0; start < questions.length; start += batchSize) {
                const batch = questions.slice(start, start + batchSize);
                const batchResults = await Promise.all(
                    batch.map((question, offset) => researchOne(question, start + offset, 'Batch'))
                );
                results.push(...batchResults);

                // Brief pause between batches to avoid overwhelming services
                const hasMoreBatches = start + batchSize < questions.length;
                if (hasMoreBatches && batchDelayMs > 0) {
                    await new Promise(resolve => setTimeout(resolve, batchDelayMs));
                }
            }
        } else {
            // Process sequentially
            for (let index = 0; index < questions.length; index++) {
                results.push(await researchOne(questions[index], index, 'Sequential'));
            }
        }

        return results;
    }

    /**
     * Get service statistics.
     *
     * @returns {Object} Copy of the counters plus `averageEntitiesPerQuery`,
     *   which is a two-decimal string (from `toFixed(2)`) once at least one
     *   Wikidata query has run and the number `0` before that
     */
    getStats() {
        const { entitiesDiscovered, wikidataQueries } = this.stats;
        return {
            ...this.stats,
            averageEntitiesPerQuery: wikidataQueries > 0
                ? (entitiesDiscovered / wikidataQueries).toFixed(2)
                : 0
        };
    }

    /**
     * Reset statistics: replaces `this.stats` with a fresh all-zero object.
     */
    resetStats() {
        this.stats = {
            totalResearches: 0,
            wikidataQueries: 0,
            wikipediaQueries: 0,
            entitiesDiscovered: 0,
            conceptsExtracted: 0
        };
    }

    /**
     * Update configuration.
     *
     * Merges the defined entries of `newOptions` into `this.options` and copies
     * the endpoint, credentials, graph URI and timeout onto
     * `this.wikipediaSearch.options`. The other helper instances are not touched.
     *
     * @param {Object} newOptions - Partial configuration; keys whose value is
     *   `undefined` leave the current setting in place
     */
    updateConfig(newOptions) {
        this.options = {
            ...this.options,
            ...omitUndefined(newOptions)
        };

        // Update Wikipedia search configuration
        this.wikipediaSearch.options = {
            ...this.wikipediaSearch.options,
            sparqlEndpoint: this.options.sparqlEndpoint,
            sparqlAuth: this.options.sparqlAuth,
            graphURI: this.options.defaultGraphURI,
            timeout: this.options.timeout
        };

        logger.info('ResearchService configuration updated');
    }
}