Source: api/features/WikipediaAPI.js

import BaseAPI from '../common/BaseAPI.js';
import { v4 as uuidv4 } from 'uuid';
import WikipediaSearch from '../../aux/wikipedia/Search.js';

/**
 * API handler exposing Wikipedia search and article operations.
 *
 * Supported operations (see {@link WikipediaAPI#executeOperation}):
 * - `search`        – single query search
 * - `article`       – article lookup by title or page ID, with optional ingestion
 * - `batch-search`  – several queries, in parallel chunks or sequentially
 * - `ingest`        – ingest supplied or fetched articles via the search service
 * - `categories`    – category-based search
 *
 * All Wikipedia access is delegated to a `WikipediaSearch` instance that is
 * created in {@link WikipediaAPI#initialize}.
 */
export default class WikipediaAPI extends BaseAPI {
    /**
     * @param {object} [config] - Handler configuration, also passed to BaseAPI.
     * @param {number} [config.defaultLimit=10] - Limit used when a request omits one.
     * @param {number} [config.maxLimit=50] - Largest limit a request may ask for.
     * @param {number} [config.requestTimeout=30000] - Timeout handed to the search service.
     * @param {string} [config.defaultGraphURI] - Graph URI handed to the search service.
     * @param {string} [config.defaultNamespace='0'] - Namespace used when a request omits one.
     */
    constructor(config = {}) {
        super(config);

        // Dependencies resolved later, in initialize().
        this.memoryManager = null;
        this.wikipediaSearch = null;

        // Falsy config values (including 0 and '') fall back to the defaults.
        this.defaultLimit = config.defaultLimit || 10;
        this.maxLimit = config.maxLimit || 50;
        this.requestTimeout = config.requestTimeout || 30000;
        this.defaultGraphURI = config.defaultGraphURI || 'http://purl.org/stuff/wikipedia';
        this.defaultNamespace = config.defaultNamespace || '0'; // Main articles
    }

    /**
     * Resolve dependencies from the registry and build the Wikipedia search service.
     *
     * @throws {Error} If `config.registry` is missing, or if any dependency
     *   lookup / service construction fails (the failure is logged, then rethrown).
     */
    async initialize() {
        await super.initialize();

        const registry = this.config.registry;
        if (!registry) {
            throw new Error('Registry is required for WikipediaAPI');
        }

        try {
            this.memoryManager = registry.get('memory');

            // Look the config service up once instead of once per setting.
            const appConfig = registry.get('config');
            const performanceConfig = appConfig?.get('performance.wikipedia') || {};

            const configuredAuth = appConfig?.get('sparqlAuth');
            if (!configuredAuth) {
                // SECURITY: the fallback below is a hard-coded credential pair kept
                // for backward compatibility; deployments should configure sparqlAuth.
                this.logger.warn('No sparqlAuth configured; falling back to built-in default SPARQL credentials');
            }

            this.wikipediaSearch = new WikipediaSearch({
                sparqlEndpoint: appConfig?.get('sparqlUpdateEndpoint') || 'http://localhost:3030/semem/update',
                sparqlAuth: configuredAuth || { user: 'admin', password: 'admin123' },
                graphURI: this.defaultGraphURI,
                timeout: this.requestTimeout,
                defaultSearchLimit: performanceConfig.searchResultsLimit || 10,
                rateLimit: performanceConfig.rateLimit || 100
            });

            this.logger.info('WikipediaAPI initialized successfully');
        } catch (error) {
            this.logger.error('Failed to initialize WikipediaAPI:', error);
            throw error;
        }
    }

    /**
     * Execute a Wikipedia operation and wrap the outcome in a result envelope.
     *
     * Failures inside the operation are logged and reported as
     * `{ success: false, error }` rather than thrown. Only the up-front
     * parameter check throws.
     *
     * @param {string} operation - One of `search`, `article`, `batch-search`,
     *   `ingest`, `categories`.
     * @param {object} params - Operation parameters; `params.requestId` is used
     *   as the request ID when present, otherwise a UUID is generated.
     * @returns {Promise<object>} `{ success, operation, requestId, duration }` plus
     *   `result` on success or `error` (message string) on failure.
     * @throws {Error} If `params` is not an object.
     */
    async executeOperation(operation, params) {
        this._validateParams(params);

        const start = Date.now();
        const requestId = params.requestId || uuidv4();

        try {
            let result;

            switch (operation) {
                case 'search':
                    result = await this._executeSearch(params, requestId);
                    break;

                case 'article':
                    result = await this._executeArticleLookup(params, requestId);
                    break;

                case 'batch-search':
                    result = await this._executeBatchSearch(params, requestId);
                    break;

                case 'ingest':
                    result = await this._executeIngest(params, requestId);
                    break;

                case 'categories':
                    result = await this._executeCategorySearch(params, requestId);
                    break;

                default:
                    throw new Error(`Unknown Wikipedia operation: ${operation}`);
            }

            return {
                success: true,
                operation,
                requestId,
                duration: Date.now() - start,
                result
            };
        } catch (error) {
            const duration = Date.now() - start;
            this.logger.error(`Wikipedia operation ${operation} failed:`, error);

            return {
                success: false,
                operation,
                requestId,
                duration,
                error: error.message
            };
        }
    }

    /**
     * Reject a requested limit that exceeds the configured maximum.
     *
     * @param {number} limit - Requested result limit.
     * @throws {Error} If `limit` is greater than `this.maxLimit`.
     */
    _assertLimitWithinMax(limit) {
        if (limit > this.maxLimit) {
            throw new Error(`Limit cannot exceed ${this.maxLimit}`);
        }
    }

    /**
     * Execute a single Wikipedia search.
     *
     * @param {object} params - `query` (required), plus optional `limit`,
     *   `offset`, `namespace`, `ingestResults`, `language`.
     * @param {string} requestId - ID used to tag log lines.
     * @returns {Promise<object>} The service result spread together with
     *   `query`, the `searchOptions` that were sent, and `resultCount`.
     * @throws {Error} If `query` is missing or `limit` exceeds the maximum.
     */
    async _executeSearch(params, requestId) {
        const {
            query,
            limit = this.defaultLimit,
            offset = 0,
            namespace = this.defaultNamespace,
            ingestResults = false,
            language = 'en'
        } = params;

        if (!query) {
            throw new Error('Search query is required');
        }
        this._assertLimitWithinMax(limit);

        this.logger.info(`[${requestId}] Searching Wikipedia for: "${query}" (limit: ${limit})`);

        const searchOptions = {
            limit,
            offset,
            namespace,
            format: 'json',
            ingestResults,
            language
        };

        const result = await this.wikipediaSearch.search(query, searchOptions);

        return {
            query,
            ...result,
            searchOptions,
            resultCount: result.results?.length || 0
        };
    }

    /**
     * Look up one article by title or page ID, optionally ingesting it.
     *
     * @param {object} params - `title` and/or `pageId` (at least one required),
     *   plus optional `includeContent`, `includeSummary`, `includeMetadata`,
     *   `ingestArticle`, `language`.
     * @param {string} requestId - ID used to tag log lines.
     * @returns {Promise<object>} `{ article, lookupMethod, requestedTitle, requestedPageId }`.
     *   When ingestion was requested, `article.ingestionResult` is set.
     * @throws {Error} If neither identifier is given, or if lookup/ingestion
     *   fails (wrapped; the underlying error is available as `cause`).
     */
    async _executeArticleLookup(params, requestId) {
        const {
            title,
            pageId,
            includeContent = true,
            includeSummary = true,
            includeMetadata = true,
            ingestArticle = false,
            language = 'en'
        } = params;

        if (!title && !pageId) {
            throw new Error('Either title or pageId must be provided');
        }

        this.logger.info(`[${requestId}] Looking up Wikipedia article: ${title || `page ${pageId}`}`);

        try {
            const article = await this.wikipediaSearch.getArticle({
                title,
                pageId,
                includeContent,
                includeSummary,
                includeMetadata,
                language
            });

            // Optionally ingest the article into the knowledge graph
            if (ingestArticle && article) {
                article.ingestionResult = await this.wikipediaSearch.ingestArticle(article);
            }

            return {
                article,
                lookupMethod: title ? 'title' : 'pageId',
                requestedTitle: title,
                requestedPageId: pageId
            };
        } catch (error) {
            // Keep the original error (and its stack) reachable for debugging.
            throw new Error(`Article lookup failed: ${error.message}`, { cause: error });
        }
    }

    /**
     * Run one query of a batch and convert the outcome into a result entry.
     * Never throws: failures are logged and returned as `success: false`.
     *
     * @param {string} query - Query text.
     * @param {number} index - Position of the query in the caller's array.
     * @param {object} searchOptions - Options for the search service (copied per call).
     * @param {string} failureLabel - Prefix for the warning log line
     *   (`Batch` or `Sequential`).
     * @returns {Promise<object>} Per-query result entry.
     */
    async _searchBatchEntry(query, index, searchOptions, failureLabel) {
        try {
            const searchResult = await this.wikipediaSearch.search(query, { ...searchOptions });
            return {
                index,
                query,
                success: true,
                result: searchResult,
                resultCount: searchResult.results?.length || 0
            };
        } catch (error) {
            this.logger.warn(`${failureLabel} search failed for query "${query}":`, error.message);
            return {
                index,
                query,
                success: false,
                error: error.message
            };
        }
    }

    /**
     * Search several queries, either in parallel chunks or one at a time.
     *
     * @param {object} params - `queries` (non-empty array, at most 20), plus
     *   optional `limit`, `namespace`, `ingestResults`, `parallel` (default
     *   true) and `batchSize` (default 3; only used when `parallel` is true).
     * @param {string} requestId - ID used to tag log lines.
     * @returns {Promise<object>} `{ results, summary, batchOptions }` where
     *   `results` holds one entry per query in input order.
     * @throws {Error} If `queries` is invalid, `limit` exceeds the maximum, or
     *   (in parallel mode) `batchSize` is not a positive integer.
     */
    async _executeBatchSearch(params, requestId) {
        const {
            queries,
            limit = this.defaultLimit,
            namespace = this.defaultNamespace,
            ingestResults = false,
            parallel = true,
            batchSize = 3
        } = params;

        if (!Array.isArray(queries) || queries.length === 0) {
            throw new Error('Queries array is required and must not be empty');
        }
        if (queries.length > 20) {
            throw new Error('Maximum 20 queries allowed per batch');
        }
        this._assertLimitWithinMax(limit);

        this.logger.info(`[${requestId}] Batch searching ${queries.length} queries (parallel: ${parallel})`);

        const searchOptions = { limit, namespace, ingestResults, format: 'json' };
        const results = [];

        if (parallel) {
            // A zero/negative step would never advance the loop below, and a
            // string step would concatenate instead of add, so normalise first.
            const chunkSize = Number(batchSize);
            if (!Number.isInteger(chunkSize) || chunkSize < 1) {
                throw new Error('batchSize must be a positive integer');
            }

            for (let start = 0; start < queries.length; start += chunkSize) {
                const chunk = queries.slice(start, start + chunkSize);
                const chunkResults = await Promise.all(
                    chunk.map((query, offset) =>
                        this._searchBatchEntry(query, start + offset, searchOptions, 'Batch'))
                );
                results.push(...chunkResults);

                // Brief pause between chunks (skipped after the last one)
                if (start + chunkSize < queries.length) {
                    await new Promise((resolve) => setTimeout(resolve, 500));
                }
            }
        } else {
            for (const [index, query] of queries.entries()) {
                results.push(await this._searchBatchEntry(query, index, searchOptions, 'Sequential'));
            }
        }

        const summary = {
            totalQueries: queries.length,
            successfulQueries: 0,
            failedQueries: 0,
            totalResults: 0
        };
        for (const entry of results) {
            if (entry.success) {
                summary.successfulQueries += 1;
            } else {
                summary.failedQueries += 1;
            }
            summary.totalResults += entry.resultCount || 0;
        }

        return {
            results,
            summary,
            batchOptions: { parallel, batchSize }
        };
    }

    /**
     * Fetch one article, logging a warning instead of throwing on failure.
     *
     * @param {object} lookup - Argument for `wikipediaSearch.getArticle`.
     * @param {string} failureMessage - Warning text logged if the fetch fails.
     * @returns {Promise<object|null>} The article, or `null` when the fetch
     *   failed or returned a falsy value.
     */
    async _fetchArticleOrWarn(lookup, failureMessage) {
        try {
            const article = await this.wikipediaSearch.getArticle(lookup);
            return article || null;
        } catch (error) {
            this.logger.warn(failureMessage, error.message);
            return null;
        }
    }

    /**
     * Ingest articles into the knowledge graph via the search service.
     *
     * Articles come either directly from `params.articles`, or, when that is
     * absent, are fetched from `titles`, `pageIds` and the top results of
     * `searchQueries` (in that order). Individual fetch or ingestion failures
     * are logged and do not abort the run.
     *
     * @param {object} params - `articles`, `titles`, `pageIds` or
     *   `searchQueries` (each a single value or an array), plus optional
     *   `options.searchLimit` (default 3).
     * @param {string} requestId - ID used to tag log lines.
     * @returns {Promise<object>} `{ ingestionResults, summary }`.
     * @throws {Error} If no source is given or no article could be collected.
     */
    async _executeIngest(params, requestId) {
        const {
            articles,
            titles,
            pageIds,
            searchQueries,
            options = {}
        } = params;

        const asArray = (value) => (Array.isArray(value) ? value : [value]);

        let articlesToIngest = [];

        if (articles) {
            articlesToIngest = asArray(articles);
        } else if (titles || pageIds || searchQueries) {
            // Fetch articles first
            const collect = (article) => {
                if (article) articlesToIngest.push(article);
            };

            if (titles) {
                for (const title of asArray(titles)) {
                    collect(await this._fetchArticleOrWarn(
                        { title },
                        `Failed to fetch article "${title}":`
                    ));
                }
            }

            if (pageIds) {
                for (const pageId of asArray(pageIds)) {
                    collect(await this._fetchArticleOrWarn(
                        { pageId },
                        `Failed to fetch article with ID ${pageId}:`
                    ));
                }
            }

            if (searchQueries) {
                for (const query of asArray(searchQueries)) {
                    try {
                        const searchResult = await this.wikipediaSearch.search(query, {
                            limit: options.searchLimit || 3,
                            ingestResults: false
                        });

                        // Get articles for top search results
                        for (const result of searchResult.results || []) {
                            collect(await this._fetchArticleOrWarn(
                                { title: result.title },
                                `Failed to fetch search result "${result.title}":`
                            ));
                        }
                    } catch (error) {
                        this.logger.warn(`Search failed for query "${query}":`, error.message);
                    }
                }
            }
        } else {
            throw new Error('Must provide articles, titles, pageIds, or searchQueries');
        }

        if (articlesToIngest.length === 0) {
            throw new Error('No articles to ingest');
        }

        this.logger.info(`[${requestId}] Ingesting ${articlesToIngest.length} articles to knowledge graph`);

        const ingestionResults = [];
        let successfulIngestions = 0;

        // Ingest one at a time so a single failure is isolated to its own entry.
        for (const article of articlesToIngest) {
            try {
                const result = await this.wikipediaSearch.ingestArticle(article);
                ingestionResults.push({
                    article: article.title || article.pageId,
                    success: true,
                    result
                });
                successfulIngestions += 1;
            } catch (error) {
                this.logger.warn(`Ingestion failed for article "${article.title}":`, error.message);
                ingestionResults.push({
                    article: article.title || article.pageId,
                    success: false,
                    error: error.message
                });
            }
        }

        return {
            ingestionResults,
            summary: {
                totalArticles: articlesToIngest.length,
                successfulIngestions,
                failedIngestions: ingestionResults.length - successfulIngestions
            }
        };
    }

    /**
     * Execute a category-based search.
     *
     * @param {object} params - `category` (required), plus optional `limit`,
     *   `recursive`, `ingestResults`.
     * @param {string} requestId - ID used to tag log lines.
     * @returns {Promise<object>} The service result spread together with
     *   `category` and `resultCount` (number of entries in `result.pages`).
     * @throws {Error} If `category` is missing, `limit` exceeds the maximum, or
     *   the search fails (wrapped; the underlying error is available as `cause`).
     */
    async _executeCategorySearch(params, requestId) {
        const {
            category,
            limit = this.defaultLimit,
            recursive = false,
            ingestResults = false
        } = params;

        if (!category) {
            throw new Error('Category is required');
        }
        this._assertLimitWithinMax(limit);

        this.logger.info(`[${requestId}] Searching Wikipedia category: "${category}"`);

        try {
            const result = await this.wikipediaSearch.searchCategory(category, {
                limit,
                recursive,
                ingestResults
            });

            return {
                category,
                ...result,
                resultCount: result.pages?.length || 0
            };
        } catch (error) {
            throw new Error(`Category search failed: ${error.message}`, { cause: error });
        }
    }

    /**
     * Ensure the parameters argument is a non-null object.
     *
     * @param {*} params - Value received as operation parameters.
     * @throws {Error} If `params` is falsy or not of type `object`.
     */
    _validateParams(params) {
        if (!params || typeof params !== 'object') {
            throw new Error('Parameters object is required');
        }
    }

    /**
     * Describe the HTTP endpoints served by this handler.
     *
     * Limit and namespace defaults are taken from this instance's
     * configuration so the advertised schema matches what is enforced.
     *
     * @returns {Array<object>} One `{ method, path, operation, description, schema }`
     *   descriptor per operation.
     */
    getEndpoints() {
        // Fresh object per endpoint so callers mutating one schema do not affect another.
        const limitSchema = () => ({
            type: 'number',
            default: this.defaultLimit,
            maximum: this.maxLimit
        });
        const namespaceSchema = () => ({ type: 'string', default: this.defaultNamespace });

        return [
            {
                method: 'GET',
                path: '/wikipedia/search',
                operation: 'search',
                description: 'Search Wikipedia articles',
                schema: {
                    type: 'object',
                    properties: {
                        query: { type: 'string', description: 'Search query' },
                        limit: limitSchema(),
                        offset: { type: 'number', default: 0 },
                        namespace: namespaceSchema(),
                        ingestResults: { type: 'boolean', default: false },
                        language: { type: 'string', default: 'en' }
                    },
                    required: ['query']
                }
            },
            {
                method: 'GET',
                path: '/wikipedia/article',
                operation: 'article',
                description: 'Get specific Wikipedia article by title or page ID',
                schema: {
                    type: 'object',
                    properties: {
                        title: { type: 'string', description: 'Article title' },
                        pageId: { type: 'number', description: 'Wikipedia page ID' },
                        includeContent: { type: 'boolean', default: true },
                        includeSummary: { type: 'boolean', default: true },
                        includeMetadata: { type: 'boolean', default: true },
                        ingestArticle: { type: 'boolean', default: false },
                        language: { type: 'string', default: 'en' }
                    },
                    anyOf: [
                        { required: ['title'] },
                        { required: ['pageId'] }
                    ]
                }
            },
            {
                method: 'POST',
                path: '/wikipedia/batch-search',
                operation: 'batch-search',
                description: 'Search multiple queries in batch',
                schema: {
                    type: 'object',
                    properties: {
                        queries: {
                            type: 'array',
                            items: { type: 'string' },
                            maxItems: 20,
                            description: 'Array of search queries'
                        },
                        limit: limitSchema(),
                        namespace: namespaceSchema(),
                        ingestResults: { type: 'boolean', default: false },
                        parallel: { type: 'boolean', default: true },
                        batchSize: { type: 'number', default: 3 }
                    },
                    required: ['queries']
                }
            },
            {
                method: 'POST',
                path: '/wikipedia/ingest',
                operation: 'ingest',
                description: 'Ingest Wikipedia articles into knowledge graph',
                schema: {
                    type: 'object',
                    properties: {
                        articles: { type: 'array', description: 'Array of article objects' },
                        titles: {
                            type: 'array',
                            items: { type: 'string' },
                            description: 'Array of article titles to fetch and ingest'
                        },
                        pageIds: {
                            type: 'array',
                            items: { type: 'number' },
                            description: 'Array of page IDs to fetch and ingest'
                        },
                        searchQueries: {
                            type: 'array',
                            items: { type: 'string' },
                            description: 'Search queries to find and ingest articles'
                        },
                        options: {
                            type: 'object',
                            properties: {
                                searchLimit: { type: 'number', default: 3 }
                            }
                        }
                    },
                    anyOf: [
                        { required: ['articles'] },
                        { required: ['titles'] },
                        { required: ['pageIds'] },
                        { required: ['searchQueries'] }
                    ]
                }
            },
            {
                method: 'GET',
                path: '/wikipedia/categories',
                operation: 'categories',
                description: 'Search articles by Wikipedia category',
                schema: {
                    type: 'object',
                    properties: {
                        category: { type: 'string', description: 'Wikipedia category name' },
                        limit: limitSchema(),
                        recursive: { type: 'boolean', default: false },
                        ingestResults: { type: 'boolean', default: false }
                    },
                    required: ['category']
                }
            }
        ];
    }
}