/**
* CommunityDetection.js - Leiden algorithm implementation for Ragno knowledge graphs
*
* This module implements the Leiden algorithm for community detection in
* RDF-based knowledge graphs. The Leiden algorithm is an improvement over
* the Louvain algorithm that guarantees well-connected communities.
*
* Key Features:
* - Leiden algorithm implementation
* - Modularity optimization
* - Community quality metrics
* - RDF-aware community export
* - Integration with ragno ontology
*/
import { logger } from '../../Utils.js'
import rdf from 'rdf-ext'
export default class CommunityDetection {
constructor(options = {}) {
this.options = {
resolution: options.resolution || 1.0,
minCommunitySize: options.minCommunitySize || 3,
maxIterations: options.maxIterations || 100,
convergenceThreshold: options.convergenceThreshold || 1e-6,
randomSeed: options.randomSeed || Math.random(),
logProgress: options.logProgress || false,
...options
}
this.stats = {
lastClustering: null,
communityCount: 0,
modularity: 0,
iterations: 0
}
// Initialize random number generator with seed for reproducibility
this.random = this.seededRandom(this.options.randomSeed)
logger.debug('CommunityDetection initialized with Leiden algorithm')
}
/**
* Compute Leiden clustering (alias for detectCommunities for pipeline compatibility)
* @param {Object} graph - Graph representation from GraphAnalytics
* @param {Object} [options] - Algorithm options
* @returns {Object} Community detection results
*/
computeLeidenClustering(graph, options = {}) {
return this.detectCommunities(graph, options);
}
/**
* Compute Leiden clustering (alias for detectCommunities for pipeline compatibility)
* @param {Object} graph - Graph representation from GraphAnalytics
* @param {Object} [options] - Algorithm options
* @returns {Object} Community detection results
*/
computeLeidenClustering(graph, options = {}) {
return this.detectCommunities(graph, options);
}
/**
* Seeded random number generator for reproducible results
* @param {number} seed - Random seed
* @returns {Function} Random number generator function
*/
seededRandom(seed) {
let m = 0x80000000 // 2**31
let a = 1103515245
let c = 12345
seed = (seed % (m - 1)) + 1
return function () {
seed = (a * seed + c) % m
return seed / m
}
}
/**
* Run Leiden algorithm for community detection
* @param {Object} graph - Graph representation from GraphAnalytics
* @param {Object} [options] - Algorithm options
* @returns {Object} Community detection results
*/
detectCommunities(graph, options = {}) {
logger.info('Starting Leiden community detection...')
const opts = { ...this.options, ...options }
const nodes = Array.from(graph.nodes.keys())
const edges = Array.from(graph.edges.values())
// Initialize each node in its own community
let communities = new Map()
let nodeToCommId = new Map()
for (let i = 0; i < nodes.length; i++) {
communities.set(i, new Set([nodes[i]]))
nodeToCommId.set(nodes[i], i)
}
let currentModularity = this.calculateModularity(graph, nodeToCommId)
let iteration = 0
let improved = true
while (improved && iteration < opts.maxIterations) {
iteration++
if (opts.logProgress && iteration % 10 === 0) {
logger.info(`Leiden iteration ${iteration}, modularity: ${currentModularity.toFixed(4)}`)
}
// Phase 1: Local moving phase
const localResult = this.localMovingPhase(graph, nodeToCommId, opts)
nodeToCommId = localResult.nodeToCommId
// Phase 2: Refinement phase
const refinedResult = this.refinementPhase(graph, nodeToCommId, opts)
nodeToCommId = refinedResult.nodeToCommId
// Phase 3: Aggregation phase
const aggregatedGraph = this.aggregationPhase(graph, nodeToCommId)
// Calculate new modularity
const newModularity = this.calculateModularity(graph, nodeToCommId)
if (newModularity - currentModularity < opts.convergenceThreshold) {
improved = false
} else {
currentModularity = newModularity
graph = aggregatedGraph.graph
nodes = aggregatedGraph.nodeMapping
}
}
// Build final communities
communities = this.buildCommunities(nodeToCommId)
// Filter small communities
const filteredCommunities = this.filterSmallCommunities(communities, opts.minCommunitySize)
// Compute community statistics
const stats = this.computeCommunityStatistics(filteredCommunities, graph)
// Update internal statistics
this.stats.lastClustering = new Date()
this.stats.communityCount = filteredCommunities.size
this.stats.modularity = currentModularity
this.stats.iterations = iteration
logger.info(`Leiden algorithm completed: ${filteredCommunities.size} communities, modularity: ${currentModularity.toFixed(4)}`)
return {
communities: Array.from(filteredCommunities.values()),
nodeToCommId,
modularity: currentModularity,
iterations: iteration,
statistics: stats,
algorithm: 'leiden',
timestamp: new Date()
}
}
/**
* Local moving phase of Leiden algorithm
* @param {Object} graph - Graph representation
* @param {Map} nodeToCommId - Current node to community mapping
* @param {Object} options - Algorithm options
* @returns {Object} Updated node to community mapping
*/
localMovingPhase(graph, nodeToCommId, options) {
const nodes = Array.from(graph.nodes.keys())
let improved = true
let localIterations = 0
while (improved && localIterations < 50) {
improved = false
localIterations++
// Shuffle nodes for random order processing
const shuffledNodes = this.shuffleArray([...nodes])
for (const node of shuffledNodes) {
const currentComm = nodeToCommId.get(node)
const neighbors = Array.from(graph.adjacency.get(node) || [])
// Find neighboring communities
const neighborComms = new Set()
for (const neighbor of neighbors) {
neighborComms.add(nodeToCommId.get(neighbor))
}
let bestComm = currentComm
let bestGain = 0
// Try moving to each neighboring community
for (const targetComm of neighborComms) {
if (targetComm === currentComm) continue
const gain = this.calculateModularityGain(graph, node, currentComm, targetComm, nodeToCommId)
if (gain > bestGain) {
bestGain = gain
bestComm = targetComm
}
}
// Move node if beneficial
if (bestComm !== currentComm && bestGain > 0) {
nodeToCommId.set(node, bestComm)
improved = true
}
}
}
return { nodeToCommId }
}
/**
* Refinement phase to ensure well-connected communities
* @param {Object} graph - Graph representation
* @param {Map} nodeToCommId - Current node to community mapping
* @param {Object} options - Algorithm options
* @returns {Object} Refined node to community mapping
*/
refinementPhase(graph, nodeToCommId, options) {
const communities = this.buildCommunities(nodeToCommId)
const newNodeToCommId = new Map(nodeToCommId)
let nextCommId = Math.max(...nodeToCommId.values()) + 1
for (const [commId, nodes] of communities) {
if (nodes.size <= 1) continue
// Check if community is well-connected
const subgraph = this.extractSubgraph(graph, nodes)
const components = this.findConnectedComponents(subgraph)
if (components.length > 1) {
// Split into separate communities
for (let i = 1; i < components.length; i++) {
const component = components[i]
for (const node of component) {
newNodeToCommId.set(node, nextCommId)
}
nextCommId++
}
}
}
return { nodeToCommId: newNodeToCommId }
}
/**
* Aggregation phase to create meta-graph
* @param {Object} graph - Graph representation
* @param {Map} nodeToCommId - Current node to community mapping
* @returns {Object} Aggregated graph
*/
aggregationPhase(graph, nodeToCommId) {
const communities = this.buildCommunities(nodeToCommId)
const metaGraph = {
nodes: new Map(),
edges: new Map(),
adjacency: new Map()
}
const communityNodes = []
// Create meta-nodes for communities
for (const [commId, nodes] of communities) {
const metaNodeUri = `meta_community_${commId}`
metaGraph.nodes.set(metaNodeUri, {
uri: metaNodeUri,
type: 'meta_community',
originalNodes: nodes,
properties: new Map()
})
metaGraph.adjacency.set(metaNodeUri, new Set())
communityNodes.push(metaNodeUri)
}
// Create meta-edges between communities
const communityEdges = new Map()
for (const [edgeKey, edge] of graph.edges) {
const sourceComm = nodeToCommId.get(edge.source)
const targetComm = nodeToCommId.get(edge.target)
if (sourceComm !== targetComm) {
const metaEdgeKey = `meta_community_${sourceComm}->meta_community_${targetComm}`
if (!communityEdges.has(metaEdgeKey)) {
communityEdges.set(metaEdgeKey, {
source: `meta_community_${sourceComm}`,
target: `meta_community_${targetComm}`,
weight: 0,
originalEdges: []
})
}
const metaEdge = communityEdges.get(metaEdgeKey)
metaEdge.weight += edge.weight
metaEdge.originalEdges.push(edge)
metaGraph.adjacency.get(`meta_community_${sourceComm}`).add(`meta_community_${targetComm}`)
}
}
metaGraph.edges = communityEdges
return {
graph: metaGraph,
nodeMapping: communityNodes
}
}
/**
* Calculate modularity gain for moving a node between communities
* @param {Object} graph - Graph representation
* @param {string} node - Node to move
* @param {number} fromComm - Source community
* @param {number} toComm - Target community
* @param {Map} nodeToCommId - Current community assignment
* @returns {number} Modularity gain
*/
calculateModularityGain(graph, node, fromComm, toComm, nodeToCommId) {
// Simplified modularity gain calculation
// In practice, this would need full modularity computation
const nodeConnections = graph.adjacency.get(node) || new Set()
let toCommWeight = 0
let fromCommWeight = 0
for (const neighbor of nodeConnections) {
const neighborComm = nodeToCommId.get(neighbor)
const edge = graph.edges.get(`${node}->${neighbor}`) || graph.edges.get(`${neighbor}->${node}`)
const weight = edge ? edge.weight : 1.0
if (neighborComm === toComm) {
toCommWeight += weight
} else if (neighborComm === fromComm) {
fromCommWeight += weight
}
}
return (toCommWeight - fromCommWeight) * this.options.resolution
}
/**
* Calculate modularity of current community structure
* @param {Object} graph - Graph representation
* @param {Map} nodeToCommId - Community assignment
* @returns {number} Modularity value
*/
calculateModularity(graph, nodeToCommId) {
let modularity = 0
const totalWeight = Array.from(graph.edges.values()).reduce((sum, edge) => sum + edge.weight, 0)
if (totalWeight === 0) return 0
const communities = this.buildCommunities(nodeToCommId)
for (const [commId, nodes] of communities) {
let internalWeight = 0
let totalDegree = 0
// Calculate internal edges and total degree for this community
for (const node of nodes) {
const neighbors = graph.adjacency.get(node) || new Set()
for (const neighbor of neighbors) {
const edge = graph.edges.get(`${node}->${neighbor}`) || graph.edges.get(`${neighbor}->${node}`)
const weight = edge ? edge.weight : 1.0
totalDegree += weight
if (nodes.has(neighbor)) {
internalWeight += weight
}
}
}
internalWeight /= 2 // Each internal edge counted twice
const expectedInternal = (totalDegree * totalDegree) / (4 * totalWeight)
modularity += (internalWeight / totalWeight) - this.options.resolution * expectedInternal
}
return modularity
}
/**
* Build communities map from node assignments
* @param {Map} nodeToCommId - Node to community mapping
* @returns {Map} Community ID to nodes mapping
*/
buildCommunities(nodeToCommId) {
const communities = new Map()
for (const [node, commId] of nodeToCommId) {
if (!communities.has(commId)) {
communities.set(commId, new Set())
}
communities.get(commId).add(node)
}
return communities
}
/**
* Filter out communities smaller than minimum size
* @param {Map} communities - Community mapping
* @param {number} minSize - Minimum community size
* @returns {Map} Filtered communities
*/
filterSmallCommunities(communities, minSize) {
const filtered = new Map()
let newCommId = 0
for (const [commId, nodes] of communities) {
if (nodes.size >= minSize) {
filtered.set(newCommId, nodes)
newCommId++
}
}
return filtered
}
/**
* Extract subgraph containing only specified nodes
* @param {Object} graph - Original graph
* @param {Set} nodes - Nodes to include
* @returns {Object} Subgraph
*/
extractSubgraph(graph, nodes) {
const subgraph = {
nodes: new Map(),
edges: new Map(),
adjacency: new Map()
}
// Add nodes
for (const node of nodes) {
if (graph.nodes.has(node)) {
subgraph.nodes.set(node, graph.nodes.get(node))
subgraph.adjacency.set(node, new Set())
}
}
// Add edges between included nodes
for (const [edgeKey, edge] of graph.edges) {
if (nodes.has(edge.source) && nodes.has(edge.target)) {
subgraph.edges.set(edgeKey, edge)
subgraph.adjacency.get(edge.source).add(edge.target)
}
}
return subgraph
}
/**
* Find connected components in a subgraph
* @param {Object} subgraph - Subgraph to analyze
* @returns {Array} Array of connected components
*/
findConnectedComponents(subgraph) {
const visited = new Set()
const components = []
const dfs = (startNode) => {
const stack = [startNode]
const component = []
while (stack.length > 0) {
const node = stack.pop()
if (visited.has(node)) continue
visited.add(node)
component.push(node)
const neighbors = subgraph.adjacency.get(node) || new Set()
for (const neighbor of neighbors) {
if (!visited.has(neighbor)) {
stack.push(neighbor)
}
}
}
return component
}
for (const node of subgraph.nodes.keys()) {
if (!visited.has(node)) {
const component = dfs(node)
if (component.length > 0) {
components.push(component)
}
}
}
return components
}
/**
* Compute statistics for detected communities
* @param {Map} communities - Community mapping
* @param {Object} graph - Original graph
* @returns {Object} Community statistics
*/
computeCommunityStatistics(communities, graph) {
const sizes = []
let totalInternalEdges = 0
let totalExternalEdges = 0
for (const [commId, nodes] of communities) {
sizes.push(nodes.size)
// Count internal vs external edges for this community
for (const node of nodes) {
const neighbors = graph.adjacency.get(node) || new Set()
for (const neighbor of neighbors) {
if (nodes.has(neighbor)) {
totalInternalEdges++
} else {
totalExternalEdges++
}
}
}
}
totalInternalEdges /= 2 // Each edge counted twice
const avgSize = sizes.length > 0 ? sizes.reduce((a, b) => a + b, 0) / sizes.length : 0
const maxSize = sizes.length > 0 ? Math.max(...sizes) : 0
const minSize = sizes.length > 0 ? Math.min(...sizes) : 0
return {
communityCount: communities.size,
avgSize,
maxSize,
minSize,
totalInternalEdges,
totalExternalEdges,
internalRatio: totalInternalEdges / (totalInternalEdges + totalExternalEdges)
}
}
/**
* Shuffle array in place
* @param {Array} array - Array to shuffle
* @returns {Array} Shuffled array
*/
shuffleArray(array) {
for (let i = array.length - 1; i > 0; i--) {
const j = Math.floor(this.random() * (i + 1))
;[array[i], array[j]] = [array[j], array[i]]
}
return array
}
/**
* Export communities to RDF format
* @param {Object} results - Community detection results
* @param {Dataset} targetDataset - Target RDF dataset
*/
exportCommunitiesToRDF(results, targetDataset) {
logger.info('Exporting communities to RDF...')
const analysisUri = `http://example.org/ragno/community-analysis/${Date.now()}`
const analysisNode = rdf.namedNode(analysisUri)
// Add analysis metadata
targetDataset.add(rdf.quad(
analysisNode,
rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
rdf.namedNode('http://purl.org/stuff/ragno/CommunityAnalysis')
))
targetDataset.add(rdf.quad(
analysisNode,
rdf.namedNode('http://purl.org/stuff/ragno/algorithm'),
rdf.literal(results.algorithm)
))
targetDataset.add(rdf.quad(
analysisNode,
rdf.namedNode('http://purl.org/stuff/ragno/modularity'),
rdf.literal(results.modularity)
))
// Export communities
for (let commId = 0; commId < results.communities.length; commId++) {
const nodes = results.communities[commId]
const communityUri = `http://example.org/ragno/community/${commId}`
const communityNode = rdf.namedNode(communityUri)
targetDataset.add(rdf.quad(
communityNode,
rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
rdf.namedNode('http://purl.org/stuff/ragno/Community')
))
targetDataset.add(rdf.quad(
communityNode,
rdf.namedNode('http://purl.org/stuff/ragno/hasSize'),
rdf.literal(nodes.size)
))
// Add community membership
for (const nodeUri of nodes) {
targetDataset.add(rdf.quad(
rdf.namedNode(nodeUri),
rdf.namedNode('http://purl.org/stuff/ragno/inCommunity'),
communityNode
))
}
}
logger.info('Communities exported to RDF')
}
/**
* Get current statistics
* @returns {Object} Current clustering statistics
*/
getStatistics() {
return { ...this.stats }
}
/**
* Build a graph from an RDF dataset (delegates to GraphAnalytics)
* @param {Dataset} dataset - RDF-Ext dataset
* @param {Object} [options] - Graph construction options
* @returns {Object} Graph representation
*/
async buildGraphFromRDF(dataset, options = {}) {
if (!this.graphAnalytics) {
const GraphAnalytics = (await import('./GraphAnalytics.js')).default;
this.graphAnalytics = new GraphAnalytics(this.options);
}
return this.graphAnalytics.buildGraphFromRDF(dataset, options);
}
}