//! Operation log for anti-entropy and partition recovery //! //! This module maintains a bounded log of recent operations for each entity, //! enabling peers to request missing deltas after network partitions or when //! they join late. //! //! The operation log: //! - Stores EntityDelta messages for recent operations //! - Bounded by time (keep operations from last N minutes) or size (max M ops) //! - Allows peers to request operations newer than their vector clock //! - Supports periodic anti-entropy sync to repair partitions use std::collections::{ HashMap, VecDeque, }; use bevy::prelude::*; use crate::networking::{ GossipBridge, NodeVectorClock, messages::{ EntityDelta, SyncMessage, VersionedMessage, }, vector_clock::{ NodeId, VectorClock, }, }; /// Maximum operations to keep per entity (prevents unbounded growth) const MAX_OPS_PER_ENTITY: usize = 100; /// Maximum age for operations (in seconds) const MAX_OP_AGE_SECS: u64 = 300; // 5 minutes /// Maximum number of entities to track (prevents unbounded growth) const MAX_ENTITIES: usize = 10_000; /// Operation log entry with timestamp #[derive(Debug, Clone)] struct LogEntry { /// The entity delta operation delta: EntityDelta, /// When this operation was created (for pruning old ops) timestamp: std::time::Instant, } /// Resource storing the operation log for all entities /// /// This is used for anti-entropy - peers can request operations they're missing /// by comparing vector clocks. /// /// # Bounded Growth /// /// The operation log is bounded in three ways: /// - Max operations per entity: `MAX_OPS_PER_ENTITY` (100) /// - Max operation age: `MAX_OP_AGE_SECS` (300 seconds / 5 minutes) /// - Max entities: `MAX_ENTITIES` (10,000) /// /// When limits are exceeded, oldest operations/entities are pruned /// automatically. 
#[derive(Resource)] pub struct OperationLog { /// Map from entity ID to list of recent operations logs: HashMap>, /// Total number of operations across all entities (for monitoring) total_ops: usize, } impl OperationLog { /// Create a new operation log pub fn new() -> Self { Self { logs: HashMap::new(), total_ops: 0, } } /// Record an operation in the log /// /// This should be called whenever we generate or apply an EntityDelta. /// /// # Example /// /// ``` /// use lib::networking::{ /// EntityDelta, /// OperationLog, /// VectorClock, /// }; /// use uuid::Uuid; /// /// let mut log = OperationLog::new(); /// let entity_id = Uuid::new_v4(); /// let node_id = Uuid::new_v4(); /// let clock = VectorClock::new(); /// /// let delta = EntityDelta::new(entity_id, node_id, clock, vec![]); /// log.record_operation(delta); /// ``` pub fn record_operation(&mut self, delta: EntityDelta) { // Check if we're at the entity limit if self.logs.len() >= MAX_ENTITIES && !self.logs.contains_key(&delta.entity_id) { // Prune oldest entity (by finding entity with oldest operation) if let Some(oldest_entity_id) = self.find_oldest_entity() { warn!( "Operation log at entity limit ({}), pruning oldest entity {:?}", MAX_ENTITIES, oldest_entity_id ); if let Some(removed_log) = self.logs.remove(&oldest_entity_id) { self.total_ops = self.total_ops.saturating_sub(removed_log.len()); } } } let entry = LogEntry { delta: delta.clone(), timestamp: std::time::Instant::now(), }; let log = self .logs .entry(delta.entity_id) .or_insert_with(VecDeque::new); log.push_back(entry); self.total_ops += 1; // Prune if we exceed max ops per entity while log.len() > MAX_OPS_PER_ENTITY { log.pop_front(); self.total_ops = self.total_ops.saturating_sub(1); } } /// Find the entity with the oldest operation (for LRU eviction) fn find_oldest_entity(&self) -> Option { self.logs .iter() .filter_map(|(entity_id, log)| log.front().map(|entry| (*entity_id, entry.timestamp))) .min_by_key(|(_, timestamp)| *timestamp) 
.map(|(entity_id, _)| entity_id) } /// Get operations for an entity that are newer than a given vector clock /// /// This is used to respond to SyncRequest messages. pub fn get_operations_newer_than( &self, entity_id: uuid::Uuid, their_clock: &VectorClock, ) -> Vec { let Some(log) = self.logs.get(&entity_id) else { return vec![]; }; log.iter() .filter(|entry| { // Include operation if they haven't seen it yet // (their clock happened before the operation's clock) their_clock.happened_before(&entry.delta.vector_clock) }) .map(|entry| entry.delta.clone()) .collect() } /// Get all operations newer than a vector clock across all entities /// /// This is used to respond to SyncRequest for the entire world state. pub fn get_all_operations_newer_than(&self, their_clock: &VectorClock) -> Vec { let mut deltas = Vec::new(); for (entity_id, _log) in &self.logs { let entity_deltas = self.get_operations_newer_than(*entity_id, their_clock); deltas.extend(entity_deltas); } deltas } /// Prune old operations from the log /// /// This should be called periodically to prevent unbounded growth. /// Removes operations older than MAX_OP_AGE_SECS. 
pub fn prune_old_operations(&mut self) { let max_age = std::time::Duration::from_secs(MAX_OP_AGE_SECS); let now = std::time::Instant::now(); let mut pruned_count = 0; for log in self.logs.values_mut() { let before_len = log.len(); log.retain(|entry| now.duration_since(entry.timestamp) < max_age); pruned_count += before_len - log.len(); } // Update total_ops counter self.total_ops = self.total_ops.saturating_sub(pruned_count); // Remove empty logs self.logs.retain(|_, log| !log.is_empty()); } /// Get the number of operations in the log pub fn total_operations(&self) -> usize { self.total_ops } /// Get the number of entities with logged operations pub fn num_entities(&self) -> usize { self.logs.len() } } impl Default for OperationLog { fn default() -> Self { Self::new() } } /// Build a SyncRequest message /// /// This asks peers to send us any operations we're missing. /// /// # Example /// /// ``` /// use lib::networking::{ /// VectorClock, /// build_sync_request, /// }; /// use uuid::Uuid; /// /// let node_id = Uuid::new_v4(); /// let clock = VectorClock::new(); /// let request = build_sync_request(node_id, clock); /// ``` pub fn build_sync_request(node_id: NodeId, vector_clock: VectorClock) -> VersionedMessage { VersionedMessage::new(SyncMessage::SyncRequest { node_id, vector_clock, }) } /// Build a MissingDeltas response /// /// This contains operations that the requesting peer is missing. pub fn build_missing_deltas(deltas: Vec) -> VersionedMessage { VersionedMessage::new(SyncMessage::MissingDeltas { deltas }) } /// System to handle SyncRequest messages /// /// When we receive a SyncRequest, compare vector clocks and send any /// operations the peer is missing. 
///
/// Does nothing when no `GossipBridge` resource is present. Every message
/// currently available from `bridge.try_recv()` is consumed; only
/// `SyncRequest` messages are answered, and a `MissingDeltas` response is
/// sent only when at least one delta is missing.
///
/// Add this to your app:
///
/// ```no_run
/// use bevy::prelude::*;
/// use lib::networking::handle_sync_requests_system;
///
/// App::new().add_systems(Update, handle_sync_requests_system);
/// ```
pub fn handle_sync_requests_system(
    bridge: Option<Res<GossipBridge>>,
    operation_log: Res<OperationLog>,
) {
    let Some(bridge) = bridge else {
        return;
    };

    // Poll for SyncRequest messages
    //
    // NOTE(review): messages of any other kind taken from the bridge here are
    // dropped. If other systems poll the same queue through `try_recv`, they
    // will never see those messages - confirm how `GossipBridge` delivers
    // messages to multiple consumers.
    while let Some(message) = bridge.try_recv() {
        let SyncMessage::SyncRequest {
            node_id: requesting_node,
            vector_clock: their_clock,
        } = message.message
        else {
            // Not a SyncRequest, ignore
            continue;
        };

        debug!("Received SyncRequest from node {}", requesting_node);

        // Find operations they're missing
        let missing_deltas = operation_log.get_all_operations_newer_than(&their_clock);

        if missing_deltas.is_empty() {
            debug!("No missing deltas for node {}", requesting_node);
            continue;
        }

        info!(
            "Sending {} missing deltas to node {}",
            missing_deltas.len(),
            requesting_node
        );

        // Send MissingDeltas response; a failed send is logged and the loop
        // carries on with the next message.
        let response = build_missing_deltas(missing_deltas);
        if let Err(e) = bridge.send(response) {
            error!("Failed to send MissingDeltas: {}", e);
        }
    }
}

/// System to handle MissingDeltas messages
///
/// When we receive MissingDeltas (in response to our SyncRequest), apply them.
pub fn handle_missing_deltas_system(world: &mut World) {
    // Take an owned handle to the bridge (if one is installed) so that
    // `world` can be borrowed mutably while deltas are applied below.
    let Some(bridge) = world.get_resource::<GossipBridge>().cloned() else {
        return;
    };

    // Poll for MissingDeltas messages
    //
    // NOTE(review): messages of any other kind taken from the bridge here are
    // dropped. If other systems poll the same queue through `try_recv`, they
    // will never see those messages - confirm how `GossipBridge` delivers
    // messages to multiple consumers.
    while let Some(message) = bridge.try_recv() {
        let SyncMessage::MissingDeltas { deltas } = message.message else {
            // Not MissingDeltas, ignore
            continue;
        };

        info!("Received MissingDeltas with {} operations", deltas.len());

        // Apply each delta
        for delta in deltas {
            debug!("Applying missing delta for entity {:?}", delta.entity_id);

            crate::networking::apply_entity_delta(&delta, world);
        }
    }
}

/// System to periodically send SyncRequest for anti-entropy
///
/// This runs every N seconds to request any operations we might be missing,
/// helping to repair network partitions.
///
/// **NOTE:** This is a simple timer-based implementation. Phase 14 will add
/// adaptive sync intervals based on network conditions.
pub fn periodic_sync_system( bridge: Option>, node_clock: Res, time: Res