//! Bevy systems for the persistence layer //! //! This module provides systems that integrate the persistence layer with Bevy's ECS. //! These systems handle dirty tracking, write buffering, and flushing to SQLite. use crate::persistence::*; use crate::persistence::error::Result; use bevy::prelude::*; use bevy::tasks::{IoTaskPool, Task}; use futures_lite::future; use rusqlite::Connection; use std::sync::{Arc, Mutex}; use std::time::Instant; /// Resource wrapping the SQLite connection #[derive(Clone, bevy::prelude::Resource)] pub struct PersistenceDb { pub conn: Arc>, } impl PersistenceDb { pub fn new(conn: Connection) -> Self { Self { conn: Arc::new(Mutex::new(conn)), } } pub fn from_path(path: impl AsRef) -> Result { let conn = initialize_persistence_db(path)?; Ok(Self::new(conn)) } pub fn in_memory() -> Result { let conn = Connection::open_in_memory()?; configure_sqlite_for_persistence(&conn)?; create_persistence_schema(&conn)?; Ok(Self::new(conn)) } /// Acquire the database connection with proper error handling /// /// Handles mutex poisoning gracefully by converting to PersistenceError. /// If a thread panics while holding the mutex, subsequent lock attempts /// will fail with a poisoned error, which this method converts to a /// recoverable error instead of panicking. /// /// # Returns /// - `Ok(MutexGuard)`: Locked connection ready for use /// - `Err(PersistenceError)`: If mutex is poisoned pub fn lock(&self) -> Result> { self.conn.lock() .map_err(|e| PersistenceError::Other(format!("Database connection mutex poisoned: {}", e))) } } /// Resource for tracking when the last checkpoint occurred #[derive(Debug, bevy::prelude::Resource)] pub struct CheckpointTimer { pub last_checkpoint: Instant, } impl Default for CheckpointTimer { fn default() -> Self { Self { last_checkpoint: Instant::now(), } } } /// Resource for tracking pending async flush tasks #[derive(Default, bevy::prelude::Resource)] pub struct PendingFlushTasks { pub tasks: Vec>>, } /// Result of an async flush operation #[derive(Debug, Clone)] pub struct FlushResult { pub operations_count: usize, pub duration: std::time::Duration, pub bytes_written: u64, } /// Helper function to calculate total bytes written from operations fn calculate_bytes_written(ops: &[PersistenceOp]) -> u64 { ops.iter() .map(|op| match op { PersistenceOp::UpsertComponent { data, .. } => data.len() as u64, PersistenceOp::LogOperation { operation, .. } => operation.len() as u64, _ => 0, }) .sum() } /// Helper function to perform a flush with metrics tracking (synchronous) /// /// Used for critical operations like shutdown where we need to block fn perform_flush_sync( ops: &[PersistenceOp], db: &PersistenceDb, metrics: &mut PersistenceMetrics, ) -> Result<()> { if ops.is_empty() { return Ok(()); } let start = Instant::now(); let count = { let mut conn = db.lock()?; flush_to_sqlite(ops, &mut conn)? }; let duration = start.elapsed(); let bytes_written = calculate_bytes_written(ops); metrics.record_flush(count, duration, bytes_written); Ok(()) } /// Helper function to perform a flush asynchronously (for normal operations) /// /// This runs on the I/O task pool to avoid blocking the main thread fn perform_flush_async( ops: Vec, db: PersistenceDb, ) -> Result { if ops.is_empty() { return Ok(FlushResult { operations_count: 0, duration: std::time::Duration::ZERO, bytes_written: 0, }); } let bytes_written = calculate_bytes_written(&ops); let start = Instant::now(); let count = { let mut conn = db.lock()?; flush_to_sqlite(&ops, &mut conn)? }; let duration = start.elapsed(); Ok(FlushResult { operations_count: count, duration, bytes_written, }) } /// System to flush the write buffer to SQLite asynchronously /// /// This system runs on a schedule based on the configuration and battery status. /// It spawns async tasks to avoid blocking the main thread and handles errors gracefully. /// /// The system also polls pending flush tasks and updates metrics when they complete. pub fn flush_system( mut write_buffer: ResMut, db: Res, config: Res, battery: Res, mut metrics: ResMut, mut pending_tasks: ResMut, mut health: ResMut, mut failure_events: MessageWriter, mut recovery_events: MessageWriter, ) { // First, poll and handle completed async flush tasks pending_tasks.tasks.retain_mut(|task| { if let Some(result) = future::block_on(future::poll_once(task)) { match result { Ok(flush_result) => { let previous_failures = health.consecutive_flush_failures; health.record_flush_success(); // Update metrics metrics.record_flush( flush_result.operations_count, flush_result.duration, flush_result.bytes_written, ); // Emit recovery event if we recovered from failures if previous_failures > 0 { recovery_events.write(PersistenceRecoveryEvent { previous_failures, }); } } Err(e) => { health.record_flush_failure(); let error_msg = format!("{}", e); error!( "Async flush failed (attempt {}/{}): {}", health.consecutive_flush_failures, PersistenceHealth::CIRCUIT_BREAKER_THRESHOLD, error_msg ); // Emit failure event failure_events.write(PersistenceFailureEvent { error: error_msg, consecutive_failures: health.consecutive_flush_failures, circuit_breaker_open: health.circuit_breaker_open, }); } } false // Remove completed task } else { true // Keep pending task } }); // Check circuit breaker before spawning new flush if !health.should_attempt_operation() { return; } let flush_interval = config.get_flush_interval(battery.level, battery.is_charging); // Check if we should flush if !write_buffer.should_flush(flush_interval) { return; } // Take operations from buffer let ops = write_buffer.take_operations(); if ops.is_empty() { return; } // Spawn async flush task on I/O thread pool let task_pool = IoTaskPool::get(); let db_clone = db.clone(); let task = task_pool.spawn(async move { perform_flush_async(ops, db_clone.clone()) }); pending_tasks.tasks.push(task); // Update last flush time write_buffer.last_flush = Instant::now(); } /// System to checkpoint the WAL file /// /// This runs less frequently than flush_system to merge the WAL into the main database. pub fn checkpoint_system( db: &PersistenceDb, config: &PersistenceConfig, timer: &mut CheckpointTimer, metrics: &mut PersistenceMetrics, ) -> Result<()> { let checkpoint_interval = config.get_checkpoint_interval(); // Check if it's time to checkpoint if timer.last_checkpoint.elapsed() < checkpoint_interval { // Also check WAL size let wal_size = { let conn = db.lock()?; get_wal_size(&conn)? }; metrics.update_wal_size(wal_size as u64); // Force checkpoint if WAL is too large if wal_size < config.max_wal_size_bytes as i64 { return Ok(()); } } // Perform checkpoint let start = Instant::now(); let info = { let mut conn = db.lock()?; checkpoint_wal(&mut conn, CheckpointMode::Passive)? }; let duration = start.elapsed(); // Update metrics metrics.record_checkpoint(duration); timer.last_checkpoint = Instant::now(); // Log if checkpoint was busy if info.busy { tracing::warn!("WAL checkpoint was busy - some pages may not have been checkpointed"); } Ok(()) } /// System to handle application shutdown /// /// This ensures a final flush and checkpoint before the application exits. /// Uses synchronous flush to ensure all data is written before exit. /// /// **CRITICAL**: Waits for all pending async flush tasks to complete before /// proceeding with shutdown. This prevents data loss from in-flight operations. pub fn shutdown_system( write_buffer: &mut WriteBuffer, db: &PersistenceDb, metrics: &mut PersistenceMetrics, pending_tasks: Option<&mut PendingFlushTasks>, ) -> Result<()> { // CRITICAL: Wait for all pending async flushes to complete // This prevents data loss from in-flight operations if let Some(pending) = pending_tasks { info!("Waiting for {} pending flush tasks to complete before shutdown", pending.tasks.len()); for task in pending.tasks.drain(..) { // Block on each pending task to ensure completion match future::block_on(task) { Ok(flush_result) => { // Update metrics for completed flush metrics.record_flush( flush_result.operations_count, flush_result.duration, flush_result.bytes_written, ); debug!("Pending flush completed: {} operations", flush_result.operations_count); } Err(e) => { error!("Pending flush failed during shutdown: {}", e); // Continue with shutdown even if a task failed } } } info!("All pending flush tasks completed"); } // Force flush any remaining operations (synchronous for shutdown) let ops = write_buffer.take_operations(); perform_flush_sync(&ops, db, metrics)?; // Checkpoint the WAL let start = Instant::now(); { let mut conn = db.lock()?; checkpoint_wal(&mut conn, CheckpointMode::Truncate)?; // Mark clean shutdown mark_clean_shutdown(&mut conn)?; } let duration = start.elapsed(); metrics.record_checkpoint(duration); metrics.record_clean_shutdown(); Ok(()) } /// System to initialize persistence on startup /// /// This checks for crash recovery and sets up the session. pub fn startup_system(db: &PersistenceDb, metrics: &mut PersistenceMetrics) -> Result<()> { let mut conn = db.lock()?; // Check if previous session shut down cleanly let clean_shutdown = check_clean_shutdown(&mut conn)?; if !clean_shutdown { tracing::warn!("Previous session did not shut down cleanly - crash detected"); metrics.record_crash_recovery(); // Perform any necessary recovery operations here // For now, SQLite's WAL mode handles recovery automatically } else { tracing::info!("Previous session shut down cleanly"); } // Set up new session let session = SessionState::new(); set_session_state(&mut conn, "session_id", &session.session_id)?; Ok(()) } /// Helper function to force an immediate flush (for critical operations) /// /// Uses synchronous flush to ensure data is written immediately. /// Suitable for critical operations like iOS background events. pub fn force_flush( write_buffer: &mut WriteBuffer, db: &PersistenceDb, metrics: &mut PersistenceMetrics, ) -> Result<()> { let ops = write_buffer.take_operations(); perform_flush_sync(&ops, db, metrics)?; write_buffer.last_flush = Instant::now(); Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_persistence_db_in_memory() -> Result<()> { let db = PersistenceDb::in_memory()?; // Verify we can write and read let entity_id = uuid::Uuid::new_v4(); let ops = vec![PersistenceOp::UpsertEntity { id: entity_id, data: EntityData { id: entity_id, created_at: chrono::Utc::now(), updated_at: chrono::Utc::now(), entity_type: "TestEntity".to_string(), }, }]; let mut conn = db.lock()?; flush_to_sqlite(&ops, &mut conn)?; Ok(()) } #[test] fn test_flush_system() -> Result<()> { let db = PersistenceDb::in_memory()?; let mut write_buffer = WriteBuffer::new(1000); let mut metrics = PersistenceMetrics::default(); // Add some operations let entity_id = uuid::Uuid::new_v4(); // First add the entity write_buffer.add(PersistenceOp::UpsertEntity { id: entity_id, data: EntityData { id: entity_id, created_at: chrono::Utc::now(), updated_at: chrono::Utc::now(), entity_type: "TestEntity".to_string(), }, }); // Then add a component write_buffer.add(PersistenceOp::UpsertComponent { entity_id, component_type: "Transform".to_string(), data: vec![1, 2, 3], }); // Take operations and flush synchronously (testing the flush logic) let ops = write_buffer.take_operations(); perform_flush_sync(&ops, &db, &mut metrics)?; assert_eq!(metrics.flush_count, 1); assert_eq!(write_buffer.len(), 0); Ok(()) } }