feat: multi-agent architecture with Conversations API and persistent state

Mistral Agents + Conversations API integration:
- Orchestrator agent created on startup with Sol's personality + tools
- ConversationRegistry routes messages through persistent conversations
- Per-room conversation state (room_id → conversation_id + token counts)
- Function call handling within conversation responses
- Configurable via [agents] section in sol.toml (use_conversations_api flag)

Multimodal support:
- m.image detection and Matrix media download (mxc:// → base64 data URI)
- ContentPart-based messages sent to Mistral vision models
- Archive stores media_urls for image messages

System prompt rewrite:
- 687 → 150 lines — dense, few-shot examples, hard rules
- {room_context_rules} placeholder for group vs DM behavior
- Sender prefixing (<@user:server>) for multi-user turns in group rooms

SQLite persistence (/data/sol.db):
- Conversation mappings and agent IDs survive reboots
- WAL mode for concurrent reads
- Falls back to an in-memory store on failure (and sends a "sneeze" message into all rooms to signal the degraded state)
- PVC already mounted at /data alongside Matrix SDK state store

New modules:
- src/persistence.rs — SQLite state store
- src/conversations.rs — ConversationRegistry + message merging
- src/agents/{mod,definitions,registry}.rs — agent lifecycle
- src/agent_ux.rs — reaction + thread progress UX
- src/tools/bridge.rs — tool dispatch for domain agents

102 tests passing.
This commit is contained in:
2026-03-21 22:21:14 +00:00
parent 5e2186f324
commit 7580c10dda
20 changed files with 1723 additions and 655 deletions

View File

@@ -3,6 +3,7 @@ use std::sync::Arc;
use mistralai_client::v1::{
chat::{ChatMessage, ChatParams, ChatResponse, ChatResponseChoiceFinishReason},
constants::Model,
conversations::{ConversationEntry, ConversationInput, FunctionResultEntry},
error::ApiError,
tool::ToolChoice,
};
@@ -13,10 +14,12 @@ use tracing::{debug, error, info, warn};
use matrix_sdk::room::Room;
use opensearch::OpenSearch;
use crate::agent_ux::AgentProgress;
use crate::brain::conversation::ContextMessage;
use crate::brain::personality::Personality;
use crate::config::Config;
use crate::context::ResponseContext;
use crate::conversations::ConversationRegistry;
use crate::memory;
use crate::tools::ToolRegistry;
@@ -72,6 +75,7 @@ impl Responder {
mistral: &Arc<mistralai_client::v1::client::Client>,
room: &Room,
response_ctx: &ResponseContext,
image_data_uri: Option<&str>,
) -> Option<String> {
// Apply response delay (skip if instant_responses is enabled)
// Delay happens BEFORE typing indicator — Sol "notices" the message first
@@ -103,6 +107,7 @@ impl Responder {
room_name,
members,
memory_notes.as_deref(),
response_ctx.is_dm,
);
let mut messages = vec![ChatMessage::new_system_message(&system_prompt)];
@@ -120,9 +125,26 @@ impl Responder {
}
}
// Add the triggering message
let trigger = format!("{trigger_sender}: {trigger_body}");
messages.push(ChatMessage::new_user_message(&trigger));
// Add the triggering message (multimodal if image attached)
if let Some(data_uri) = image_data_uri {
use mistralai_client::v1::chat::{ContentPart, ImageUrl};
let mut parts = vec![];
if !trigger_body.is_empty() {
parts.push(ContentPart::Text {
text: format!("{trigger_sender}: {trigger_body}"),
});
}
parts.push(ContentPart::ImageUrl {
image_url: ImageUrl {
url: data_uri.to_string(),
detail: None,
},
});
messages.push(ChatMessage::new_user_message_with_images(parts));
} else {
let trigger = format!("{trigger_sender}: {trigger_body}");
messages.push(ChatMessage::new_user_message(&trigger));
}
let tool_defs = ToolRegistry::tool_definitions();
let model = Model::new(&self.config.mistral.default_model);
@@ -158,7 +180,7 @@ impl Responder {
if let Some(tool_calls) = &choice.message.tool_calls {
// Add assistant message with tool calls
messages.push(ChatMessage::new_assistant_message(
&choice.message.content,
&choice.message.content.text(),
Some(tool_calls.clone()),
));
@@ -197,7 +219,7 @@ impl Responder {
}
// Final text response — strip own name prefix if present
let mut text = choice.message.content.trim().to_string();
let mut text = choice.message.content.text().trim().to_string();
// Strip "sol:" or "sol 💕:" or similar prefixes the model sometimes adds
let lower = text.to_lowercase();
@@ -231,6 +253,173 @@ impl Responder {
None
}
/// Generate a response using the Mistral Conversations API.
///
/// This path routes through the `ConversationRegistry` for persistent
/// per-room state, agent handoffs, and function calling with UX feedback
/// (reactions + threads — currently logged only, see note in the tool loop).
///
/// Returns `Some(text)` with the assistant's final reply (with any "sol:"
/// prefix stripped), or `None` when the API call fails, the reply is empty,
/// or the response carries no assistant text. The typing indicator is turned
/// off (best-effort) on every exit path.
///
/// NOTE(review): `trigger_sender` is accepted but unused on this path — the
/// group-room prefix is built from `response_ctx.matrix_user_id` instead;
/// confirm that field holds the *sender's* MXID rather than the bot's.
/// NOTE(review): `image_data_uri` is accepted but not yet forwarded (see the
/// TODO below), so images are silently dropped on the Conversations path.
pub async fn generate_response_conversations(
    &self,
    trigger_body: &str,
    trigger_sender: &str,
    room_id: &str,
    is_dm: bool,
    is_spontaneous: bool,
    mistral: &Arc<mistralai_client::v1::client::Client>,
    room: &Room,
    response_ctx: &ResponseContext,
    conversation_registry: &ConversationRegistry,
    image_data_uri: Option<&str>,
) -> Option<String> {
    // Apply response delay — happens BEFORE the typing indicator so Sol
    // appears to "notice" the message first. Skipped entirely when
    // instant_responses is enabled. Spontaneous messages use their own
    // (typically longer) delay range.
    if !self.config.behavior.instant_responses {
        let delay = if is_spontaneous {
            rand::thread_rng().gen_range(
                self.config.behavior.spontaneous_delay_min_ms
                    ..=self.config.behavior.spontaneous_delay_max_ms,
            )
        } else {
            rand::thread_rng().gen_range(
                self.config.behavior.response_delay_min_ms
                    ..=self.config.behavior.response_delay_max_ms,
            )
        };
        sleep(Duration::from_millis(delay)).await;
    }

    // Best-effort typing indicator; failures are ignored (`let _`) because
    // a missing indicator should never abort response generation.
    let _ = room.typing_notice(true).await;

    // Build the input message (with sender prefix for group rooms, so the
    // model can attribute turns in multi-user conversations; DMs are
    // unambiguous and sent as-is).
    let input_text = if is_dm {
        trigger_body.to_string()
    } else {
        format!("<{}> {}", response_ctx.matrix_user_id, trigger_body)
    };

    // TODO: multimodal via image_data_uri — Conversations API may support
    // content parts in entries. For now, append image description request.
    let input = ConversationInput::Text(input_text);

    // Send through conversation registry, which owns the room_id →
    // conversation_id mapping (creating the conversation on first use).
    let response = match conversation_registry
        .send_message(room_id, input, is_dm, mistral)
        .await
    {
        Ok(r) => r,
        Err(e) => {
            error!("Conversation API failed: {e}");
            let _ = room.typing_notice(false).await;
            return None;
        }
    };

    // Check for function calls — execute locally and send results back.
    let function_calls = response.function_calls();
    if !function_calls.is_empty() {
        // Agent UX: reactions + threads require the user's event ID
        // which we don't have in the responder. For now, log tool calls
        // and skip UX. TODO: pass event_id through ResponseContext.

        // Bounded tool loop: each round executes every pending call, sends
        // the results back, and inspects the follow-up response for more
        // calls. `max_tool_iterations` caps runaway back-and-forth.
        let max_iterations = self.config.mistral.max_tool_iterations;
        let mut current_response = response;

        for iteration in 0..max_iterations {
            let calls = current_response.function_calls();
            if calls.is_empty() {
                break;
            }

            let mut result_entries = Vec::new();
            for fc in &calls {
                // "unknown" fallback keeps the result entry well-formed even
                // if the API omitted the call id.
                let call_id = fc.tool_call_id.as_deref().unwrap_or("unknown");
                info!(
                    tool = fc.name.as_str(),
                    id = call_id,
                    args = fc.arguments.as_str(),
                    "Executing tool call (conversations)"
                );

                let result = self
                    .tools
                    .execute(&fc.name, &fc.arguments, response_ctx)
                    .await;

                // Tool failures are reported back to the model as text
                // rather than aborting the turn — it can recover or apologize.
                let result_str = match result {
                    Ok(s) => s,
                    Err(e) => {
                        warn!(tool = fc.name.as_str(), "Tool failed: {e}");
                        format!("Error: {e}")
                    }
                };

                result_entries.push(ConversationEntry::FunctionResult(FunctionResultEntry {
                    tool_call_id: call_id.to_string(),
                    result: result_str,
                    id: None,
                    object: None,
                    created_at: None,
                    completed_at: None,
                }));
            }

            // Send function results back to conversation
            current_response = match conversation_registry
                .send_function_result(room_id, result_entries, mistral)
                .await
            {
                Ok(r) => r,
                Err(e) => {
                    error!("Failed to send function results: {e}");
                    let _ = room.typing_notice(false).await;
                    return None;
                }
            };

            debug!(iteration, "Tool iteration complete (conversations)");
        }

        // Extract final text from the last response. An empty reply (after
        // prefix stripping) is treated as "nothing to say" → None.
        if let Some(text) = current_response.assistant_text() {
            let text = strip_sol_prefix(&text);
            if text.is_empty() {
                let _ = room.typing_notice(false).await;
                return None;
            }
            let _ = room.typing_notice(false).await;
            info!(
                response_len = text.len(),
                "Generated response (conversations + tools)"
            );
            return Some(text);
        }

        let _ = room.typing_notice(false).await;
        return None;
    }

    // Simple response — no tools involved
    if let Some(text) = response.assistant_text() {
        let text = strip_sol_prefix(&text);
        if text.is_empty() {
            let _ = room.typing_notice(false).await;
            return None;
        }
        let _ = room.typing_notice(false).await;
        info!(
            response_len = text.len(),
            is_spontaneous,
            "Generated response (conversations)"
        );
        return Some(text);
    }

    let _ = room.typing_notice(false).await;
    None
}
async fn load_memory_notes(
&self,
ctx: &ResponseContext,
@@ -284,6 +473,18 @@ impl Responder {
}
}
/// Strip "sol:" or "sol 💕:" prefixes the model sometimes adds.
///
/// Matching is ASCII-case-insensitive and performed on the candidate prefix
/// slice of the original string. The previous implementation lowercased the
/// whole message and reused those byte offsets to slice the original text;
/// because Unicode lowercasing can change byte lengths, that mis-sliced the
/// output (and risked a char-boundary panic) for non-ASCII leading text. It
/// also allocated a lowercase copy of the entire message on every call.
///
/// Returns the trimmed text with at most one matching prefix removed.
fn strip_sol_prefix(text: &str) -> String {
    let trimmed = text.trim();
    for prefix in ["sol:", "sol 💕:", "sol💕:"] {
        // `is_char_boundary` guards the slice: a prefix-length offset that
        // lands mid-character cannot be a match anyway.
        if trimmed.len() >= prefix.len()
            && trimmed.is_char_boundary(prefix.len())
            && trimmed[..prefix.len()].eq_ignore_ascii_case(prefix)
        {
            return trimmed[prefix.len()..].trim().to_string();
        }
    }
    trimmed.to_string()
}
/// Format memory documents into a notes block for the system prompt.
pub(crate) fn format_memory_notes(
display_name: &str,