evaluator redesign: response types, silence, structural suppression

new engagement types: Respond (inline), ThreadReply (threaded),
React, Ignore. LLM returns response_type to decide HOW to engage.

silence mechanic: "shut up"/"be quiet" sets a 30min per-room timer.
only direct @mention breaks through.

structural suppression (A+B):
- reply to non-Sol human → capped at React
- 3+ human messages since Sol → forced passive mode

threads have a lower relevance threshold (70% of spontaneous).
time context injected into evaluator prompt.
This commit is contained in:
2026-03-23 01:41:57 +00:00
parent 1058afb635
commit 3b62d86c45
3 changed files with 244 additions and 48 deletions

View File

@@ -12,7 +12,11 @@ use crate::config::Config;
#[derive(Debug)]
pub enum Engagement {
MustRespond { reason: MustRespondReason },
MaybeRespond { relevance: f32, hook: String },
/// Respond inline in the room — Sol has something valuable to contribute.
Respond { relevance: f32, hook: String },
/// Respond in a thread — Sol has something to add but it's tangential
/// or the room is busy with a human-to-human conversation.
ThreadReply { relevance: f32, hook: String },
React { emoji: String, relevance: f32 },
Ignore,
}
@@ -51,6 +55,8 @@ impl Evaluator {
}
}
/// `is_reply_to_human` — true if this message is a Matrix reply to a non-Sol user.
/// `messages_since_sol` — how many messages have been sent since Sol last spoke in this room.
/// `is_silenced` — true if Sol has an active silence timer in this room; only a direct @mention gets through.
pub async fn evaluate(
&self,
sender: &str,
@@ -58,6 +64,9 @@ impl Evaluator {
is_dm: bool,
recent_messages: &[String],
mistral: &Arc<mistralai_client::v1::client::Client>,
is_reply_to_human: bool,
messages_since_sol: usize,
is_silenced: bool,
) -> Engagement {
let body_preview: String = body.chars().take(80).collect();
@@ -67,7 +76,7 @@ impl Evaluator {
return Engagement::Ignore;
}
// Direct mention: @sol:sunbeam.pt
// Direct mention: @sol:sunbeam.pt — always responds, breaks silence
if self.mention_regex.is_match(body) {
info!(sender, body = body_preview.as_str(), rule = "direct_mention", "Engagement: MustRespond");
return Engagement::MustRespond {
@@ -75,7 +84,7 @@ impl Evaluator {
};
}
// DM
// DM — always responds (silence only applies to group rooms)
if is_dm {
info!(sender, body = body_preview.as_str(), rule = "dm", "Engagement: MustRespond");
return Engagement::MustRespond {
@@ -83,6 +92,12 @@ impl Evaluator {
};
}
// If silenced in this room, only direct @mention breaks through (checked above)
if is_silenced {
debug!(sender, body = body_preview.as_str(), "Silenced in this room — ignoring");
return Engagement::Ignore;
}
// Name invocation: "sol ..." or "hey sol ..."
if self.name_regex.is_match(body) {
info!(sender, body = body_preview.as_str(), rule = "name_invocation", "Engagement: MustRespond");
@@ -91,6 +106,32 @@ impl Evaluator {
};
}
// ── Structural suppression (A+B) ──
// A: If this is a reply to another human (not Sol), cap at React-only.
// People replying to each other aren't asking for Sol's input.
if is_reply_to_human {
info!(
sender, body = body_preview.as_str(),
rule = "reply_to_human",
"Reply to non-Sol human — suppressing to React-only"
);
// Still run the LLM eval for potential emoji reaction, but cap the result
let engagement = self.evaluate_relevance(body, recent_messages, mistral).await;
return match engagement {
Engagement::React { emoji, relevance } => Engagement::React { emoji, relevance },
Engagement::Respond { relevance, .. } if relevance >= self.config.behavior.reaction_threshold => {
// Would have responded inline, but reply-to-human caps engagement at React-only.
// `Respond` carries no emoji to demote to, so there is nothing to react with — ignore.
Engagement::Ignore
}
_ => Engagement::Ignore,
};
}
// B: Consecutive message decay. After 3+ human messages without Sol,
// switch from active to passive evaluation context.
let force_passive = messages_since_sol >= 3;
info!(
sender, body = body_preview.as_str(),
threshold = self.config.behavior.spontaneous_threshold,
@@ -98,11 +139,13 @@ impl Evaluator {
context_len = recent_messages.len(),
eval_window = self.config.behavior.evaluation_context_window,
detect_sol = self.config.behavior.detect_sol_in_conversation,
messages_since_sol,
force_passive,
is_reply_to_human,
"No rule match — running LLM relevance evaluation"
);
// Cheap evaluation call for spontaneous responses
self.evaluate_relevance(body, recent_messages, mistral)
self.evaluate_relevance_with_mode(body, recent_messages, mistral, force_passive)
.await
}
@@ -140,6 +183,16 @@ impl Evaluator {
body: &str,
recent_messages: &[String],
mistral: &Arc<mistralai_client::v1::client::Client>,
) -> Engagement {
self.evaluate_relevance_with_mode(body, recent_messages, mistral, false).await
}
async fn evaluate_relevance_with_mode(
&self,
body: &str,
recent_messages: &[String],
mistral: &Arc<mistralai_client::v1::client::Client>,
force_passive: bool,
) -> Engagement {
let window = self.config.behavior.evaluation_context_window;
let context = recent_messages
@@ -151,8 +204,11 @@ impl Evaluator {
.collect::<Vec<_>>()
.join("\n");
// Check if Sol recently participated in this conversation
let sol_in_context = self.config.behavior.detect_sol_in_conversation
// Check if Sol recently participated in this conversation.
// force_passive overrides: if 3+ human messages since Sol spoke, treat as passive
// even if Sol's messages are visible in the context window.
let sol_in_context = !force_passive
&& self.config.behavior.detect_sol_in_conversation
&& recent_messages.iter().any(|m| {
let lower = m.to_lowercase();
lower.starts_with("sol:") || lower.starts_with("sol ") || lower.contains("@sol:")
@@ -181,15 +237,16 @@ impl Evaluator {
"Building evaluation prompt"
);
// System message: Sol's full personality + evaluation framing.
// This gives the evaluator deep context on who Sol is, what they care about,
// and how they'd naturally engage — so relevance scoring reflects Sol's actual character.
// System message: Sol's full personality + evaluation framing + time context.
let tc = crate::time_context::TimeContext::now();
let system = format!(
"You are Sol's engagement evaluator. Your job is to decide whether Sol should \
respond to a message in a group chat, based on Sol's personality, expertise, \
and relationship with the people in the room.\n\n\
"You are Sol's engagement evaluator. Your job is to decide whether and HOW Sol \
should respond to a message in a group chat.\n\n\
# who sol is\n\n\
{}\n\n\
# time\n\n\
{}\n\n\
# your task\n\n\
Read the conversation below and evaluate whether Sol would naturally want to \
respond to the latest message. Consider:\n\
@@ -198,16 +255,25 @@ impl Evaluator {
- Is someone implicitly asking for Sol's help (even without mentioning them)?\n\
- Is this a continuation of something Sol was already involved in?\n\
- Would Sol find this genuinely interesting or have something meaningful to add?\n\
- Would a reaction (emoji) be more appropriate than a full response?\n\n\
- Are two humans talking to each other? If so, Sol should NOT jump in unless \
directly relevant. Two people having a conversation doesn't need a third voice.\n\
- Would a reaction (emoji) be more appropriate than a full response?\n\
- Would responding in a thread (less intrusive) be better than inline?\n\n\
{participation_note}\n\n\
Respond ONLY with JSON:\n\
{{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant.\n\
hook: if responding, a brief note on what Sol would engage with.\n\
emoji: if Sol wouldn't write a full response but might react, suggest a single \
emoji that feels natural and specific — not generic thumbs up. leave empty if \
no reaction fits.",
{{\"relevance\": 0.0-1.0, \"response_type\": \"message\"|\"thread\"|\"react\"|\"ignore\", \
\"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
relevance: 1.0 = Sol absolutely should respond, 0.0 = irrelevant.\n\
response_type:\n\
- \"message\": Sol has something genuinely valuable to add inline.\n\
- \"thread\": Sol has a useful aside or observation, but the main conversation \
is between humans — put it in a thread so it doesn't interrupt.\n\
- \"react\": emoji reaction only, no text.\n\
- \"ignore\": Sol has nothing to add.\n\
hook: if responding, what Sol would engage with.\n\
emoji: if reacting, a single emoji that feels natural and specific.",
self.system_prompt,
tc.system_block(),
);
let user_prompt = format!(
@@ -249,33 +315,40 @@ impl Evaluator {
let relevance = val["relevance"].as_f64().unwrap_or(0.0) as f32;
let hook = val["hook"].as_str().unwrap_or("").to_string();
let emoji = val["emoji"].as_str().unwrap_or("").to_string();
let response_type = val["response_type"].as_str().unwrap_or("ignore").to_string();
let threshold = self.config.behavior.spontaneous_threshold;
let reaction_threshold = self.config.behavior.reaction_threshold;
let reaction_enabled = self.config.behavior.reaction_enabled;
info!(
relevance,
threshold,
reaction_threshold,
response_type = response_type.as_str(),
hook = hook.as_str(),
emoji = emoji.as_str(),
"LLM evaluation parsed"
);
if relevance >= threshold {
Engagement::MaybeRespond { relevance, hook }
} else if reaction_enabled
&& relevance >= reaction_threshold
&& !emoji.is_empty()
{
info!(
relevance,
emoji = emoji.as_str(),
"Reaction range — will react with emoji"
);
Engagement::React { emoji, relevance }
} else {
Engagement::Ignore
// The LLM decides the response type, but we still gate on relevance threshold
match response_type.as_str() {
"message" if relevance >= threshold => {
Engagement::Respond { relevance, hook }
}
"thread" if relevance >= threshold * 0.7 => {
// Threads have a lower threshold — they're less intrusive
Engagement::ThreadReply { relevance, hook }
}
"react" if reaction_enabled && !emoji.is_empty() => {
Engagement::React { emoji, relevance }
}
// Fallback: a "message" whose relevance falls below the inline threshold may still
// qualify for the lower thread threshold. (NOTE(review): the "thread" alternative in
// this pattern is unreachable — the same guard already matched it above.)
"message" | "thread" if relevance >= threshold * 0.7 => {
Engagement::ThreadReply { relevance, hook }
}
_ if reaction_enabled && !emoji.is_empty() && relevance >= self.config.behavior.reaction_threshold => {
Engagement::React { emoji, relevance }
}
_ => Engagement::Ignore,
}
}
Err(e) => {

View File

@@ -28,6 +28,18 @@ pub struct AgentsConfig {
/// Whether to use the Conversations API (vs manual message management).
#[serde(default)]
pub use_conversations_api: bool,
/// Model for research micro-agents.
#[serde(default = "default_research_agent_model")]
pub research_model: String,
/// Max tool calls per research micro-agent.
#[serde(default = "default_research_max_iterations")]
pub research_max_iterations: usize,
/// Max parallel agents per research wave.
#[serde(default = "default_research_max_agents")]
pub research_max_agents: usize,
/// Max recursion depth for research agents spawning sub-agents.
#[serde(default = "default_research_max_depth")]
pub research_max_depth: usize,
}
impl Default for AgentsConfig {
@@ -37,6 +49,10 @@ impl Default for AgentsConfig {
domain_model: default_model(),
compaction_threshold: default_compaction_threshold(),
use_conversations_api: false,
research_model: default_research_agent_model(),
research_max_iterations: default_research_max_iterations(),
research_max_agents: default_research_max_agents(),
research_max_depth: default_research_max_depth(),
}
}
}
@@ -122,12 +138,22 @@ pub struct BehaviorConfig {
pub script_fetch_allowlist: Vec<String>,
#[serde(default = "default_memory_extraction_enabled")]
pub memory_extraction_enabled: bool,
/// Minimum fraction of a source room's members that must also be in the
/// requesting room for cross-room search results to be visible.
/// 0.0 = no restriction, 1.0 = only same room.
#[serde(default = "default_room_overlap_threshold")]
pub room_overlap_threshold: f32,
/// Duration in ms that Sol stays silent after being told to be quiet.
#[serde(default = "default_silence_duration_ms")]
pub silence_duration_ms: u64,
}
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ServicesConfig {
#[serde(default)]
pub gitea: Option<GiteaConfig>,
#[serde(default)]
pub kratos: Option<KratosConfig>,
}
#[derive(Debug, Clone, Deserialize)]
@@ -135,6 +161,11 @@ pub struct GiteaConfig {
pub url: String,
}
#[derive(Debug, Clone, Deserialize)]
pub struct KratosConfig {
pub admin_url: String,
}
#[derive(Debug, Clone, Deserialize)]
pub struct VaultConfig {
/// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200
@@ -187,8 +218,14 @@ fn default_script_timeout_secs() -> u64 { 5 }
fn default_script_max_heap_mb() -> usize { 64 }
fn default_memory_index() -> String { "sol_user_memory".into() }
fn default_memory_extraction_enabled() -> bool { true }
fn default_room_overlap_threshold() -> f32 { 0.25 }
fn default_silence_duration_ms() -> u64 { 1_800_000 } // 30 minutes
fn default_db_path() -> String { "/data/sol.db".into() }
fn default_compaction_threshold() -> u32 { 118000 } // ~90% of 131K context window
fn default_research_agent_model() -> String { "ministral-3b-latest".into() }
fn default_research_max_iterations() -> usize { 10 }
fn default_research_max_agents() -> usize { 25 }
fn default_research_max_depth() -> usize { 4 }
impl Config {
pub fn load(path: &str) -> anyhow::Result<Self> {
@@ -322,6 +359,17 @@ state_store_path = "/data/sol/state"
assert!(config.services.gitea.is_none());
}
#[test]
fn test_services_config_with_kratos() {
let with_kratos = format!(
"{}\n[services.kratos]\nadmin_url = \"http://kratos-admin:80\"\n",
MINIMAL_CONFIG
);
let config = Config::from_str(&with_kratos).unwrap();
let kratos = config.services.kratos.unwrap();
assert_eq!(kratos.admin_url, "http://kratos-admin:80");
}
#[test]
fn test_services_config_with_gitea() {
let with_services = format!(

View File

@@ -42,6 +42,8 @@ pub struct AppState {
pub last_response: Arc<Mutex<HashMap<String, Instant>>>,
/// Tracks rooms where a response is currently being generated (in-flight guard)
pub responding_in: Arc<Mutex<std::collections::HashSet<String>>>,
/// Rooms where Sol has been told to be quiet — maps room_id → silenced_until
pub silenced_until: Arc<Mutex<HashMap<String, Instant>>>,
}
pub async fn start_sync(client: Client, state: Arc<AppState>) -> anyhow::Result<()> {
@@ -193,6 +195,38 @@ async fn handle_message(
);
}
// Silence detection — if someone tells Sol to be quiet, set a per-room timer
{
let lower = body.to_lowercase();
let silence_phrases = [
"shut up", "be quiet", "shush", "silence", "stop talking",
"quiet down", "hush", "enough sol", "sol enough", "sol stop",
"sol shut up", "sol be quiet", "sol shush",
];
if silence_phrases.iter().any(|p| lower.contains(p)) {
let duration = std::time::Duration::from_millis(
state.config.behavior.silence_duration_ms,
);
let until = Instant::now() + duration;
let mut silenced = state.silenced_until.lock().await;
silenced.insert(room_id.clone(), until);
info!(
room = room_id.as_str(),
duration_mins = state.config.behavior.silence_duration_ms / 60_000,
"Silenced in room"
);
}
}
// Check if Sol is currently silenced in this room
let is_silenced = {
let silenced = state.silenced_until.lock().await;
silenced
.get(&room_id)
.map(|until| Instant::now() < *until)
.unwrap_or(false)
};
// Evaluate whether to respond
let recent: Vec<String> = {
let convs = state.conversations.lock().await;
@@ -203,28 +237,65 @@ async fn handle_message(
.collect()
};
// A: Check if this message is a reply to another human (not Sol)
let is_reply_to_human = is_reply && !is_dm && {
// If it's a reply, check the conversation context for who the previous
// message was from. We don't have event IDs in context, so we use a
// heuristic: if the most recent message before this one was from a human
// (not Sol), this reply is likely directed at them.
let convs = state.conversations.lock().await;
let ctx = convs.get_context(&room_id);
let sol_id = &state.config.matrix.user_id;
// Check the message before the current one (last in context before we added ours)
ctx.iter().rev().skip(1).next()
.map(|m| m.sender != *sol_id)
.unwrap_or(false)
};
// B: Count messages since Sol last spoke in this room
let messages_since_sol = {
let convs = state.conversations.lock().await;
let ctx = convs.get_context(&room_id);
let sol_id = &state.config.matrix.user_id;
ctx.iter().rev().take_while(|m| m.sender != *sol_id).count()
};
let engagement = state
.evaluator
.evaluate(&sender, &body, is_dm, &recent, &state.mistral)
.evaluate(
&sender, &body, is_dm, &recent, &state.mistral,
is_reply_to_human, messages_since_sol, is_silenced,
)
.await;
let (should_respond, is_spontaneous) = match engagement {
// use_thread: if true, Sol responds in a thread instead of inline
let (should_respond, is_spontaneous, use_thread) = match engagement {
Engagement::MustRespond { reason } => {
info!(room = room_id.as_str(), ?reason, "Must respond");
(true, false)
// Direct mention breaks silence
if is_silenced {
let mut silenced = state.silenced_until.lock().await;
silenced.remove(&room_id);
info!(room = room_id.as_str(), "Silence broken by direct mention");
}
(true, false, false)
}
Engagement::MaybeRespond { relevance, hook } => {
info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Maybe respond (spontaneous)");
(true, true)
Engagement::Respond { relevance, hook } => {
info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Respond (spontaneous)");
(true, true, false)
}
Engagement::ThreadReply { relevance, hook } => {
info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Thread reply (spontaneous)");
(true, true, true)
}
Engagement::React { emoji, relevance } => {
info!(room = room_id.as_str(), relevance, emoji = emoji.as_str(), "Reacting with emoji");
if let Err(e) = matrix_utils::send_reaction(&room, event.event_id.clone().into(), &emoji).await {
error!("Failed to send reaction: {e}");
}
(false, false)
(false, false, false)
}
Engagement::Ignore => (false, false),
Engagement::Ignore => (false, false, false),
};
if !should_respond {
@@ -310,6 +381,7 @@ async fn handle_message(
&state.conversation_registry,
image_data_uri.as_deref(),
context_hint,
event.event_id.clone().into(),
)
.await
} else {
@@ -331,17 +403,20 @@ async fn handle_message(
};
if let Some(text) = response {
// Reply with reference only when directly addressed. Spontaneous
// and DM messages are sent as plain content — feels more natural.
let content = if !is_spontaneous && !is_dm {
let content = if use_thread {
// Thread reply — less intrusive, for tangential contributions
matrix_utils::make_thread_reply(&text, event.event_id.to_owned())
} else if !is_spontaneous && !is_dm {
// Direct reply — when explicitly addressed
matrix_utils::make_reply_content(&text, event.event_id.to_owned())
} else {
// Plain message — spontaneous or DM, feels more natural
ruma::events::room::message::RoomMessageEventContent::text_markdown(&text)
};
if let Err(e) = room.send(content).await {
error!("Failed to send response: {e}");
} else {
info!(room = room_id.as_str(), len = text.len(), is_dm, "Response sent");
info!(room = room_id.as_str(), len = text.len(), is_dm, use_thread, "Response sent");
}
// Post-response memory extraction (fire-and-forget)
if state.config.behavior.memory_extraction_enabled {