From 3b62d86c454ce8c847ae4d6baf6447f372c15d7f Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Mon, 23 Mar 2026 01:41:57 +0000 Subject: [PATCH] evaluator redesign: response types, silence, structural suppression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit new engagement types: Respond (inline), ThreadReply (threaded), React, Ignore. LLM returns response_type to decide HOW to engage. silence mechanic: "shut up"/"be quiet" sets a 30min per-room timer. only direct @mention breaks through. structural suppression (A+B): - reply to non-Sol human → capped at React - 3+ human messages since Sol → forced passive mode threads have a lower relevance threshold (70% of spontaneous). time context injected into evaluator prompt. --- src/brain/evaluator.rs | 145 +++++++++++++++++++++++++++++++---------- src/config.rs | 48 ++++++++++++++ src/sync.rs | 99 ++++++++++++++++++++++++---- 3 files changed, 244 insertions(+), 48 deletions(-) diff --git a/src/brain/evaluator.rs b/src/brain/evaluator.rs index 9ae9078..9377346 100644 --- a/src/brain/evaluator.rs +++ b/src/brain/evaluator.rs @@ -12,7 +12,11 @@ use crate::config::Config; #[derive(Debug)] pub enum Engagement { MustRespond { reason: MustRespondReason }, - MaybeRespond { relevance: f32, hook: String }, + /// Respond inline in the room — Sol has something valuable to contribute. + Respond { relevance: f32, hook: String }, + /// Respond in a thread — Sol has something to add but it's tangential + /// or the room is busy with a human-to-human conversation. + ThreadReply { relevance: f32, hook: String }, React { emoji: String, relevance: f32 }, Ignore, } @@ -51,6 +55,8 @@ impl Evaluator { } } + /// `is_reply_to_human` — true if this message is a Matrix reply to a non-Sol user. + /// `messages_since_sol` — how many messages have been sent since Sol last spoke in this room. pub async fn evaluate( &self, sender: &str, @@ -58,6 +64,9 @@ impl Evaluator { is_dm: bool, recent_messages: &[String], mistral: &Arc, + is_reply_to_human: bool, + messages_since_sol: usize, + is_silenced: bool, ) -> Engagement { let body_preview: String = body.chars().take(80).collect(); @@ -67,7 +76,7 @@ impl Evaluator { return Engagement::Ignore; } - // Direct mention: @sol:sunbeam.pt + // Direct mention: @sol:sunbeam.pt — always responds, breaks silence if self.mention_regex.is_match(body) { info!(sender, body = body_preview.as_str(), rule = "direct_mention", "Engagement: MustRespond"); return Engagement::MustRespond { @@ -75,7 +84,7 @@ impl Evaluator { }; } - // DM + // DM — always responds (silence only applies to group rooms) if is_dm { info!(sender, body = body_preview.as_str(), rule = "dm", "Engagement: MustRespond"); return Engagement::MustRespond { @@ -83,6 +92,12 @@ impl Evaluator { }; } + // If silenced in this room, only direct @mention breaks through (checked above) + if is_silenced { + debug!(sender, body = body_preview.as_str(), "Silenced in this room — ignoring"); + return Engagement::Ignore; + } + // Name invocation: "sol ..." or "hey sol ..." if self.name_regex.is_match(body) { info!(sender, body = body_preview.as_str(), rule = "name_invocation", "Engagement: MustRespond"); @@ -91,6 +106,32 @@ impl Evaluator { }; } + // ── Structural suppression (A+B) ── + + // A: If this is a reply to another human (not Sol), cap at React-only. + // People replying to each other aren't asking for Sol's input. + if is_reply_to_human { + info!( + sender, body = body_preview.as_str(), + rule = "reply_to_human", + "Reply to non-Sol human — suppressing to React-only" + ); + // Still run the LLM eval for potential emoji reaction, but cap the result + let engagement = self.evaluate_relevance(body, recent_messages, mistral).await; + return match engagement { + Engagement::React { emoji, relevance } => Engagement::React { emoji, relevance }, + Engagement::Respond { relevance, .. } if relevance >= self.config.behavior.reaction_threshold => { + // Would have responded, but demote to just a reaction if the LLM suggested one + Engagement::Ignore + } + _ => Engagement::Ignore, + }; + } + + // B: Consecutive message decay. After 3+ human messages without Sol, + // switch from active to passive evaluation context. + let force_passive = messages_since_sol >= 3; + info!( sender, body = body_preview.as_str(), threshold = self.config.behavior.spontaneous_threshold, @@ -98,11 +139,13 @@ impl Evaluator { context_len = recent_messages.len(), eval_window = self.config.behavior.evaluation_context_window, detect_sol = self.config.behavior.detect_sol_in_conversation, + messages_since_sol, + force_passive, + is_reply_to_human, "No rule match — running LLM relevance evaluation" ); - // Cheap evaluation call for spontaneous responses - self.evaluate_relevance(body, recent_messages, mistral) + self.evaluate_relevance_with_mode(body, recent_messages, mistral, force_passive) .await } @@ -140,6 +183,16 @@ impl Evaluator { body: &str, recent_messages: &[String], mistral: &Arc, + ) -> Engagement { + self.evaluate_relevance_with_mode(body, recent_messages, mistral, false).await + } + + async fn evaluate_relevance_with_mode( + &self, + body: &str, + recent_messages: &[String], + mistral: &Arc, + force_passive: bool, ) -> Engagement { let window = self.config.behavior.evaluation_context_window; let context = recent_messages @@ -151,8 +204,11 @@ impl Evaluator { .collect::>() .join("\n"); - // Check if Sol recently participated in this conversation - let sol_in_context = self.config.behavior.detect_sol_in_conversation + // Check if Sol recently participated in this conversation. + // force_passive overrides: if 3+ human messages since Sol spoke, treat as passive + // even if Sol's messages are visible in the context window. + let sol_in_context = !force_passive + && self.config.behavior.detect_sol_in_conversation && recent_messages.iter().any(|m| { let lower = m.to_lowercase(); lower.starts_with("sol:") || lower.starts_with("sol ") || lower.contains("@sol:") @@ -181,15 +237,16 @@ impl Evaluator { "Building evaluation prompt" ); - // System message: Sol's full personality + evaluation framing. - // This gives the evaluator deep context on who Sol is, what they care about, - // and how they'd naturally engage — so relevance scoring reflects Sol's actual character. + // System message: Sol's full personality + evaluation framing + time context. + let tc = crate::time_context::TimeContext::now(); + let system = format!( - "You are Sol's engagement evaluator. Your job is to decide whether Sol should \ - respond to a message in a group chat, based on Sol's personality, expertise, \ - and relationship with the people in the room.\n\n\ + "You are Sol's engagement evaluator. Your job is to decide whether and HOW Sol \ + should respond to a message in a group chat.\n\n\ # who sol is\n\n\ {}\n\n\ + # time\n\n\ + {}\n\n\ # your task\n\n\ Read the conversation below and evaluate whether Sol would naturally want to \ respond to the latest message. Consider:\n\ @@ -198,16 +255,25 @@ impl Evaluator { - Is someone implicitly asking for Sol's help (even without mentioning them)?\n\ - Is this a continuation of something Sol was already involved in?\n\ - Would Sol find this genuinely interesting or have something meaningful to add?\n\ - - Would a reaction (emoji) be more appropriate than a full response?\n\n\ + - Are two humans talking to each other? If so, Sol should NOT jump in unless \ + directly relevant. Two people having a conversation doesn't need a third voice.\n\ + - Would a reaction (emoji) be more appropriate than a full response?\n\ + - Would responding in a thread (less intrusive) be better than inline?\n\n\ {participation_note}\n\n\ Respond ONLY with JSON:\n\ - {{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\ - relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant.\n\ - hook: if responding, a brief note on what Sol would engage with.\n\ - emoji: if Sol wouldn't write a full response but might react, suggest a single \ - emoji that feels natural and specific — not generic thumbs up. leave empty if \ - no reaction fits.", + {{\"relevance\": 0.0-1.0, \"response_type\": \"message\"|\"thread\"|\"react\"|\"ignore\", \ + \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\ + relevance: 1.0 = Sol absolutely should respond, 0.0 = irrelevant.\n\ + response_type:\n\ + - \"message\": Sol has something genuinely valuable to add inline.\n\ + - \"thread\": Sol has a useful aside or observation, but the main conversation \ + is between humans — put it in a thread so it doesn't interrupt.\n\ + - \"react\": emoji reaction only, no text.\n\ + - \"ignore\": Sol has nothing to add.\n\ + hook: if responding, what Sol would engage with.\n\ + emoji: if reacting, a single emoji that feels natural and specific.", self.system_prompt, + tc.system_block(), ); let user_prompt = format!( @@ -249,33 +315,40 @@ impl Evaluator { let relevance = val["relevance"].as_f64().unwrap_or(0.0) as f32; let hook = val["hook"].as_str().unwrap_or("").to_string(); let emoji = val["emoji"].as_str().unwrap_or("").to_string(); + let response_type = val["response_type"].as_str().unwrap_or("ignore").to_string(); let threshold = self.config.behavior.spontaneous_threshold; - let reaction_threshold = self.config.behavior.reaction_threshold; let reaction_enabled = self.config.behavior.reaction_enabled; info!( relevance, threshold, - reaction_threshold, + response_type = response_type.as_str(), hook = hook.as_str(), emoji = emoji.as_str(), "LLM evaluation parsed" ); - if relevance >= threshold { - Engagement::MaybeRespond { relevance, hook } - } else if reaction_enabled - && relevance >= reaction_threshold - && !emoji.is_empty() - { - info!( - relevance, - emoji = emoji.as_str(), - "Reaction range — will react with emoji" - ); - Engagement::React { emoji, relevance } - } else { - Engagement::Ignore + // The LLM decides the response type, but we still gate on relevance threshold + match response_type.as_str() { + "message" if relevance >= threshold => { + Engagement::Respond { relevance, hook } + } + "thread" if relevance >= threshold * 0.7 => { + // Threads have a lower threshold — they're less intrusive + Engagement::ThreadReply { relevance, hook } + } + "react" if reaction_enabled && !emoji.is_empty() => { + Engagement::React { emoji, relevance } + } + // Fallback: if the model says "message" but relevance is below + // threshold, check if it would qualify as a thread or reaction + "message" | "thread" if relevance >= threshold * 0.7 => { + Engagement::ThreadReply { relevance, hook } + } + _ if reaction_enabled && !emoji.is_empty() && relevance >= self.config.behavior.reaction_threshold => { + Engagement::React { emoji, relevance } + } + _ => Engagement::Ignore, } } Err(e) => { diff --git a/src/config.rs b/src/config.rs index a73ab4f..956846e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -28,6 +28,18 @@ pub struct AgentsConfig { /// Whether to use the Conversations API (vs manual message management). #[serde(default)] pub use_conversations_api: bool, + /// Model for research micro-agents. + #[serde(default = "default_research_agent_model")] + pub research_model: String, + /// Max tool calls per research micro-agent. + #[serde(default = "default_research_max_iterations")] + pub research_max_iterations: usize, + /// Max parallel agents per research wave. + #[serde(default = "default_research_max_agents")] + pub research_max_agents: usize, + /// Max recursion depth for research agents spawning sub-agents. + #[serde(default = "default_research_max_depth")] + pub research_max_depth: usize, } impl Default for AgentsConfig { @@ -37,6 +49,10 @@ impl Default for AgentsConfig { domain_model: default_model(), compaction_threshold: default_compaction_threshold(), use_conversations_api: false, + research_model: default_research_agent_model(), + research_max_iterations: default_research_max_iterations(), + research_max_agents: default_research_max_agents(), + research_max_depth: default_research_max_depth(), } } } @@ -122,12 +138,22 @@ pub struct BehaviorConfig { pub script_fetch_allowlist: Vec, #[serde(default = "default_memory_extraction_enabled")] pub memory_extraction_enabled: bool, + /// Minimum fraction of a source room's members that must also be in the + /// requesting room for cross-room search results to be visible. + /// 0.0 = no restriction, 1.0 = only same room. + #[serde(default = "default_room_overlap_threshold")] + pub room_overlap_threshold: f32, + /// Duration in ms that Sol stays silent after being told to be quiet. + #[serde(default = "default_silence_duration_ms")] + pub silence_duration_ms: u64, } #[derive(Debug, Clone, Deserialize, Default)] pub struct ServicesConfig { #[serde(default)] pub gitea: Option, + #[serde(default)] + pub kratos: Option, } #[derive(Debug, Clone, Deserialize)] @@ -135,6 +161,11 @@ pub struct GiteaConfig { pub url: String, } +#[derive(Debug, Clone, Deserialize)] +pub struct KratosConfig { + pub admin_url: String, +} + #[derive(Debug, Clone, Deserialize)] pub struct VaultConfig { /// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200 @@ -187,8 +218,14 @@ fn default_script_timeout_secs() -> u64 { 5 } fn default_script_max_heap_mb() -> usize { 64 } fn default_memory_index() -> String { "sol_user_memory".into() } fn default_memory_extraction_enabled() -> bool { true } +fn default_room_overlap_threshold() -> f32 { 0.25 } +fn default_silence_duration_ms() -> u64 { 1_800_000 } // 30 minutes fn default_db_path() -> String { "/data/sol.db".into() } fn default_compaction_threshold() -> u32 { 118000 } // ~90% of 131K context window +fn default_research_agent_model() -> String { "ministral-3b-latest".into() } +fn default_research_max_iterations() -> usize { 10 } +fn default_research_max_agents() -> usize { 25 } +fn default_research_max_depth() -> usize { 4 } impl Config { pub fn load(path: &str) -> anyhow::Result { @@ -322,6 +359,17 @@ state_store_path = "/data/sol/state" assert!(config.services.gitea.is_none()); } + #[test] + fn test_services_config_with_kratos() { + let with_kratos = format!( + "{}\n[services.kratos]\nadmin_url = \"http://kratos-admin:80\"\n", + MINIMAL_CONFIG + ); + let config = Config::from_str(&with_kratos).unwrap(); + let kratos = config.services.kratos.unwrap(); + assert_eq!(kratos.admin_url, "http://kratos-admin:80"); + } + #[test] fn test_services_config_with_gitea() { let with_services = format!( diff --git a/src/sync.rs b/src/sync.rs index 5548538..63007cf 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -42,6 +42,8 @@ pub struct AppState { pub last_response: Arc>>, /// Tracks rooms where a response is currently being generated (in-flight guard) pub responding_in: Arc>>, + /// Rooms where Sol has been told to be quiet — maps room_id → silenced_until + pub silenced_until: Arc>>, } pub async fn start_sync(client: Client, state: Arc) -> anyhow::Result<()> { @@ -193,6 +195,38 @@ async fn handle_message( ); } + // Silence detection — if someone tells Sol to be quiet, set a per-room timer + { + let lower = body.to_lowercase(); + let silence_phrases = [ + "shut up", "be quiet", "shush", "silence", "stop talking", + "quiet down", "hush", "enough sol", "sol enough", "sol stop", + "sol shut up", "sol be quiet", "sol shush", + ]; + if silence_phrases.iter().any(|p| lower.contains(p)) { + let duration = std::time::Duration::from_millis( + state.config.behavior.silence_duration_ms, + ); + let until = Instant::now() + duration; + let mut silenced = state.silenced_until.lock().await; + silenced.insert(room_id.clone(), until); + info!( + room = room_id.as_str(), + duration_mins = state.config.behavior.silence_duration_ms / 60_000, + "Silenced in room" + ); + } + } + + // Check if Sol is currently silenced in this room + let is_silenced = { + let silenced = state.silenced_until.lock().await; + silenced + .get(&room_id) + .map(|until| Instant::now() < *until) + .unwrap_or(false) + }; + // Evaluate whether to respond let recent: Vec = { let convs = state.conversations.lock().await; @@ -203,28 +237,65 @@ async fn handle_message( .collect() }; + // A: Check if this message is a reply to another human (not Sol) + let is_reply_to_human = is_reply && !is_dm && { + // If it's a reply, check the conversation context for who the previous + // message was from. We don't have event IDs in context, so we use a + // heuristic: if the most recent message before this one was from a human + // (not Sol), this reply is likely directed at them. + let convs = state.conversations.lock().await; + let ctx = convs.get_context(&room_id); + let sol_id = &state.config.matrix.user_id; + // Check the message before the current one (last in context before we added ours) + ctx.iter().rev().skip(1).next() + .map(|m| m.sender != *sol_id) + .unwrap_or(false) + }; + + // B: Count messages since Sol last spoke in this room + let messages_since_sol = { + let convs = state.conversations.lock().await; + let ctx = convs.get_context(&room_id); + let sol_id = &state.config.matrix.user_id; + ctx.iter().rev().take_while(|m| m.sender != *sol_id).count() + }; + let engagement = state .evaluator - .evaluate(&sender, &body, is_dm, &recent, &state.mistral) + .evaluate( + &sender, &body, is_dm, &recent, &state.mistral, + is_reply_to_human, messages_since_sol, is_silenced, + ) .await; - let (should_respond, is_spontaneous) = match engagement { + // use_thread: if true, Sol responds in a thread instead of inline + let (should_respond, is_spontaneous, use_thread) = match engagement { Engagement::MustRespond { reason } => { info!(room = room_id.as_str(), ?reason, "Must respond"); - (true, false) + // Direct mention breaks silence + if is_silenced { + let mut silenced = state.silenced_until.lock().await; + silenced.remove(&room_id); + info!(room = room_id.as_str(), "Silence broken by direct mention"); + } + (true, false, false) } - Engagement::MaybeRespond { relevance, hook } => { - info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Maybe respond (spontaneous)"); - (true, true) + Engagement::Respond { relevance, hook } => { + info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Respond (spontaneous)"); + (true, true, false) + } + Engagement::ThreadReply { relevance, hook } => { + info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Thread reply (spontaneous)"); + (true, true, true) } Engagement::React { emoji, relevance } => { info!(room = room_id.as_str(), relevance, emoji = emoji.as_str(), "Reacting with emoji"); if let Err(e) = matrix_utils::send_reaction(&room, event.event_id.clone().into(), &emoji).await { error!("Failed to send reaction: {e}"); } - (false, false) + (false, false, false) } - Engagement::Ignore => (false, false), + Engagement::Ignore => (false, false, false), }; if !should_respond { @@ -310,6 +381,7 @@ async fn handle_message( &state.conversation_registry, image_data_uri.as_deref(), context_hint, + event.event_id.clone().into(), ) .await } else { @@ -331,17 +403,20 @@ async fn handle_message( }; if let Some(text) = response { - // Reply with reference only when directly addressed. Spontaneous - // and DM messages are sent as plain content — feels more natural. - let content = if !is_spontaneous && !is_dm { + let content = if use_thread { + // Thread reply — less intrusive, for tangential contributions + matrix_utils::make_thread_reply(&text, event.event_id.to_owned()) + } else if !is_spontaneous && !is_dm { + // Direct reply — when explicitly addressed matrix_utils::make_reply_content(&text, event.event_id.to_owned()) } else { + // Plain message — spontaneous or DM, feels more natural ruma::events::room::message::RoomMessageEventContent::text_markdown(&text) }; if let Err(e) = room.send(content).await { error!("Failed to send response: {e}"); } else { - info!(room = room_id.as_str(), len = text.len(), is_dm, "Response sent"); + info!(room = room_id.as_str(), len = text.len(), is_dm, use_thread, "Response sent"); } // Post-response memory extraction (fire-and-forget) if state.config.behavior.memory_extraction_enabled {