evaluator redesign: response types, silence, structural suppression

new engagement types: Respond (inline), ThreadReply (threaded),
React, Ignore. LLM returns response_type to decide HOW to engage.

silence mechanic: "shut up"/"be quiet" sets a 30min per-room timer.
only direct @mention breaks through.

structural suppression (A+B):
- reply to non-Sol human → capped at React
- 3+ human messages since Sol → forced passive mode

threads have a lower relevance threshold (70% of spontaneous).
time context injected into evaluator prompt.
This commit is contained in:
2026-03-23 01:41:57 +00:00
parent 1058afb635
commit 3b62d86c45
3 changed files with 244 additions and 48 deletions

View File

@@ -12,7 +12,11 @@ use crate::config::Config;
#[derive(Debug)]
pub enum Engagement {
MustRespond { reason: MustRespondReason },
MaybeRespond { relevance: f32, hook: String },
/// Respond inline in the room — Sol has something valuable to contribute.
Respond { relevance: f32, hook: String },
/// Respond in a thread — Sol has something to add but it's tangential
/// or the room is busy with a human-to-human conversation.
ThreadReply { relevance: f32, hook: String },
React { emoji: String, relevance: f32 },
Ignore,
}
@@ -51,6 +55,8 @@ impl Evaluator {
}
}
/// `is_reply_to_human` — true if this message is a Matrix reply to a non-Sol user.
/// `messages_since_sol` — how many messages have been sent since Sol last spoke in this room.
/// `is_silenced` — true if Sol has an active silence timer in this room; only a direct @mention gets through.
pub async fn evaluate(
&self,
sender: &str,
@@ -58,6 +64,9 @@ impl Evaluator {
is_dm: bool,
recent_messages: &[String],
mistral: &Arc<mistralai_client::v1::client::Client>,
is_reply_to_human: bool,
messages_since_sol: usize,
is_silenced: bool,
) -> Engagement {
let body_preview: String = body.chars().take(80).collect();
@@ -67,7 +76,7 @@ impl Evaluator {
return Engagement::Ignore;
}
// Direct mention: @sol:sunbeam.pt
// Direct mention: @sol:sunbeam.pt — always responds, breaks silence
if self.mention_regex.is_match(body) {
info!(sender, body = body_preview.as_str(), rule = "direct_mention", "Engagement: MustRespond");
return Engagement::MustRespond {
@@ -75,7 +84,7 @@ impl Evaluator {
};
}
// DM
// DM — always responds (silence only applies to group rooms)
if is_dm {
info!(sender, body = body_preview.as_str(), rule = "dm", "Engagement: MustRespond");
return Engagement::MustRespond {
@@ -83,6 +92,12 @@ impl Evaluator {
};
}
// If silenced in this room, only direct @mention breaks through (checked above)
if is_silenced {
debug!(sender, body = body_preview.as_str(), "Silenced in this room — ignoring");
return Engagement::Ignore;
}
// Name invocation: "sol ..." or "hey sol ..."
if self.name_regex.is_match(body) {
info!(sender, body = body_preview.as_str(), rule = "name_invocation", "Engagement: MustRespond");
@@ -91,6 +106,32 @@ impl Evaluator {
};
}
// ── Structural suppression (A+B) ──
// A: If this is a reply to another human (not Sol), cap at React-only.
// People replying to each other aren't asking for Sol's input.
if is_reply_to_human {
info!(
sender, body = body_preview.as_str(),
rule = "reply_to_human",
"Reply to non-Sol human — suppressing to React-only"
);
// Still run the LLM eval for potential emoji reaction, but cap the result
let engagement = self.evaluate_relevance(body, recent_messages, mistral).await;
return match engagement {
Engagement::React { emoji, relevance } => Engagement::React { emoji, relevance },
Engagement::Respond { relevance, .. } if relevance >= self.config.behavior.reaction_threshold => {
// Would have responded inline, but reply-to-human caps engagement at React-only.
// `Respond` carries no emoji to demote to, so there is nothing to react with — ignore.
Engagement::Ignore
}
_ => Engagement::Ignore,
};
}
// B: Consecutive message decay. After 3+ human messages without Sol,
// switch from active to passive evaluation context.
let force_passive = messages_since_sol >= 3;
info!(
sender, body = body_preview.as_str(),
threshold = self.config.behavior.spontaneous_threshold,
@@ -98,11 +139,13 @@ impl Evaluator {
context_len = recent_messages.len(),
eval_window = self.config.behavior.evaluation_context_window,
detect_sol = self.config.behavior.detect_sol_in_conversation,
messages_since_sol,
force_passive,
is_reply_to_human,
"No rule match — running LLM relevance evaluation"
);
// Cheap evaluation call for spontaneous responses
self.evaluate_relevance(body, recent_messages, mistral)
self.evaluate_relevance_with_mode(body, recent_messages, mistral, force_passive)
.await
}
@@ -140,6 +183,16 @@ impl Evaluator {
body: &str,
recent_messages: &[String],
mistral: &Arc<mistralai_client::v1::client::Client>,
) -> Engagement {
self.evaluate_relevance_with_mode(body, recent_messages, mistral, false).await
}
async fn evaluate_relevance_with_mode(
&self,
body: &str,
recent_messages: &[String],
mistral: &Arc<mistralai_client::v1::client::Client>,
force_passive: bool,
) -> Engagement {
let window = self.config.behavior.evaluation_context_window;
let context = recent_messages
@@ -151,8 +204,11 @@ impl Evaluator {
.collect::<Vec<_>>()
.join("\n");
// Check if Sol recently participated in this conversation
let sol_in_context = self.config.behavior.detect_sol_in_conversation
// Check if Sol recently participated in this conversation.
// force_passive overrides: if 3+ human messages since Sol spoke, treat as passive
// even if Sol's messages are visible in the context window.
let sol_in_context = !force_passive
&& self.config.behavior.detect_sol_in_conversation
&& recent_messages.iter().any(|m| {
let lower = m.to_lowercase();
lower.starts_with("sol:") || lower.starts_with("sol ") || lower.contains("@sol:")
@@ -181,15 +237,16 @@ impl Evaluator {
"Building evaluation prompt"
);
// System message: Sol's full personality + evaluation framing.
// This gives the evaluator deep context on who Sol is, what they care about,
// and how they'd naturally engage — so relevance scoring reflects Sol's actual character.
// System message: Sol's full personality + evaluation framing + time context.
let tc = crate::time_context::TimeContext::now();
let system = format!(
"You are Sol's engagement evaluator. Your job is to decide whether Sol should \
respond to a message in a group chat, based on Sol's personality, expertise, \
and relationship with the people in the room.\n\n\
"You are Sol's engagement evaluator. Your job is to decide whether and HOW Sol \
should respond to a message in a group chat.\n\n\
# who sol is\n\n\
{}\n\n\
# time\n\n\
{}\n\n\
# your task\n\n\
Read the conversation below and evaluate whether Sol would naturally want to \
respond to the latest message. Consider:\n\
@@ -198,16 +255,25 @@ impl Evaluator {
- Is someone implicitly asking for Sol's help (even without mentioning them)?\n\
- Is this a continuation of something Sol was already involved in?\n\
- Would Sol find this genuinely interesting or have something meaningful to add?\n\
- Would a reaction (emoji) be more appropriate than a full response?\n\n\
- Are two humans talking to each other? If so, Sol should NOT jump in unless \
directly relevant. Two people having a conversation doesn't need a third voice.\n\
- Would a reaction (emoji) be more appropriate than a full response?\n\
- Would responding in a thread (less intrusive) be better than inline?\n\n\
{participation_note}\n\n\
Respond ONLY with JSON:\n\
{{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant.\n\
hook: if responding, a brief note on what Sol would engage with.\n\
emoji: if Sol wouldn't write a full response but might react, suggest a single \
emoji that feels natural and specific — not generic thumbs up. leave empty if \
no reaction fits.",
{{\"relevance\": 0.0-1.0, \"response_type\": \"message\"|\"thread\"|\"react\"|\"ignore\", \
\"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
relevance: 1.0 = Sol absolutely should respond, 0.0 = irrelevant.\n\
response_type:\n\
- \"message\": Sol has something genuinely valuable to add inline.\n\
- \"thread\": Sol has a useful aside or observation, but the main conversation \
is between humans — put it in a thread so it doesn't interrupt.\n\
- \"react\": emoji reaction only, no text.\n\
- \"ignore\": Sol has nothing to add.\n\
hook: if responding, what Sol would engage with.\n\
emoji: if reacting, a single emoji that feels natural and specific.",
self.system_prompt,
tc.system_block(),
);
let user_prompt = format!(
@@ -249,33 +315,40 @@ impl Evaluator {
let relevance = val["relevance"].as_f64().unwrap_or(0.0) as f32;
let hook = val["hook"].as_str().unwrap_or("").to_string();
let emoji = val["emoji"].as_str().unwrap_or("").to_string();
let response_type = val["response_type"].as_str().unwrap_or("ignore").to_string();
let threshold = self.config.behavior.spontaneous_threshold;
let reaction_threshold = self.config.behavior.reaction_threshold;
let reaction_enabled = self.config.behavior.reaction_enabled;
info!(
relevance,
threshold,
reaction_threshold,
response_type = response_type.as_str(),
hook = hook.as_str(),
emoji = emoji.as_str(),
"LLM evaluation parsed"
);
if relevance >= threshold {
Engagement::MaybeRespond { relevance, hook }
} else if reaction_enabled
&& relevance >= reaction_threshold
&& !emoji.is_empty()
{
info!(
relevance,
emoji = emoji.as_str(),
"Reaction range — will react with emoji"
);
Engagement::React { emoji, relevance }
} else {
Engagement::Ignore
// The LLM decides the response type, but we still gate on relevance threshold
match response_type.as_str() {
"message" if relevance >= threshold => {
Engagement::Respond { relevance, hook }
}
"thread" if relevance >= threshold * 0.7 => {
// Threads have a lower threshold — they're less intrusive
Engagement::ThreadReply { relevance, hook }
}
"react" if reaction_enabled && !emoji.is_empty() => {
Engagement::React { emoji, relevance }
}
// Fallback: a "message" whose relevance falls below the inline threshold may still
// qualify for the lower thread threshold. (NOTE(review): the "thread" alternative in
// this pattern is unreachable — the same guard already matched it above.)
"message" | "thread" if relevance >= threshold * 0.7 => {
Engagement::ThreadReply { relevance, hook }
}
_ if reaction_enabled && !emoji.is_empty() && relevance >= self.config.behavior.reaction_threshold => {
Engagement::React { emoji, relevance }
}
_ => Engagement::Ignore,
}
}
Err(e) => {

View File

@@ -28,6 +28,18 @@ pub struct AgentsConfig {
/// Whether to use the Conversations API (vs manual message management).
#[serde(default)]
pub use_conversations_api: bool,
/// Model for research micro-agents.
#[serde(default = "default_research_agent_model")]
pub research_model: String,
/// Max tool calls per research micro-agent.
#[serde(default = "default_research_max_iterations")]
pub research_max_iterations: usize,
/// Max parallel agents per research wave.
#[serde(default = "default_research_max_agents")]
pub research_max_agents: usize,
/// Max recursion depth for research agents spawning sub-agents.
#[serde(default = "default_research_max_depth")]
pub research_max_depth: usize,
}
impl Default for AgentsConfig {
@@ -37,6 +49,10 @@ impl Default for AgentsConfig {
domain_model: default_model(),
compaction_threshold: default_compaction_threshold(),
use_conversations_api: false,
research_model: default_research_agent_model(),
research_max_iterations: default_research_max_iterations(),
research_max_agents: default_research_max_agents(),
research_max_depth: default_research_max_depth(),
}
}
}
@@ -122,12 +138,22 @@ pub struct BehaviorConfig {
pub script_fetch_allowlist: Vec<String>,
#[serde(default = "default_memory_extraction_enabled")]
pub memory_extraction_enabled: bool,
/// Minimum fraction of a source room's members that must also be in the
/// requesting room for cross-room search results to be visible.
/// 0.0 = no restriction, 1.0 = only same room.
#[serde(default = "default_room_overlap_threshold")]
pub room_overlap_threshold: f32,
/// Duration in ms that Sol stays silent after being told to be quiet.
#[serde(default = "default_silence_duration_ms")]
pub silence_duration_ms: u64,
}
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ServicesConfig {
#[serde(default)]
pub gitea: Option<GiteaConfig>,
#[serde(default)]
pub kratos: Option<KratosConfig>,
}
#[derive(Debug, Clone, Deserialize)]
@@ -135,6 +161,11 @@ pub struct GiteaConfig {
pub url: String,
}
#[derive(Debug, Clone, Deserialize)]
pub struct KratosConfig {
pub admin_url: String,
}
#[derive(Debug, Clone, Deserialize)]
pub struct VaultConfig {
/// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200
@@ -187,8 +218,14 @@ fn default_script_timeout_secs() -> u64 { 5 }
fn default_script_max_heap_mb() -> usize { 64 }
fn default_memory_index() -> String { "sol_user_memory".into() }
fn default_memory_extraction_enabled() -> bool { true }
fn default_room_overlap_threshold() -> f32 { 0.25 }
fn default_silence_duration_ms() -> u64 { 1_800_000 } // 30 minutes
fn default_db_path() -> String { "/data/sol.db".into() }
fn default_compaction_threshold() -> u32 { 118000 } // ~90% of 131K context window
fn default_research_agent_model() -> String { "ministral-3b-latest".into() }
fn default_research_max_iterations() -> usize { 10 }
fn default_research_max_agents() -> usize { 25 }
fn default_research_max_depth() -> usize { 4 }
impl Config {
pub fn load(path: &str) -> anyhow::Result<Self> {
@@ -322,6 +359,17 @@ state_store_path = "/data/sol/state"
assert!(config.services.gitea.is_none());
}
#[test]
fn test_services_config_with_kratos() {
let with_kratos = format!(
"{}\n[services.kratos]\nadmin_url = \"http://kratos-admin:80\"\n",
MINIMAL_CONFIG
);
let config = Config::from_str(&with_kratos).unwrap();
let kratos = config.services.kratos.unwrap();
assert_eq!(kratos.admin_url, "http://kratos-admin:80");
}
#[test]
fn test_services_config_with_gitea() {
let with_services = format!(

View File

@@ -42,6 +42,8 @@ pub struct AppState {
pub last_response: Arc<Mutex<HashMap<String, Instant>>>,
/// Tracks rooms where a response is currently being generated (in-flight guard)
pub responding_in: Arc<Mutex<std::collections::HashSet<String>>>,
/// Rooms where Sol has been told to be quiet — maps room_id → silenced_until
pub silenced_until: Arc<Mutex<HashMap<String, Instant>>>,
}
pub async fn start_sync(client: Client, state: Arc<AppState>) -> anyhow::Result<()> {
@@ -193,6 +195,38 @@ async fn handle_message(
);
}
// Silence detection — if someone tells Sol to be quiet, set a per-room timer
{
let lower = body.to_lowercase();
let silence_phrases = [
"shut up", "be quiet", "shush", "silence", "stop talking",
"quiet down", "hush", "enough sol", "sol enough", "sol stop",
"sol shut up", "sol be quiet", "sol shush",
];
if silence_phrases.iter().any(|p| lower.contains(p)) {
let duration = std::time::Duration::from_millis(
state.config.behavior.silence_duration_ms,
);
let until = Instant::now() + duration;
let mut silenced = state.silenced_until.lock().await;
silenced.insert(room_id.clone(), until);
info!(
room = room_id.as_str(),
duration_mins = state.config.behavior.silence_duration_ms / 60_000,
"Silenced in room"
);
}
}
// Check if Sol is currently silenced in this room
let is_silenced = {
let silenced = state.silenced_until.lock().await;
silenced
.get(&room_id)
.map(|until| Instant::now() < *until)
.unwrap_or(false)
};
// Evaluate whether to respond
let recent: Vec<String> = {
let convs = state.conversations.lock().await;
@@ -203,28 +237,65 @@ async fn handle_message(
.collect()
};
// A: Check if this message is a reply to another human (not Sol)
let is_reply_to_human = is_reply && !is_dm && {
// If it's a reply, check the conversation context for who the previous
// message was from. We don't have event IDs in context, so we use a
// heuristic: if the most recent message before this one was from a human
// (not Sol), this reply is likely directed at them.
let convs = state.conversations.lock().await;
let ctx = convs.get_context(&room_id);
let sol_id = &state.config.matrix.user_id;
// Check the message before the current one (last in context before we added ours)
ctx.iter().rev().skip(1).next()
.map(|m| m.sender != *sol_id)
.unwrap_or(false)
};
// B: Count messages since Sol last spoke in this room
let messages_since_sol = {
let convs = state.conversations.lock().await;
let ctx = convs.get_context(&room_id);
let sol_id = &state.config.matrix.user_id;
ctx.iter().rev().take_while(|m| m.sender != *sol_id).count()
};
let engagement = state
.evaluator
.evaluate(&sender, &body, is_dm, &recent, &state.mistral)
.evaluate(
&sender, &body, is_dm, &recent, &state.mistral,
is_reply_to_human, messages_since_sol, is_silenced,
)
.await;
let (should_respond, is_spontaneous) = match engagement {
// use_thread: if true, Sol responds in a thread instead of inline
let (should_respond, is_spontaneous, use_thread) = match engagement {
Engagement::MustRespond { reason } => {
info!(room = room_id.as_str(), ?reason, "Must respond");
(true, false)
// Direct mention breaks silence
if is_silenced {
let mut silenced = state.silenced_until.lock().await;
silenced.remove(&room_id);
info!(room = room_id.as_str(), "Silence broken by direct mention");
}
(true, false, false)
}
Engagement::MaybeRespond { relevance, hook } => {
info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Maybe respond (spontaneous)");
(true, true)
Engagement::Respond { relevance, hook } => {
info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Respond (spontaneous)");
(true, true, false)
}
Engagement::ThreadReply { relevance, hook } => {
info!(room = room_id.as_str(), relevance, hook = hook.as_str(), "Thread reply (spontaneous)");
(true, true, true)
}
Engagement::React { emoji, relevance } => {
info!(room = room_id.as_str(), relevance, emoji = emoji.as_str(), "Reacting with emoji");
if let Err(e) = matrix_utils::send_reaction(&room, event.event_id.clone().into(), &emoji).await {
error!("Failed to send reaction: {e}");
}
(false, false)
(false, false, false)
}
Engagement::Ignore => (false, false),
Engagement::Ignore => (false, false, false),
};
if !should_respond {
@@ -310,6 +381,7 @@ async fn handle_message(
&state.conversation_registry,
image_data_uri.as_deref(),
context_hint,
event.event_id.clone().into(),
)
.await
} else {
@@ -331,17 +403,20 @@ async fn handle_message(
};
if let Some(text) = response {
// Reply with reference only when directly addressed. Spontaneous
// and DM messages are sent as plain content — feels more natural.
let content = if !is_spontaneous && !is_dm {
let content = if use_thread {
// Thread reply — less intrusive, for tangential contributions
matrix_utils::make_thread_reply(&text, event.event_id.to_owned())
} else if !is_spontaneous && !is_dm {
// Direct reply — when explicitly addressed
matrix_utils::make_reply_content(&text, event.event_id.to_owned())
} else {
// Plain message — spontaneous or DM, feels more natural
ruma::events::room::message::RoomMessageEventContent::text_markdown(&text)
};
if let Err(e) = room.send(content).await {
error!("Failed to send response: {e}");
} else {
info!(room = room_id.as_str(), len = text.len(), is_dm, "Response sent");
info!(room = room_id.as_str(), len = text.len(), is_dm, use_thread, "Response sent");
}
// Post-response memory extraction (fire-and-forget)
if state.config.behavior.memory_extraction_enabled {