feat: initial Sol virtual librarian implementation

Matrix bot with E2EE (matrix-sdk 0.9) that passively archives all messages to OpenSearch and responds to queries via Mistral AI with function calling tools. Core systems: - Archive: bulk OpenSearch indexer with batch/flush, edit/redaction handling, embedding pipeline passthrough - Brain: rule-based engagement evaluator (mentions, DMs, name invocations), LLM-powered spontaneous engagement, per-room conversation context windows, response delay simulation - Tools: search_archive, get_room_context, list_rooms, get_room_members registered as Mistral function calling tools with iterative tool loop - Personality: templated system prompt with Sol's librarian persona 47 unit tests covering config, evaluator, conversation windowing, personality templates, schema serialization, and search query building.
2026-03-20 21:40:13 +00:00
commit 4dc20bee23
21 changed files with 6934 additions and 0 deletions
--- a/src/brain/evaluator.rs
+++ b/src/brain/evaluator.rs
@@ -0,0 +1,307 @@
+use std::sync::Arc;
+
+use mistralai_client::v1::{
+    chat::{ChatMessage, ChatParams, ResponseFormat},
+    constants::Model,
+};
+use regex::Regex;
+use tracing::{debug, warn};
+
+use crate::config::Config;
+
+#[derive(Debug)]
+pub enum Engagement {
+    MustRespond { reason: MustRespondReason },
+    MaybeRespond { relevance: f32, hook: String },
+    Ignore,
+}
+
+#[derive(Debug)]
+pub enum MustRespondReason {
+    DirectMention,
+    DirectMessage,
+    NameInvocation,
+}
+
+pub struct Evaluator {
+    config: Arc<Config>,
+    mention_regex: Regex,
+    name_regex: Regex,
+}
+
+impl Evaluator {
+    // todo(sienna): regex must be configrable
+    pub fn new(config: Arc<Config>) -> Self {
+        let user_id = &config.matrix.user_id;
+        let mention_pattern = regex::escape(user_id);
+        let mention_regex = Regex::new(&mention_pattern).expect("Failed to compile mention regex");
+        let name_regex =
+            Regex::new(r"(?i)(?:^|\bhey\s+)\bsol\b").expect("Failed to compile name regex");
+
+        Self {
+            config,
+            mention_regex,
+            name_regex,
+        }
+    }
+
+    pub async fn evaluate(
+        &self,
+        sender: &str,
+        body: &str,
+        is_dm: bool,
+        recent_messages: &[String],
+        mistral: &Arc<mistralai_client::v1::client::Client>,
+    ) -> Engagement {
+        // Don't respond to ourselves
+        if sender == self.config.matrix.user_id {
+            return Engagement::Ignore;
+        }
+
+        // Direct mention: @sol:sunbeam.pt
+        if self.mention_regex.is_match(body) {
+            return Engagement::MustRespond {
+                reason: MustRespondReason::DirectMention,
+            };
+        }
+
+        // DM
+        if is_dm {
+            return Engagement::MustRespond {
+                reason: MustRespondReason::DirectMessage,
+            };
+        }
+
+        // Name invocation: "sol ..." or "hey sol ..."
+        if self.name_regex.is_match(body) {
+            return Engagement::MustRespond {
+                reason: MustRespondReason::NameInvocation,
+            };
+        }
+
+        // Cheap evaluation call for spontaneous responses
+        self.evaluate_relevance(body, recent_messages, mistral)
+            .await
+    }
+
+    /// Check rule-based engagement (without calling Mistral). Returns Some(Engagement)
+    /// if a rule matched, None if we need to fall through to the LLM evaluation.
+    pub fn evaluate_rules(
+        &self,
+        sender: &str,
+        body: &str,
+        is_dm: bool,
+    ) -> Option<Engagement> {
+        if sender == self.config.matrix.user_id {
+            return Some(Engagement::Ignore);
+        }
+        if self.mention_regex.is_match(body) {
+            return Some(Engagement::MustRespond {
+                reason: MustRespondReason::DirectMention,
+            });
+        }
+        if is_dm {
+            return Some(Engagement::MustRespond {
+                reason: MustRespondReason::DirectMessage,
+            });
+        }
+        if self.name_regex.is_match(body) {
+            return Some(Engagement::MustRespond {
+                reason: MustRespondReason::NameInvocation,
+            });
+        }
+        None
+    }
+
+    async fn evaluate_relevance(
+        &self,
+        body: &str,
+        recent_messages: &[String],
+        mistral: &Arc<mistralai_client::v1::client::Client>,
+    ) -> Engagement {
+        let context = recent_messages
+            .iter()
+            .rev()
+            .take(5) //todo(sienna): must be configurable
+            .rev()
+            .cloned()
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        let prompt = format!(
+            "You are evaluating whether a virtual librarian named Sol should spontaneously join \
+             a conversation. Sol has deep knowledge of the group's message archive and helps \
+             people find information.\n\n\
+             Recent conversation:\n{context}\n\n\
+             Latest message: {body}\n\n\
+             Respond ONLY with JSON: {{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\"}}\n\
+             relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant."
+        );
+
+        let messages = vec![ChatMessage::new_user_message(&prompt)];
+        let params = ChatParams {
+            response_format: Some(ResponseFormat::json_object()),
+            temperature: Some(0.1),
+            max_tokens: Some(100),
+            ..Default::default()
+        };
+
+        let model = Model::new(&self.config.mistral.evaluation_model);
+        let client = Arc::clone(mistral);
+        let result = tokio::task::spawn_blocking(move || {
+            client.chat(model, messages, Some(params))
+        })
+        .await
+        .unwrap_or_else(|e| Err(mistralai_client::v1::error::ApiError {
+            message: format!("spawn_blocking join error: {e}"),
+        }));
+
+        match result {
+            Ok(response) => {
+                let text = &response.choices[0].message.content;
+                match serde_json::from_str::<serde_json::Value>(text) {
+                    Ok(val) => {
+                        let relevance = val["relevance"].as_f64().unwrap_or(0.0) as f32;
+                        let hook = val["hook"].as_str().unwrap_or("").to_string();
+
+                        debug!(relevance, hook = hook.as_str(), "Evaluation result");
+
+                        if relevance >= self.config.behavior.spontaneous_threshold {
+                            Engagement::MaybeRespond { relevance, hook }
+                        } else {
+                            Engagement::Ignore
+                        }
+                    }
+                    Err(e) => {
+                        warn!("Failed to parse evaluation response: {e}");
+                        Engagement::Ignore
+                    }
+                }
+            }
+            Err(e) => {
+                warn!("Evaluation call failed: {e}");
+                Engagement::Ignore
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::Config;
+
+    fn test_config() -> Arc<Config> {
+        let toml = r#"
+[matrix]
+homeserver_url = "https://chat.sunbeam.pt"
+user_id = "@sol:sunbeam.pt"
+state_store_path = "/tmp/sol"
+
+[opensearch]
+url = "http://localhost:9200"
+index = "test"
+
+[mistral]
+[behavior]
+"#;
+        Arc::new(Config::from_str(toml).unwrap())
+    }
+
+    fn evaluator() -> Evaluator {
+        Evaluator::new(test_config())
+    }
+
+    #[test]
+    fn test_ignore_own_messages() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@sol:sunbeam.pt", "hello everyone", false);
+        assert!(matches!(result, Some(Engagement::Ignore)));
+    }
+
+    #[test]
+    fn test_direct_mention() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "hey @sol:sunbeam.pt what's up?", false);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::DirectMention })
+        ));
+    }
+
+    #[test]
+    fn test_dm_detection() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "random message", true);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::DirectMessage })
+        ));
+    }
+
+    #[test]
+    fn test_name_invocation_start_of_message() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "sol, can you find that link?", false);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::NameInvocation })
+        ));
+    }
+
+    #[test]
+    fn test_name_invocation_hey_sol() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "hey sol do you remember?", false);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::NameInvocation })
+        ));
+    }
+
+    #[test]
+    fn test_name_invocation_case_insensitive() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "Hey Sol, help me", false);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::NameInvocation })
+        ));
+    }
+
+    #[test]
+    fn test_name_invocation_sol_uppercase() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "SOL what do you think?", false);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::NameInvocation })
+        ));
+    }
+
+    #[test]
+    fn test_no_false_positive_solstice() {
+        let ev = evaluator();
+        // "solstice" should NOT trigger name invocation — \b boundary prevents it
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "the solstice is coming", false);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_random_message_falls_through() {
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "what's for lunch?", false);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_priority_mention_over_dm() {
+        // When both mention and DM are true, mention should match first
+        let ev = evaluator();
+        let result = ev.evaluate_rules("@alice:sunbeam.pt", "hi @sol:sunbeam.pt", true);
+        assert!(matches!(
+            result,
+            Some(Engagement::MustRespond { reason: MustRespondReason::DirectMention })
+        ));
+    }
+}