enhance evaluator with full system prompt context

the evaluator now receives sol's entire system prompt as a system message, giving ministral-3b deep context on sol's personality when scoring relevance. the evaluation context window is bumped from 25 to 200 messages, and the room/dm context windows are unified at 200. pre-computed timestamp variables ({ts_yesterday}, {ts_1h_ago}, {ts_last_week}) are added to the personality template so time references stay accurate without the LLM doing date math.
2026-03-22 14:58:11 +00:00
parent cf0f640c66
commit 2333dda904
3 changed files with 143 additions and 19 deletions
--- a/src/brain/evaluator.rs
+++ b/src/brain/evaluator.rs
@@ -28,11 +28,13 @@ pub struct Evaluator {
    config: Arc<Config>,
    mention_regex: Regex,
    name_regex: Regex,
+    /// Sol's system prompt — gives the evaluator full context on who Sol is.
+    system_prompt: String,
 }

 impl Evaluator {
    // todo(sienna): regex must be configrable
-    pub fn new(config: Arc<Config>) -> Self {
+    pub fn new(config: Arc<Config>, system_prompt: String) -> Self {
        let user_id = &config.matrix.user_id;
        // Match both plain @sol:sunbeam.pt and Matrix link format [sol](https://matrix.to/#/@sol:sunbeam.pt)
        let escaped = regex::escape(user_id);
@@ -45,6 +47,7 @@ impl Evaluator {
            config,
            mention_regex,
            name_regex,
+            system_prompt,
        }
    }

@@ -161,8 +164,7 @@ impl Evaluator {
             directed at them even if not mentioned by name.".to_string();

        let default_passive = "Sol has NOT spoken in this conversation yet. Only score high if the message \
-             is clearly relevant to Sol's expertise (archive search, finding past conversations, \
-             information retrieval) or touches a topic Sol has genuine insight on.".to_string();
+             is clearly relevant to Sol's expertise or touches a topic Sol has genuine insight on.".to_string();

        let participation_note = if sol_in_context {
            self.config.behavior.evaluation_prompt_active.as_deref()
@@ -175,26 +177,51 @@ impl Evaluator {
        info!(
            sol_in_context,
            context_window = window,
+            context_messages = recent_messages.len(),
            "Building evaluation prompt"
        );

-        let prompt = format!(
-            "You are evaluating whether Sol should respond to a message in a group chat. \
-             Sol is a librarian with access to the team's message archive.\n\n\
-             Recent conversation:\n{context}\n\n\
-             Latest message: {body}\n\n\
+        // System message: Sol's full personality + evaluation framing.
+        // This gives the evaluator deep context on who Sol is, what they care about,
+        // and how they'd naturally engage — so relevance scoring reflects Sol's actual character.
+        let system = format!(
+            "You are Sol's engagement evaluator. Your job is to decide whether Sol should \
+             respond to a message in a group chat, based on Sol's personality, expertise, \
+             and relationship with the people in the room.\n\n\
+             # who sol is\n\n\
+             {}\n\n\
+             # your task\n\n\
+             Read the conversation below and evaluate whether Sol would naturally want to \
+             respond to the latest message. Consider:\n\
+             - Does Sol have relevant knowledge, skills, or tools for this topic?\n\
+             - Would Sol's personality naturally lead them to engage here?\n\
+             - Is someone implicitly asking for Sol's help (even without mentioning them)?\n\
+             - Is this a continuation of something Sol was already involved in?\n\
+             - Would Sol find this genuinely interesting or have something meaningful to add?\n\
+             - Would a reaction (emoji) be more appropriate than a full response?\n\n\
             {participation_note}\n\n\
-             Respond ONLY with JSON: {{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji reaction or empty string\"}}\n\
+             Respond ONLY with JSON:\n\
+             {{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
             relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant.\n\
-             emoji: if Sol wouldn't write a full response but might react to the message, suggest a single emoji. \
-             pick something that feels natural and specific to the message — not generic thumbs up. leave empty if no reaction fits."
+             hook: if responding, a brief note on what Sol would engage with.\n\
+             emoji: if Sol wouldn't write a full response but might react, suggest a single \
+             emoji that feels natural and specific — not generic thumbs up. leave empty if \
+             no reaction fits.",
+            self.system_prompt,
        );

-        let messages = vec![ChatMessage::new_user_message(&prompt)];
+        let user_prompt = format!(
+            "# conversation\n\n{context}\n\n# latest message\n\n{body}"
+        );
+
+        let messages = vec![
+            ChatMessage::new_system_message(&system),
+            ChatMessage::new_user_message(&user_prompt),
+        ];
        let params = ChatParams {
            response_format: Some(ResponseFormat::json_object()),
            temperature: Some(0.1),
-            max_tokens: Some(100),
+            max_tokens: Some(150),
            ..Default::default()
        };

@@ -288,7 +315,7 @@ index = "test"
    }

    fn evaluator() -> Evaluator {
-        Evaluator::new(test_config())
+        Evaluator::new(test_config(), "you are sol, a virtual librarian.".to_string())
    }

    #[test]