the evaluator now receives sol's entire system prompt as a system
message, giving ministral-3b deep context on sol's personality when
scoring relevance. evaluation context window bumped from 25 to 200
messages, room/dm context windows unified at 200.
pre-computed timestamp variables ({ts_yesterday}, {ts_1h_ago},
{ts_last_week}) added to personality template for accurate time
references without LLM math.
414 lines
15 KiB
Rust
414 lines
15 KiB
Rust
use std::sync::Arc;
|
|
|
|
use mistralai_client::v1::{
|
|
chat::{ChatMessage, ChatParams, ResponseFormat},
|
|
constants::Model,
|
|
};
|
|
use regex::Regex;
|
|
use tracing::{debug, info, warn};
|
|
|
|
use crate::config::Config;
|
|
|
|
/// Outcome of evaluating an incoming message: how (if at all) Sol should engage.
#[derive(Debug, Clone, PartialEq)]
pub enum Engagement {
    /// A rule matched; Sol has to answer. `reason` records which rule fired.
    MustRespond { reason: MustRespondReason },
    /// The LLM scored the message at or above the spontaneous-response threshold.
    /// `hook` is the model's brief note on what Sol would engage with.
    MaybeRespond { relevance: f32, hook: String },
    /// The score was below the response threshold but high enough for an emoji reaction.
    React { emoji: String, relevance: f32 },
    /// Nothing to do for this message.
    Ignore,
}

/// The rule that forced a [`Engagement::MustRespond`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MustRespondReason {
    /// The message body contains Sol's Matrix user id (plain or as a matrix.to link).
    DirectMention,
    /// The message arrived in a direct-message room.
    DirectMessage,
    /// The message addresses Sol by name ("sol ..." / "hey sol ...").
    NameInvocation,
}
|
|
|
|
pub struct Evaluator {
|
|
config: Arc<Config>,
|
|
mention_regex: Regex,
|
|
name_regex: Regex,
|
|
/// Sol's system prompt — gives the evaluator full context on who Sol is.
|
|
system_prompt: String,
|
|
}
|
|
|
|
impl Evaluator {
|
|
// todo(sienna): regex must be configrable
|
|
pub fn new(config: Arc<Config>, system_prompt: String) -> Self {
|
|
let user_id = &config.matrix.user_id;
|
|
// Match both plain @sol:sunbeam.pt and Matrix link format [sol](https://matrix.to/#/@sol:sunbeam.pt)
|
|
let escaped = regex::escape(user_id);
|
|
let mention_pattern = format!(r"{}|matrix\.to/#/{}", escaped, escaped);
|
|
let mention_regex = Regex::new(&mention_pattern).expect("Failed to compile mention regex");
|
|
let name_regex =
|
|
Regex::new(r"(?i)(?:^|\bhey\s+)\bsol\b").expect("Failed to compile name regex");
|
|
|
|
Self {
|
|
config,
|
|
mention_regex,
|
|
name_regex,
|
|
system_prompt,
|
|
}
|
|
}
|
|
|
|
pub async fn evaluate(
|
|
&self,
|
|
sender: &str,
|
|
body: &str,
|
|
is_dm: bool,
|
|
recent_messages: &[String],
|
|
mistral: &Arc<mistralai_client::v1::client::Client>,
|
|
) -> Engagement {
|
|
let body_preview: String = body.chars().take(80).collect();
|
|
|
|
// Don't respond to ourselves
|
|
if sender == self.config.matrix.user_id {
|
|
debug!(sender, body = body_preview.as_str(), "Ignoring own message");
|
|
return Engagement::Ignore;
|
|
}
|
|
|
|
// Direct mention: @sol:sunbeam.pt
|
|
if self.mention_regex.is_match(body) {
|
|
info!(sender, body = body_preview.as_str(), rule = "direct_mention", "Engagement: MustRespond");
|
|
return Engagement::MustRespond {
|
|
reason: MustRespondReason::DirectMention,
|
|
};
|
|
}
|
|
|
|
// DM
|
|
if is_dm {
|
|
info!(sender, body = body_preview.as_str(), rule = "dm", "Engagement: MustRespond");
|
|
return Engagement::MustRespond {
|
|
reason: MustRespondReason::DirectMessage,
|
|
};
|
|
}
|
|
|
|
// Name invocation: "sol ..." or "hey sol ..."
|
|
if self.name_regex.is_match(body) {
|
|
info!(sender, body = body_preview.as_str(), rule = "name_invocation", "Engagement: MustRespond");
|
|
return Engagement::MustRespond {
|
|
reason: MustRespondReason::NameInvocation,
|
|
};
|
|
}
|
|
|
|
info!(
|
|
sender, body = body_preview.as_str(),
|
|
threshold = self.config.behavior.spontaneous_threshold,
|
|
model = self.config.mistral.evaluation_model.as_str(),
|
|
context_len = recent_messages.len(),
|
|
eval_window = self.config.behavior.evaluation_context_window,
|
|
detect_sol = self.config.behavior.detect_sol_in_conversation,
|
|
"No rule match — running LLM relevance evaluation"
|
|
);
|
|
|
|
// Cheap evaluation call for spontaneous responses
|
|
self.evaluate_relevance(body, recent_messages, mistral)
|
|
.await
|
|
}
|
|
|
|
/// Check rule-based engagement (without calling Mistral). Returns Some(Engagement)
|
|
/// if a rule matched, None if we need to fall through to the LLM evaluation.
|
|
pub fn evaluate_rules(
|
|
&self,
|
|
sender: &str,
|
|
body: &str,
|
|
is_dm: bool,
|
|
) -> Option<Engagement> {
|
|
if sender == self.config.matrix.user_id {
|
|
return Some(Engagement::Ignore);
|
|
}
|
|
if self.mention_regex.is_match(body) {
|
|
return Some(Engagement::MustRespond {
|
|
reason: MustRespondReason::DirectMention,
|
|
});
|
|
}
|
|
if is_dm {
|
|
return Some(Engagement::MustRespond {
|
|
reason: MustRespondReason::DirectMessage,
|
|
});
|
|
}
|
|
if self.name_regex.is_match(body) {
|
|
return Some(Engagement::MustRespond {
|
|
reason: MustRespondReason::NameInvocation,
|
|
});
|
|
}
|
|
None
|
|
}
|
|
|
|
async fn evaluate_relevance(
|
|
&self,
|
|
body: &str,
|
|
recent_messages: &[String],
|
|
mistral: &Arc<mistralai_client::v1::client::Client>,
|
|
) -> Engagement {
|
|
let window = self.config.behavior.evaluation_context_window;
|
|
let context = recent_messages
|
|
.iter()
|
|
.rev()
|
|
.take(window)
|
|
.rev()
|
|
.cloned()
|
|
.collect::<Vec<_>>()
|
|
.join("\n");
|
|
|
|
// Check if Sol recently participated in this conversation
|
|
let sol_in_context = self.config.behavior.detect_sol_in_conversation
|
|
&& recent_messages.iter().any(|m| {
|
|
let lower = m.to_lowercase();
|
|
lower.starts_with("sol:") || lower.starts_with("sol ") || lower.contains("@sol:")
|
|
});
|
|
|
|
let default_active = "Sol is ALREADY part of this conversation (see messages above from Sol). \
|
|
Messages that follow up on Sol's response, ask Sol a question, or continue \
|
|
a thread Sol is in should score HIGH (0.8+). Sol should respond to follow-ups \
|
|
directed at them even if not mentioned by name.".to_string();
|
|
|
|
let default_passive = "Sol has NOT spoken in this conversation yet. Only score high if the message \
|
|
is clearly relevant to Sol's expertise or touches a topic Sol has genuine insight on.".to_string();
|
|
|
|
let participation_note = if sol_in_context {
|
|
self.config.behavior.evaluation_prompt_active.as_deref()
|
|
.unwrap_or(&default_active)
|
|
} else {
|
|
self.config.behavior.evaluation_prompt_passive.as_deref()
|
|
.unwrap_or(&default_passive)
|
|
};
|
|
|
|
info!(
|
|
sol_in_context,
|
|
context_window = window,
|
|
context_messages = recent_messages.len(),
|
|
"Building evaluation prompt"
|
|
);
|
|
|
|
// System message: Sol's full personality + evaluation framing.
|
|
// This gives the evaluator deep context on who Sol is, what they care about,
|
|
// and how they'd naturally engage — so relevance scoring reflects Sol's actual character.
|
|
let system = format!(
|
|
"You are Sol's engagement evaluator. Your job is to decide whether Sol should \
|
|
respond to a message in a group chat, based on Sol's personality, expertise, \
|
|
and relationship with the people in the room.\n\n\
|
|
# who sol is\n\n\
|
|
{}\n\n\
|
|
# your task\n\n\
|
|
Read the conversation below and evaluate whether Sol would naturally want to \
|
|
respond to the latest message. Consider:\n\
|
|
- Does Sol have relevant knowledge, skills, or tools for this topic?\n\
|
|
- Would Sol's personality naturally lead them to engage here?\n\
|
|
- Is someone implicitly asking for Sol's help (even without mentioning them)?\n\
|
|
- Is this a continuation of something Sol was already involved in?\n\
|
|
- Would Sol find this genuinely interesting or have something meaningful to add?\n\
|
|
- Would a reaction (emoji) be more appropriate than a full response?\n\n\
|
|
{participation_note}\n\n\
|
|
Respond ONLY with JSON:\n\
|
|
{{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
|
|
relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant.\n\
|
|
hook: if responding, a brief note on what Sol would engage with.\n\
|
|
emoji: if Sol wouldn't write a full response but might react, suggest a single \
|
|
emoji that feels natural and specific — not generic thumbs up. leave empty if \
|
|
no reaction fits.",
|
|
self.system_prompt,
|
|
);
|
|
|
|
let user_prompt = format!(
|
|
"# conversation\n\n{context}\n\n# latest message\n\n{body}"
|
|
);
|
|
|
|
let messages = vec![
|
|
ChatMessage::new_system_message(&system),
|
|
ChatMessage::new_user_message(&user_prompt),
|
|
];
|
|
let params = ChatParams {
|
|
response_format: Some(ResponseFormat::json_object()),
|
|
temperature: Some(0.1),
|
|
max_tokens: Some(150),
|
|
..Default::default()
|
|
};
|
|
|
|
let model = Model::new(&self.config.mistral.evaluation_model);
|
|
let client = Arc::clone(mistral);
|
|
let result = tokio::task::spawn_blocking(move || {
|
|
client.chat(model, messages, Some(params))
|
|
})
|
|
.await
|
|
.unwrap_or_else(|e| Err(mistralai_client::v1::error::ApiError {
|
|
message: format!("spawn_blocking join error: {e}"),
|
|
}));
|
|
|
|
match result {
|
|
Ok(response) => {
|
|
let text = response.choices[0].message.content.text();
|
|
info!(
|
|
raw_response = text.as_str(),
|
|
model = self.config.mistral.evaluation_model.as_str(),
|
|
"LLM evaluation raw response"
|
|
);
|
|
|
|
match serde_json::from_str::<serde_json::Value>(&text) {
|
|
Ok(val) => {
|
|
let relevance = val["relevance"].as_f64().unwrap_or(0.0) as f32;
|
|
let hook = val["hook"].as_str().unwrap_or("").to_string();
|
|
let emoji = val["emoji"].as_str().unwrap_or("").to_string();
|
|
let threshold = self.config.behavior.spontaneous_threshold;
|
|
let reaction_threshold = self.config.behavior.reaction_threshold;
|
|
let reaction_enabled = self.config.behavior.reaction_enabled;
|
|
|
|
info!(
|
|
relevance,
|
|
threshold,
|
|
reaction_threshold,
|
|
hook = hook.as_str(),
|
|
emoji = emoji.as_str(),
|
|
"LLM evaluation parsed"
|
|
);
|
|
|
|
if relevance >= threshold {
|
|
Engagement::MaybeRespond { relevance, hook }
|
|
} else if reaction_enabled
|
|
&& relevance >= reaction_threshold
|
|
&& !emoji.is_empty()
|
|
{
|
|
info!(
|
|
relevance,
|
|
emoji = emoji.as_str(),
|
|
"Reaction range — will react with emoji"
|
|
);
|
|
Engagement::React { emoji, relevance }
|
|
} else {
|
|
Engagement::Ignore
|
|
}
|
|
}
|
|
Err(e) => {
|
|
warn!(raw = text.as_str(), "Failed to parse evaluation response: {e}");
|
|
Engagement::Ignore
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
warn!("Evaluation call failed: {e}");
|
|
Engagement::Ignore
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;

    /// Sender used for every message that does not come from Sol.
    const ALICE: &str = "@alice:sunbeam.pt";

    /// Minimal configuration with Sol's Matrix identity.
    fn test_config() -> Arc<Config> {
        let toml = r#"
[matrix]
homeserver_url = "https://chat.sunbeam.pt"
user_id = "@sol:sunbeam.pt"
state_store_path = "/tmp/sol"

[opensearch]
url = "http://localhost:9200"
index = "test"

[mistral]
[behavior]
"#;
        Arc::new(Config::from_str(toml).unwrap())
    }

    fn evaluator() -> Evaluator {
        Evaluator::new(test_config(), "you are sol, a virtual librarian.".to_string())
    }

    /// Rule-stage result for a message written by Alice.
    fn rules_from_alice(body: &str, is_dm: bool) -> Option<Engagement> {
        evaluator().evaluate_rules(ALICE, body, is_dm)
    }

    /// Extracts the reason when the result is a `MustRespond`, `None` otherwise.
    fn must_respond_reason(result: Option<Engagement>) -> Option<MustRespondReason> {
        match result {
            Some(Engagement::MustRespond { reason }) => Some(reason),
            _ => None,
        }
    }

    #[test]
    fn test_ignore_own_messages() {
        let outcome = evaluator().evaluate_rules("@sol:sunbeam.pt", "hello everyone", false);
        assert!(matches!(outcome, Some(Engagement::Ignore)));
    }

    #[test]
    fn test_direct_mention() {
        let reason = must_respond_reason(rules_from_alice("hey @sol:sunbeam.pt what's up?", false));
        assert!(matches!(reason, Some(MustRespondReason::DirectMention)));
    }

    #[test]
    fn test_dm_detection() {
        let reason = must_respond_reason(rules_from_alice("random message", true));
        assert!(matches!(reason, Some(MustRespondReason::DirectMessage)));
    }

    #[test]
    fn test_name_invocation_start_of_message() {
        let reason = must_respond_reason(rules_from_alice("sol, can you find that link?", false));
        assert!(matches!(reason, Some(MustRespondReason::NameInvocation)));
    }

    #[test]
    fn test_name_invocation_hey_sol() {
        let reason = must_respond_reason(rules_from_alice("hey sol do you remember?", false));
        assert!(matches!(reason, Some(MustRespondReason::NameInvocation)));
    }

    #[test]
    fn test_name_invocation_case_insensitive() {
        let reason = must_respond_reason(rules_from_alice("Hey Sol, help me", false));
        assert!(matches!(reason, Some(MustRespondReason::NameInvocation)));
    }

    #[test]
    fn test_name_invocation_sol_uppercase() {
        let reason = must_respond_reason(rules_from_alice("SOL what do you think?", false));
        assert!(matches!(reason, Some(MustRespondReason::NameInvocation)));
    }

    #[test]
    fn test_no_false_positive_solstice() {
        // The trailing `\b` in the name pattern keeps "solstice" from counting as "sol".
        assert!(rules_from_alice("the solstice is coming", false).is_none());
    }

    #[test]
    fn test_random_message_falls_through() {
        assert!(rules_from_alice("what's for lunch?", false).is_none());
    }

    #[test]
    fn test_priority_mention_over_dm() {
        // A mention inside a DM is reported as a mention: that rule is checked first.
        let reason = must_respond_reason(rules_from_alice("hi @sol:sunbeam.pt", true));
        assert!(matches!(reason, Some(MustRespondReason::DirectMention)));
    }
}
|