enhance evaluator with full system prompt context
the evaluator now receives sol's entire system prompt as a system
message, giving ministral-3b deep context on sol's personality when
scoring relevance. evaluation context window bumped from 25 to 200
messages, room/dm context windows unified at 200.
pre-computed timestamp variables ({ts_yesterday}, {ts_1h_ago},
{ts_last_week}) added to personality template for accurate time
references without LLM math.
This commit is contained in:
@@ -28,11 +28,13 @@ pub struct Evaluator {
|
||||
config: Arc<Config>,
|
||||
mention_regex: Regex,
|
||||
name_regex: Regex,
|
||||
/// Sol's system prompt — gives the evaluator full context on who Sol is.
|
||||
system_prompt: String,
|
||||
}
|
||||
|
||||
impl Evaluator {
|
||||
// todo(sienna): regex must be configurable
|
||||
pub fn new(config: Arc<Config>) -> Self {
|
||||
pub fn new(config: Arc<Config>, system_prompt: String) -> Self {
|
||||
let user_id = &config.matrix.user_id;
|
||||
// Match both plain @sol:sunbeam.pt and Matrix link format [sol](https://matrix.to/#/@sol:sunbeam.pt)
|
||||
let escaped = regex::escape(user_id);
|
||||
@@ -45,6 +47,7 @@ impl Evaluator {
|
||||
config,
|
||||
mention_regex,
|
||||
name_regex,
|
||||
system_prompt,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,8 +164,7 @@ impl Evaluator {
|
||||
directed at them even if not mentioned by name.".to_string();
|
||||
|
||||
let default_passive = "Sol has NOT spoken in this conversation yet. Only score high if the message \
|
||||
is clearly relevant to Sol's expertise (archive search, finding past conversations, \
|
||||
information retrieval) or touches a topic Sol has genuine insight on.".to_string();
|
||||
is clearly relevant to Sol's expertise or touches a topic Sol has genuine insight on.".to_string();
|
||||
|
||||
let participation_note = if sol_in_context {
|
||||
self.config.behavior.evaluation_prompt_active.as_deref()
|
||||
@@ -175,26 +177,51 @@ impl Evaluator {
|
||||
info!(
|
||||
sol_in_context,
|
||||
context_window = window,
|
||||
context_messages = recent_messages.len(),
|
||||
"Building evaluation prompt"
|
||||
);
|
||||
|
||||
let prompt = format!(
|
||||
"You are evaluating whether Sol should respond to a message in a group chat. \
|
||||
Sol is a librarian with access to the team's message archive.\n\n\
|
||||
Recent conversation:\n{context}\n\n\
|
||||
Latest message: {body}\n\n\
|
||||
// System message: Sol's full personality + evaluation framing.
|
||||
// This gives the evaluator deep context on who Sol is, what they care about,
|
||||
// and how they'd naturally engage — so relevance scoring reflects Sol's actual character.
|
||||
let system = format!(
|
||||
"You are Sol's engagement evaluator. Your job is to decide whether Sol should \
|
||||
respond to a message in a group chat, based on Sol's personality, expertise, \
|
||||
and relationship with the people in the room.\n\n\
|
||||
# who sol is\n\n\
|
||||
{}\n\n\
|
||||
# your task\n\n\
|
||||
Read the conversation below and evaluate whether Sol would naturally want to \
|
||||
respond to the latest message. Consider:\n\
|
||||
- Does Sol have relevant knowledge, skills, or tools for this topic?\n\
|
||||
- Would Sol's personality naturally lead them to engage here?\n\
|
||||
- Is someone implicitly asking for Sol's help (even without mentioning them)?\n\
|
||||
- Is this a continuation of something Sol was already involved in?\n\
|
||||
- Would Sol find this genuinely interesting or have something meaningful to add?\n\
|
||||
- Would a reaction (emoji) be more appropriate than a full response?\n\n\
|
||||
{participation_note}\n\n\
|
||||
Respond ONLY with JSON: {{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji reaction or empty string\"}}\n\
|
||||
Respond ONLY with JSON:\n\
|
||||
{{\"relevance\": 0.0-1.0, \"hook\": \"brief reason or empty string\", \"emoji\": \"a single emoji or empty string\"}}\n\n\
|
||||
relevance=1.0 means Sol absolutely should respond, 0.0 means irrelevant.\n\
|
||||
emoji: if Sol wouldn't write a full response but might react to the message, suggest a single emoji. \
|
||||
pick something that feels natural and specific to the message — not generic thumbs up. leave empty if no reaction fits."
|
||||
hook: if responding, a brief note on what Sol would engage with.\n\
|
||||
emoji: if Sol wouldn't write a full response but might react, suggest a single \
|
||||
emoji that feels natural and specific — not generic thumbs up. leave empty if \
|
||||
no reaction fits.",
|
||||
self.system_prompt,
|
||||
);
|
||||
|
||||
let messages = vec![ChatMessage::new_user_message(&prompt)];
|
||||
let user_prompt = format!(
|
||||
"# conversation\n\n{context}\n\n# latest message\n\n{body}"
|
||||
);
|
||||
|
||||
let messages = vec![
|
||||
ChatMessage::new_system_message(&system),
|
||||
ChatMessage::new_user_message(&user_prompt),
|
||||
];
|
||||
let params = ChatParams {
|
||||
response_format: Some(ResponseFormat::json_object()),
|
||||
temperature: Some(0.1),
|
||||
max_tokens: Some(100),
|
||||
max_tokens: Some(150),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -288,7 +315,7 @@ index = "test"
|
||||
}
|
||||
|
||||
fn evaluator() -> Evaluator {
|
||||
Evaluator::new(test_config())
|
||||
Evaluator::new(test_config(), "you are sol, a virtual librarian.".to_string())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -23,6 +23,11 @@ impl Personality {
|
||||
let epoch_ms = now.timestamp_millis().to_string();
|
||||
let members_str = members.join(", ");
|
||||
|
||||
// Pre-compute reference timestamps so the model doesn't have to do math
|
||||
let ts_1h_ago = (now - chrono::Duration::hours(1)).timestamp_millis().to_string();
|
||||
let ts_yesterday = (now - chrono::Duration::days(1)).timestamp_millis().to_string();
|
||||
let ts_last_week = (now - chrono::Duration::days(7)).timestamp_millis().to_string();
|
||||
|
||||
let room_context_rules = if is_dm {
|
||||
String::new()
|
||||
} else {
|
||||
@@ -37,6 +42,9 @@ impl Personality {
|
||||
self.template
|
||||
.replace("{date}", &date)
|
||||
.replace("{epoch_ms}", &epoch_ms)
|
||||
.replace("{ts_1h_ago}", &ts_1h_ago)
|
||||
.replace("{ts_yesterday}", &ts_yesterday)
|
||||
.replace("{ts_last_week}", &ts_last_week)
|
||||
.replace("{room_name}", room_name)
|
||||
.replace("{members}", &members_str)
|
||||
.replace("{room_context_rules}", &room_context_rules)
|
||||
@@ -122,4 +130,34 @@ mod tests {
|
||||
let result = p.build_system_prompt("room", &[], None, false);
|
||||
assert_eq!(result, "Before\n\nAfter");
|
||||
}
|
||||
|
||||
/// Builds a prompt from a template that uses every timestamp placeholder and
/// checks that none of the `{...}` placeholders survive substitution.
#[test]
|
||||
fn test_timestamp_variables_substituted() {
|
||||
let p = Personality::new(
|
||||
"now={epoch_ms} 1h={ts_1h_ago} yesterday={ts_yesterday} week={ts_last_week}".to_string(),
|
||||
);
|
||||
let result = p.build_system_prompt("room", &[], None, false);
|
||||
// Should NOT contain the literal placeholders
|
||||
assert!(!result.contains("{epoch_ms}"));
|
||||
assert!(!result.contains("{ts_1h_ago}"));
|
||||
|
||||
assert!(!result.contains("{ts_yesterday}"));
|
||||
assert!(!result.contains("{ts_last_week}"));
|
||||
// Should contain numeric values
// NOTE(review): the two assertions below only match the "now=" / "1h=" labels
// that are part of the template literal itself, so they do not actually prove
// that a number follows. Consider parsing the substituted values instead.
|
||||
assert!(result.starts_with("now="));
|
||||
assert!(result.contains("1h="));
|
||||
}
|
||||
|
||||
/// A template consisting only of `{room_context_rules}` must render to an
/// empty string when the last argument is `true`.
// NOTE(review): the final `true` is presumably the `is_dm` flag — confirm
// against the `build_system_prompt` signature.
#[test]
|
||||
fn test_room_context_rules_dm_empty() {
|
||||
let p = Personality::new("{room_context_rules}".to_string());
|
||||
let result = p.build_system_prompt("room", &[], None, true);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
/// With the last argument `false`, the `{room_context_rules}` placeholder must
/// expand to text that mentions "group room".
// NOTE(review): the final `false` is presumably the `is_dm` flag — confirm
// against the `build_system_prompt` signature.
#[test]
|
||||
fn test_room_context_rules_group_nonempty() {
|
||||
let p = Personality::new("{room_context_rules}".to_string());
|
||||
let result = p.build_system_prompt("room", &[], None, false);
|
||||
assert!(result.contains("group room"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,10 @@ pub struct Config {
|
||||
pub behavior: BehaviorConfig,
|
||||
#[serde(default)]
|
||||
pub agents: AgentsConfig,
|
||||
#[serde(default)]
|
||||
pub services: ServicesConfig,
|
||||
#[serde(default)]
|
||||
pub vault: VaultConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
@@ -120,6 +124,44 @@ pub struct BehaviorConfig {
|
||||
pub memory_extraction_enabled: bool,
|
||||
}
|
||||
|
||||
/// Settings for optional external services.
///
/// Derives `Default`, so a config without this section yields a value whose
/// service entries are all `None`.
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct ServicesConfig {
|
||||
// Gitea settings; `None` when the config has no `[services.gitea]` table.
#[serde(default)]
|
||||
pub gitea: Option<GiteaConfig>,
|
||||
}
|
||||
|
||||
/// Settings for a Gitea instance.
///
/// `url` carries no serde default, so it must be supplied whenever this table
/// is present in the config.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct GiteaConfig {
|
||||
// URL of the Gitea instance, e.g. `http://gitea:3000`.
// NOTE(review): how this URL is consumed is not visible here.
pub url: String,
|
||||
}
|
||||
|
||||
/// Settings for the OpenBao/Vault connection.
///
/// Every field names a `default_vault_*` function as its serde default, so any
/// individual key may be left out of the config.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct VaultConfig {
|
||||
/// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200
|
||||
#[serde(default = "default_vault_url")]
|
||||
pub url: String,
|
||||
/// Kubernetes auth role name. Default: sol-agent
|
||||
#[serde(default = "default_vault_role")]
|
||||
pub role: String,
|
||||
/// KV v2 mount path. Default: secret
|
||||
#[serde(default = "default_vault_mount")]
|
||||
pub mount: String,
|
||||
}
|
||||
|
||||
// Hand-written `Default` that reuses the same `default_vault_*` functions the
// per-field serde attributes point at, so a completely omitted section and a
// section with missing keys resolve to identical values.
impl Default for VaultConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
url: default_vault_url(),
|
||||
role: default_vault_role(),
|
||||
mount: default_vault_mount(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback OpenBao/Vault URL used when the config does not set `vault.url`.
fn default_vault_url() -> String {
    String::from("http://openbao.data.svc.cluster.local:8200")
}
|
||||
/// Fallback Kubernetes auth role name used when the config does not set
/// `vault.role`.
fn default_vault_role() -> String {
    String::from("sol-agent")
}
|
||||
/// Fallback KV v2 mount path used when the config does not set `vault.mount`.
fn default_vault_mount() -> String {
    String::from("secret")
}
|
||||
|
||||
/// Built-in batch size: 50.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_batch_size() -> usize {
    50
}
|
||||
/// Built-in flush interval in milliseconds: 2000.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_flush_interval_ms() -> u64 {
    2_000
}
|
||||
/// Built-in embedding pipeline name.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_embedding_pipeline() -> String {
    String::from("tuwunel_embedding_pipeline")
}
|
||||
@@ -133,12 +175,12 @@ fn default_spontaneous_delay_min_ms() -> u64 { 15000 }
|
||||
/// Built-in upper bound for the spontaneous delay, in milliseconds: 60000.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_spontaneous_delay_max_ms() -> u64 {
    60_000
}
|
||||
/// Built-in value for `behavior.spontaneous_threshold`: 0.85.
fn default_spontaneous_threshold() -> f32 {
    0.85
}
|
||||
/// Built-in value for `behavior.cooldown_after_response_ms`: 15000.
fn default_cooldown_after_response_ms() -> u64 {
    15_000
}
|
||||
fn default_evaluation_context_window() -> usize { 25 }
|
||||
/// Built-in evaluation context window: 200 messages.
fn default_evaluation_context_window() -> usize {
    200
}
|
||||
/// Built-in setting for Sol-in-conversation detection: enabled.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_detect_sol_in_conversation() -> bool {
    true
}
|
||||
/// Built-in reaction threshold: 0.6.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_reaction_threshold() -> f32 {
    0.6
}
|
||||
/// Built-in setting for reactions: enabled.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_reaction_enabled() -> bool {
    true
}
|
||||
fn default_room_context_window() -> usize { 30 }
|
||||
fn default_dm_context_window() -> usize { 100 }
|
||||
/// Built-in value for `behavior.room_context_window`: 200.
fn default_room_context_window() -> usize {
    200
}
|
||||
/// Built-in value for `behavior.dm_context_window`: 200.
fn default_dm_context_window() -> usize {
    200
}
|
||||
/// Built-in value for `behavior.backfill_on_join`: enabled.
fn default_backfill_on_join() -> bool {
    true
}
|
||||
/// Built-in value for `behavior.backfill_limit`: 10000.
fn default_backfill_limit() -> usize {
    10_000
}
|
||||
/// Built-in script timeout in seconds: 5.
// NOTE(review): presumably referenced by a `#[serde(default = "...")]`
// attribute outside this view — confirm.
fn default_script_timeout_secs() -> u64 {
    5
}
|
||||
@@ -237,8 +279,8 @@ backfill_limit = 5000
|
||||
assert!((config.behavior.spontaneous_threshold - 0.85).abs() < f32::EPSILON);
|
||||
assert!(!config.behavior.instant_responses);
|
||||
assert_eq!(config.behavior.cooldown_after_response_ms, 15000);
|
||||
assert_eq!(config.behavior.room_context_window, 30);
|
||||
assert_eq!(config.behavior.dm_context_window, 100);
|
||||
assert_eq!(config.behavior.room_context_window, 200);
|
||||
assert_eq!(config.behavior.dm_context_window, 200);
|
||||
assert!(config.behavior.backfill_on_join);
|
||||
assert_eq!(config.behavior.backfill_limit, 10000);
|
||||
assert!(config.behavior.memory_extraction_enabled);
|
||||
@@ -274,6 +316,23 @@ state_store_path = "/data/sol/state"
|
||||
assert!(Config::from_str(bad).is_err());
|
||||
}
|
||||
|
||||
/// Parsing `MINIMAL_CONFIG` must succeed and leave `services.gitea` unset.
#[test]
|
||||
fn test_services_config_default_is_none() {
|
||||
let config = Config::from_str(MINIMAL_CONFIG).unwrap();
|
||||
assert!(config.services.gitea.is_none());
|
||||
}
|
||||
|
||||
/// Appending a `[services.gitea]` table to `MINIMAL_CONFIG` must populate
/// `services.gitea` with the given URL.
#[test]
|
||||
fn test_services_config_with_gitea() {
|
||||
// Extend the minimal config with a Gitea table instead of duplicating it.
let with_services = format!(
|
||||
"{}\n[services.gitea]\nurl = \"http://gitea:3000\"\n",
|
||||
MINIMAL_CONFIG
|
||||
);
|
||||
let config = Config::from_str(&with_services).unwrap();
|
||||
let gitea = config.services.gitea.unwrap();
|
||||
assert_eq!(gitea.url, "http://gitea:3000");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_required_field_fails() {
|
||||
let bad = r#"
|
||||
|
||||
Reference in New Issue
Block a user