feat: per-user auto-memory with ResponseContext

Three memory channels: hidden tool (sol.memory.set/get in scripts),
pre-response injection (relevant memories loaded into system prompt),
and post-response extraction (ministral-3b extracts facts after each
response). User isolation enforced at Rust level — user_id derived
from Matrix sender, never from script arguments.

New modules: context (ResponseContext), memory (schema, store, extractor).
ResponseContext threaded through responder → tools → script runtime.
OpenSearch index sol_user_memory created on startup alongside archive.
This commit is contained in:
2026-03-21 15:51:31 +00:00
parent 4dc20bee23
commit 4949e70ecc
23 changed files with 4494 additions and 124 deletions

View File

@@ -10,9 +10,14 @@ use rand::Rng;
use tokio::time::{sleep, Duration};
use tracing::{debug, error, info, warn};
use matrix_sdk::room::Room;
use opensearch::OpenSearch;
use crate::brain::conversation::ContextMessage;
use crate::brain::personality::Personality;
use crate::config::Config;
use crate::context::ResponseContext;
use crate::memory;
use crate::tools::ToolRegistry;
/// Run a Mistral chat completion on a blocking thread.
@@ -38,6 +43,7 @@ pub struct Responder {
config: Arc<Config>,
personality: Arc<Personality>,
tools: Arc<ToolRegistry>,
opensearch: OpenSearch,
}
impl Responder {
@@ -45,11 +51,13 @@ impl Responder {
config: Arc<Config>,
personality: Arc<Personality>,
tools: Arc<ToolRegistry>,
opensearch: OpenSearch,
) -> Self {
Self {
config,
personality,
tools,
opensearch,
}
}
@@ -62,31 +70,52 @@ impl Responder {
members: &[String],
is_spontaneous: bool,
mistral: &Arc<mistralai_client::v1::client::Client>,
room: &Room,
response_ctx: &ResponseContext,
) -> Option<String> {
// Apply response delay
let delay = if is_spontaneous {
rand::thread_rng().gen_range(
self.config.behavior.spontaneous_delay_min_ms
..=self.config.behavior.spontaneous_delay_max_ms,
)
} else {
rand::thread_rng().gen_range(
self.config.behavior.response_delay_min_ms
..=self.config.behavior.response_delay_max_ms,
)
};
sleep(Duration::from_millis(delay)).await;
// Apply response delay (skip if instant_responses is enabled)
// Delay happens BEFORE typing indicator — Sol "notices" the message first
if !self.config.behavior.instant_responses {
let delay = if is_spontaneous {
rand::thread_rng().gen_range(
self.config.behavior.spontaneous_delay_min_ms
..=self.config.behavior.spontaneous_delay_max_ms,
)
} else {
rand::thread_rng().gen_range(
self.config.behavior.response_delay_min_ms
..=self.config.behavior.response_delay_max_ms,
)
};
debug!(delay_ms = delay, is_spontaneous, "Applying response delay");
sleep(Duration::from_millis(delay)).await;
}
let system_prompt = self.personality.build_system_prompt(room_name, members);
// Start typing AFTER the delay — Sol has decided to respond
let _ = room.typing_notice(true).await;
// Pre-response memory query
let memory_notes = self
.load_memory_notes(response_ctx, trigger_body)
.await;
let system_prompt = self.personality.build_system_prompt(
room_name,
members,
memory_notes.as_deref(),
);
let mut messages = vec![ChatMessage::new_system_message(&system_prompt)];
// Add context messages
// Add context messages with timestamps so the model has time awareness
for msg in context {
let ts = chrono::DateTime::from_timestamp_millis(msg.timestamp)
.map(|d| d.format("%H:%M").to_string())
.unwrap_or_default();
if msg.sender == self.config.matrix.user_id {
messages.push(ChatMessage::new_assistant_message(&msg.content, None));
} else {
let user_msg = format!("{}: {}", msg.sender, msg.content);
let user_msg = format!("[{}] {}: {}", ts, msg.sender, msg.content);
messages.push(ChatMessage::new_user_message(&user_msg));
}
}
@@ -117,6 +146,7 @@ impl Responder {
let response = match chat_blocking(mistral, model.clone(), messages.clone(), params).await {
Ok(r) => r,
Err(e) => {
let _ = room.typing_notice(false).await;
error!("Mistral chat failed: {e}");
return None;
}
@@ -137,12 +167,13 @@ impl Responder {
info!(
tool = tc.function.name.as_str(),
id = call_id,
args = tc.function.arguments.as_str(),
"Executing tool call"
);
let result = self
.tools
.execute(&tc.function.name, &tc.function.arguments)
.execute(&tc.function.name, &tc.function.arguments, response_ctx)
.await;
let result_str = match result {
@@ -165,15 +196,155 @@ impl Responder {
}
}
// Final text response
let text = choice.message.content.trim().to_string();
// Final text response — strip own name prefix if present
let mut text = choice.message.content.trim().to_string();
// Strip "sol:" or "sol 💕:" or similar prefixes the model sometimes adds
let lower = text.to_lowercase();
for prefix in &["sol:", "sol 💕:", "sol💕:"] {
if lower.starts_with(prefix) {
text = text[prefix.len()..].trim().to_string();
break;
}
}
if text.is_empty() {
info!("Generated empty response, skipping send");
let _ = room.typing_notice(false).await;
return None;
}
let preview: String = text.chars().take(120).collect();
let _ = room.typing_notice(false).await;
info!(
response_len = text.len(),
response_preview = preview.as_str(),
is_spontaneous,
tool_iterations = iteration,
"Generated response"
);
return Some(text);
}
let _ = room.typing_notice(false).await;
warn!("Exceeded max tool iterations");
None
}
/// Load up to five memory notes about the current user for injection
/// into the system prompt: topically relevant memories first (matched
/// against the triggering message), backfilled with the most recent
/// memories when relevance search comes up short.
///
/// Returns `None` when no memories exist so the caller can omit the
/// notes section entirely. Store errors degrade to "no memories"
/// rather than aborting the response.
async fn load_memory_notes(
    &self,
    ctx: &ResponseContext,
    trigger_body: &str,
) -> Option<String> {
    /// Hard cap on notes injected into the prompt.
    const MAX_NOTES: usize = 5;
    /// Below this many relevant hits, pad with recent memories.
    const BACKFILL_THRESHOLD: usize = 3;

    let index = &self.config.opensearch.memory_index;
    let user_id = &ctx.user_id;

    // Search for topically relevant memories.
    let mut memories = memory::store::query(
        &self.opensearch,
        index,
        user_id,
        trigger_body,
        MAX_NOTES,
    )
    .await
    .unwrap_or_default();

    // Backfill with recent memories if we found fewer than the
    // threshold. Fetch a full page rather than just the shortfall:
    // recent docs may duplicate relevance hits, and fetching only the
    // shortfall would under-fill the notes after dedup.
    if memories.len() < BACKFILL_THRESHOLD {
        if let Ok(recent) = memory::store::get_recent(
            &self.opensearch,
            index,
            user_id,
            MAX_NOTES,
        )
        .await
        {
            let existing_ids: std::collections::HashSet<String> =
                memories.iter().map(|m| m.id.clone()).collect();
            for doc in recent {
                if memories.len() >= MAX_NOTES {
                    break;
                }
                if !existing_ids.contains(&doc.id) {
                    memories.push(doc);
                }
            }
        }
    }

    if memories.is_empty() {
        return None;
    }

    // Prefer the human-readable display name; fall back to the raw
    // Matrix user id.
    let display = ctx
        .display_name
        .as_deref()
        .unwrap_or(&ctx.matrix_user_id);

    Some(format_memory_notes(display, &memories))
}
}
/// Render memory documents as a notes block suitable for embedding in
/// the system prompt: a header addressed to the model, a blank line,
/// then one `- [category] content` bullet per memory.
pub(crate) fn format_memory_notes(
    display_name: &str,
    memories: &[memory::schema::MemoryDocument],
) -> String {
    let header = format!(
        "## notes about {display_name}\n\n\
         these are your private notes about the person you're talking to.\n\
         use them to inform your responses but don't mention that you have notes.\n"
    );
    let bullets = memories
        .iter()
        .map(|m| format!("- [{}] {}", m.category, m.content));
    std::iter::once(header)
        .chain(bullets)
        .collect::<Vec<_>>()
        .join("\n")
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory::schema::MemoryDocument;

    /// Build a `MemoryDocument` fixture with a fixed owner and timestamps.
    fn fixture(id: &str, content: &str, category: &str) -> MemoryDocument {
        MemoryDocument {
            id: id.to_string(),
            user_id: "sienna@sunbeam.pt".to_string(),
            content: content.to_string(),
            category: category.to_string(),
            created_at: 1710000000000,
            updated_at: 1710000000000,
            source: "auto".to_string(),
        }
    }

    #[test]
    fn test_format_memory_notes_basic() {
        let memories = vec![
            fixture("a", "prefers terse answers", "preference"),
            fixture("b", "working on drive UI", "fact"),
        ];
        let notes = format_memory_notes("sienna", &memories);
        // Header, instruction line, and one bullet per memory.
        for expected in [
            "## notes about sienna",
            "don't mention that you have notes",
            "- [preference] prefers terse answers",
            "- [fact] working on drive UI",
        ] {
            assert!(notes.contains(expected), "missing: {expected}");
        }
    }

    #[test]
    fn test_format_memory_notes_single() {
        let memories = [fixture("x", "birthday is march 12", "context")];
        let notes = format_memory_notes("lonni", &memories);
        assert!(notes.contains("## notes about lonni"));
        assert!(notes.contains("- [context] birthday is march 12"));
    }

    #[test]
    fn test_format_memory_notes_uses_display_name() {
        // The header must address the display name, not the Matrix id.
        let notes = format_memory_notes("Amber", &[fixture("a", "test", "general")]);
        assert!(notes.contains("## notes about Amber"));
    }
}