Three memory channels: hidden tool (sol.memory.set/get in scripts), pre-response injection (relevant memories loaded into system prompt), and post-response extraction (ministral-3b extracts facts after each response). User isolation enforced at Rust level — user_id derived from Matrix sender, never from script arguments. New modules: context (ResponseContext), memory (schema, store, extractor). ResponseContext threaded through responder → tools → script runtime. OpenSearch index sol_user_memory created on startup alongside archive.
243 lines
8.1 KiB
Rust
243 lines
8.1 KiB
Rust
use opensearch::OpenSearch;
|
|
use serde::{Deserialize, Serialize};
|
|
use tracing::info;
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ArchiveDocument {
|
|
pub event_id: String,
|
|
pub room_id: String,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub room_name: Option<String>,
|
|
pub sender: String,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub sender_name: Option<String>,
|
|
pub timestamp: i64,
|
|
pub content: String,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub reply_to: Option<String>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub thread_id: Option<String>,
|
|
#[serde(default)]
|
|
pub media_urls: Vec<String>,
|
|
pub event_type: String,
|
|
#[serde(default)]
|
|
pub edited: bool,
|
|
#[serde(default)]
|
|
pub redacted: bool,
|
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
|
pub reactions: Vec<Reaction>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct Reaction {
|
|
pub sender: String,
|
|
pub emoji: String,
|
|
pub timestamp: i64,
|
|
}
|
|
|
|
/// Settings and field mappings used when creating the archive index.
///
/// Must stay valid JSON: it is parsed with `serde_json` before being sent as
/// the body of the index-creation request.
const INDEX_MAPPING: &str = r#"{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "event_id": { "type": "keyword" },
      "room_id": { "type": "keyword" },
      "room_name": { "type": "keyword" },
      "sender": { "type": "keyword" },
      "sender_name": { "type": "keyword" },
      "timestamp": { "type": "date", "format": "epoch_millis" },
      "content": { "type": "text", "analyzer": "standard" },
      "reply_to": { "type": "keyword" },
      "thread_id": { "type": "keyword" },
      "media_urls": { "type": "keyword" },
      "event_type": { "type": "keyword" },
      "edited": { "type": "boolean" },
      "redacted": { "type": "boolean" },
      "reactions": {
        "type": "nested",
        "properties": {
          "sender": { "type": "keyword" },
          "emoji": { "type": "keyword" },
          "timestamp": { "type": "date", "format": "epoch_millis" }
        }
      }
    }
  }
}"#;
|
|
|
|
pub fn index_mapping_json() -> &'static str {
|
|
INDEX_MAPPING
|
|
}
|
|
|
|
pub async fn create_index_if_not_exists(client: &OpenSearch, index: &str) -> anyhow::Result<()> {
|
|
let exists = client
|
|
.indices()
|
|
.exists(opensearch::indices::IndicesExistsParts::Index(&[index]))
|
|
.send()
|
|
.await?;
|
|
|
|
if exists.status_code().is_success() {
|
|
info!(index, "OpenSearch index already exists");
|
|
// Ensure reactions field exists (added after initial schema)
|
|
let reactions_mapping = serde_json::json!({
|
|
"properties": {
|
|
"reactions": {
|
|
"type": "nested",
|
|
"properties": {
|
|
"sender": { "type": "keyword" },
|
|
"emoji": { "type": "keyword" },
|
|
"timestamp": { "type": "date", "format": "epoch_millis" }
|
|
}
|
|
}
|
|
}
|
|
});
|
|
let _ = client
|
|
.indices()
|
|
.put_mapping(opensearch::indices::IndicesPutMappingParts::Index(&[index]))
|
|
.body(reactions_mapping)
|
|
.send()
|
|
.await;
|
|
return Ok(());
|
|
}
|
|
|
|
let mapping: serde_json::Value = serde_json::from_str(INDEX_MAPPING)?;
|
|
let response = client
|
|
.indices()
|
|
.create(opensearch::indices::IndicesCreateParts::Index(index))
|
|
.body(mapping)
|
|
.send()
|
|
.await?;
|
|
|
|
if !response.status_code().is_success() {
|
|
let body = response.text().await?;
|
|
anyhow::bail!("Failed to create index {index}: {body}");
|
|
}
|
|
|
|
info!(index, "Created OpenSearch index");
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a document with both names set and no reply, thread, media or reactions.
    fn sample_doc() -> ArchiveDocument {
        ArchiveDocument {
            event_id: "$abc123:sunbeam.pt".to_string(),
            room_id: "!room:sunbeam.pt".to_string(),
            room_name: Some("general".to_string()),
            sender: "@alice:sunbeam.pt".to_string(),
            sender_name: Some("Alice".to_string()),
            timestamp: 1710000000000,
            content: "hello world".to_string(),
            reply_to: None,
            thread_id: None,
            media_urls: vec![],
            event_type: "m.room.message".to_string(),
            edited: false,
            redacted: false,
            reactions: vec![],
        }
    }

    #[test]
    fn test_serialize_full_doc() {
        let doc = sample_doc();
        let json = serde_json::to_value(&doc).unwrap();

        assert_eq!(json["event_id"], "$abc123:sunbeam.pt");
        assert_eq!(json["room_id"], "!room:sunbeam.pt");
        assert_eq!(json["room_name"], "general");
        assert_eq!(json["sender"], "@alice:sunbeam.pt");
        assert_eq!(json["sender_name"], "Alice");
        assert_eq!(json["timestamp"], 1710000000000_i64);
        assert_eq!(json["content"], "hello world");
        assert_eq!(json["event_type"], "m.room.message");
        assert_eq!(json["edited"], false);
        assert_eq!(json["redacted"], false);
        assert!(json["media_urls"].as_array().unwrap().is_empty());
    }

    #[test]
    fn test_skip_none_fields() {
        let doc = sample_doc();
        let json_str = serde_json::to_string(&doc).unwrap();
        // reply_to and thread_id are None, should be omitted
        assert!(!json_str.contains("reply_to"));
        assert!(!json_str.contains("thread_id"));
    }

    #[test]
    fn test_skip_empty_reactions() {
        let doc = sample_doc();
        let json_str = serde_json::to_string(&doc).unwrap();
        // reactions is empty, so skip_serializing_if drops the key entirely
        assert!(!json_str.contains("reactions"));
    }

    #[test]
    fn test_serialize_with_optional_fields() {
        let mut doc = sample_doc();
        doc.reply_to = Some("$parent:sunbeam.pt".to_string());
        doc.thread_id = Some("$thread:sunbeam.pt".to_string());
        doc.media_urls = vec!["mxc://sunbeam.pt/abc".to_string()];
        doc.edited = true;

        let json = serde_json::to_value(&doc).unwrap();
        assert_eq!(json["reply_to"], "$parent:sunbeam.pt");
        assert_eq!(json["thread_id"], "$thread:sunbeam.pt");
        assert_eq!(json["media_urls"][0], "mxc://sunbeam.pt/abc");
        assert_eq!(json["edited"], true);
    }

    #[test]
    fn test_reactions_roundtrip() {
        let mut doc = sample_doc();
        doc.reactions = vec![Reaction {
            sender: "@bob:sunbeam.pt".to_string(),
            emoji: "+1".to_string(),
            timestamp: 1710000001000,
        }];

        let json = serde_json::to_value(&doc).unwrap();
        assert_eq!(json["reactions"][0]["sender"], "@bob:sunbeam.pt");
        assert_eq!(json["reactions"][0]["emoji"], "+1");
        assert_eq!(json["reactions"][0]["timestamp"], 1710000001000_i64);

        let restored: ArchiveDocument = serde_json::from_value(json).unwrap();
        assert_eq!(restored.reactions.len(), 1);
        assert_eq!(restored.reactions[0].sender, "@bob:sunbeam.pt");
        assert_eq!(restored.reactions[0].emoji, "+1");
        assert_eq!(restored.reactions[0].timestamp, 1710000001000);
    }

    #[test]
    fn test_deserialize_roundtrip() {
        let doc = sample_doc();
        let json_str = serde_json::to_string(&doc).unwrap();
        let deserialized: ArchiveDocument = serde_json::from_str(&json_str).unwrap();

        assert_eq!(deserialized.event_id, doc.event_id);
        assert_eq!(deserialized.room_id, doc.room_id);
        assert_eq!(deserialized.room_name, doc.room_name);
        assert_eq!(deserialized.sender, doc.sender);
        assert_eq!(deserialized.content, doc.content);
        assert_eq!(deserialized.timestamp, doc.timestamp);
        assert_eq!(deserialized.edited, doc.edited);
        assert_eq!(deserialized.redacted, doc.redacted);
    }

    #[test]
    fn test_deserialize_with_defaults() {
        // Simulate a document missing optional/default fields
        let json = r#"{
            "event_id": "$x:s",
            "room_id": "!r:s",
            "sender": "@a:s",
            "timestamp": 1000,
            "content": "test",
            "event_type": "m.room.message"
        }"#;
        let doc: ArchiveDocument = serde_json::from_str(json).unwrap();
        assert!(doc.room_name.is_none());
        assert!(doc.sender_name.is_none());
        assert!(doc.reply_to.is_none());
        assert!(doc.thread_id.is_none());
        assert!(doc.media_urls.is_empty());
        assert!(!doc.edited);
        assert!(!doc.redacted);
        assert!(doc.reactions.is_empty());
    }

    #[test]
    fn test_index_mapping_is_valid_json() {
        let mapping: serde_json::Value = serde_json::from_str(index_mapping_json()).unwrap();
        let properties = &mapping["mappings"]["properties"];

        assert!(mapping["settings"]["number_of_shards"].is_number());
        assert_eq!(properties["event_id"]["type"], "keyword");
        assert_eq!(properties["content"]["type"], "text");
        assert_eq!(properties["timestamp"]["type"], "date");
    }

    #[test]
    fn test_index_mapping_declares_nested_reactions() {
        let mapping: serde_json::Value = serde_json::from_str(index_mapping_json()).unwrap();
        let reactions = &mapping["mappings"]["properties"]["reactions"];

        assert_eq!(reactions["type"], "nested");
        assert_eq!(reactions["properties"]["sender"]["type"], "keyword");
        assert_eq!(reactions["properties"]["emoji"]["type"], "keyword");
        assert_eq!(reactions["properties"]["timestamp"]["type"], "date");
    }
}
|