feat: initial Sol virtual librarian implementation
Matrix bot with E2EE (matrix-sdk 0.9) that passively archives all messages to OpenSearch and responds to queries via Mistral AI with function calling tools. Core systems: - Archive: bulk OpenSearch indexer with batch/flush, edit/redaction handling, embedding pipeline passthrough - Brain: rule-based engagement evaluator (mentions, DMs, name invocations), LLM-powered spontaneous engagement, per-room conversation context windows, response delay simulation - Tools: search_archive, get_room_context, list_rooms, get_room_members registered as Mistral function calling tools with iterative tool loop - Personality: templated system prompt with Sol's librarian persona 47 unit tests covering config, evaluator, conversation windowing, personality templates, schema serialization, and search query building.
This commit is contained in:
274
src/tools/search.rs
Normal file
274
src/tools/search.rs
Normal file
@@ -0,0 +1,274 @@
|
||||
use opensearch::OpenSearch;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct SearchArgs {
|
||||
pub query: String,
|
||||
#[serde(default)]
|
||||
pub room: Option<String>,
|
||||
#[serde(default)]
|
||||
pub sender: Option<String>,
|
||||
#[serde(default)]
|
||||
pub after: Option<String>,
|
||||
#[serde(default)]
|
||||
pub before: Option<String>,
|
||||
#[serde(default = "default_limit")]
|
||||
pub limit: usize,
|
||||
#[serde(default)]
|
||||
pub semantic: Option<bool>,
|
||||
}
|
||||
|
||||
/// Hit count used for `SearchArgs::limit` when the caller does not supply one.
fn default_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
|
||||
|
||||
/// Build the OpenSearch query body from parsed SearchArgs. Extracted for testability.
|
||||
pub fn build_search_query(args: &SearchArgs) -> serde_json::Value {
|
||||
let must = vec![json!({
|
||||
"match": { "content": args.query }
|
||||
})];
|
||||
|
||||
let mut filter = vec![json!({
|
||||
"term": { "redacted": false }
|
||||
})];
|
||||
|
||||
if let Some(ref room) = args.room {
|
||||
filter.push(json!({ "term": { "room_name": room } }));
|
||||
}
|
||||
if let Some(ref sender) = args.sender {
|
||||
filter.push(json!({ "term": { "sender_name": sender } }));
|
||||
}
|
||||
|
||||
let mut range = serde_json::Map::new();
|
||||
if let Some(ref after) = args.after {
|
||||
if let Ok(ts) = after.parse::<i64>() {
|
||||
range.insert("gte".into(), json!(ts));
|
||||
}
|
||||
}
|
||||
if let Some(ref before) = args.before {
|
||||
if let Ok(ts) = before.parse::<i64>() {
|
||||
range.insert("lte".into(), json!(ts));
|
||||
}
|
||||
}
|
||||
if !range.is_empty() {
|
||||
filter.push(json!({ "range": { "timestamp": range } }));
|
||||
}
|
||||
|
||||
json!({
|
||||
"size": args.limit,
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": must,
|
||||
"filter": filter
|
||||
}
|
||||
},
|
||||
"sort": [{ "timestamp": "desc" }],
|
||||
"_source": ["event_id", "room_name", "sender_name", "timestamp", "content"]
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn search_archive(
|
||||
client: &OpenSearch,
|
||||
index: &str,
|
||||
args_json: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
let args: SearchArgs = serde_json::from_str(args_json)?;
|
||||
debug!(query = args.query.as_str(), "Searching archive");
|
||||
|
||||
let query_body = build_search_query(&args);
|
||||
|
||||
let response = client
|
||||
.search(opensearch::SearchParts::Index(&[index]))
|
||||
.body(query_body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let body: serde_json::Value = response.json().await?;
|
||||
let hits = &body["hits"]["hits"];
|
||||
|
||||
let Some(hits_arr) = hits.as_array() else {
|
||||
return Ok("No results found.".into());
|
||||
};
|
||||
|
||||
if hits_arr.is_empty() {
|
||||
return Ok("No results found.".into());
|
||||
}
|
||||
|
||||
let mut output = String::new();
|
||||
for (i, hit) in hits_arr.iter().enumerate() {
|
||||
let src = &hit["_source"];
|
||||
let sender = src["sender_name"].as_str().unwrap_or("unknown");
|
||||
let room = src["room_name"].as_str().unwrap_or("unknown");
|
||||
let content = src["content"].as_str().unwrap_or("");
|
||||
let ts = src["timestamp"].as_i64().unwrap_or(0);
|
||||
|
||||
let dt = chrono::DateTime::from_timestamp_millis(ts)
|
||||
.map(|d| d.format("%Y-%m-%d %H:%M").to_string())
|
||||
.unwrap_or_else(|| "unknown date".into());
|
||||
|
||||
output.push_str(&format!(
|
||||
"{}. [{dt}] #{room} — {sender}: {content}\n",
|
||||
i + 1
|
||||
));
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize a JSON literal into `SearchArgs`, panicking if it is malformed.
    fn parse_args(json: &str) -> SearchArgs {
        serde_json::from_str(json).unwrap()
    }

    /// Parse `json` as `SearchArgs` and build the query body for it.
    fn query_for(json: &str) -> serde_json::Value {
        build_search_query(&parse_args(json))
    }

    /// Borrow the `bool.filter` clauses of a built query.
    fn filter_clauses(q: &serde_json::Value) -> &[serde_json::Value] {
        q["query"]["bool"]["filter"].as_array().unwrap()
    }

    // ---- argument parsing -------------------------------------------------------------

    #[test]
    fn test_parse_minimal_args() {
        let SearchArgs {
            query,
            room,
            sender,
            after,
            before,
            limit,
            semantic,
        } = parse_args(r#"{"query": "hello"}"#);

        assert_eq!(query, "hello");
        assert_eq!(room, None);
        assert_eq!(sender, None);
        assert_eq!(after, None);
        assert_eq!(before, None);
        assert_eq!(limit, 10); // default
        assert_eq!(semantic, None);
    }

    #[test]
    fn test_parse_full_args() {
        let parsed = parse_args(
            r#"{
                "query": "meeting notes",
                "room": "general",
                "sender": "Alice",
                "after": "1710000000000",
                "before": "1710100000000",
                "limit": 25,
                "semantic": true
            }"#,
        );

        assert_eq!(parsed.query, "meeting notes");
        assert_eq!(parsed.room.as_deref(), Some("general"));
        assert_eq!(parsed.sender.as_deref(), Some("Alice"));
        assert_eq!(parsed.after.as_deref(), Some("1710000000000"));
        assert_eq!(parsed.before.as_deref(), Some("1710100000000"));
        assert_eq!(parsed.limit, 25);
        assert_eq!(parsed.semantic, Some(true));
    }

    // ---- query building ---------------------------------------------------------------

    #[test]
    fn test_query_basic() {
        let q = query_for(r#"{"query": "test"}"#);
        let bool_query = &q["query"]["bool"];

        assert_eq!(q["size"], 10);
        assert_eq!(bool_query["must"][0]["match"]["content"], "test");
        assert_eq!(bool_query["filter"][0]["term"]["redacted"], false);
        assert_eq!(q["sort"][0]["timestamp"], "desc");
    }

    #[test]
    fn test_query_with_room_filter() {
        let q = query_for(r#"{"query": "hello", "room": "design"}"#);
        let clauses = filter_clauses(&q);

        assert_eq!(clauses.len(), 2);
        assert_eq!(clauses[1]["term"]["room_name"], "design");
    }

    #[test]
    fn test_query_with_sender_filter() {
        let q = query_for(r#"{"query": "hello", "sender": "Bob"}"#);
        let clauses = filter_clauses(&q);

        assert_eq!(clauses.len(), 2);
        assert_eq!(clauses[1]["term"]["sender_name"], "Bob");
    }

    #[test]
    fn test_query_with_room_and_sender() {
        let q = query_for(r#"{"query": "hello", "room": "dev", "sender": "Carol"}"#);
        let clauses = filter_clauses(&q);

        assert_eq!(clauses.len(), 3);
        assert_eq!(clauses[1]["term"]["room_name"], "dev");
        assert_eq!(clauses[2]["term"]["sender_name"], "Carol");
    }

    #[test]
    fn test_query_with_date_range() {
        let q = query_for(
            r#"{
                "query": "hello",
                "after": "1710000000000",
                "before": "1710100000000"
            }"#,
        );
        let window = &filter_clauses(&q)[1]["range"]["timestamp"];

        assert_eq!(window["gte"], 1710000000000_i64);
        assert_eq!(window["lte"], 1710100000000_i64);
    }

    #[test]
    fn test_query_with_after_only() {
        let q = query_for(r#"{"query": "hello", "after": "1710000000000"}"#);
        let window = &filter_clauses(&q)[1]["range"]["timestamp"];

        assert_eq!(window["gte"], 1710000000000_i64);
        assert!(window.get("lte").is_none());
    }

    #[test]
    fn test_query_with_custom_limit() {
        let q = query_for(r#"{"query": "hello", "limit": 50}"#);

        assert_eq!(q["size"], 50);
    }

    #[test]
    fn test_query_all_filters_combined() {
        let q = query_for(
            r#"{
                "query": "architecture",
                "room": "engineering",
                "sender": "Sienna",
                "after": "1000",
                "before": "2000",
                "limit": 5
            }"#,
        );

        assert_eq!(q["size"], 5);
        // redacted=false, room, sender, range = 4 filters
        assert_eq!(filter_clauses(&q).len(), 4);
    }

    #[test]
    fn test_invalid_timestamp_ignored() {
        let q = query_for(r#"{"query": "hello", "after": "not-a-number"}"#);

        // Only the redacted filter, no range since parse failed
        assert_eq!(filter_clauses(&q).len(), 1);
    }

    #[test]
    fn test_source_fields() {
        let q = query_for(r#"{"query": "test"}"#);
        let requested: Vec<&str> = q["_source"]
            .as_array()
            .unwrap()
            .iter()
            .map(|field| field.as_str().unwrap())
            .collect();

        for expected in ["event_id", "room_name", "sender_name", "timestamp", "content"] {
            assert!(requested.contains(&expected));
        }
    }
}
|
||||
Reference in New Issue
Block a user