use opensearch::OpenSearch; use serde::Deserialize; use serde_json::json; use tracing::{debug, info}; #[derive(Debug, Deserialize)] pub struct SearchArgs { pub query: String, #[serde(default)] pub room: Option, #[serde(default)] pub sender: Option, #[serde(default)] pub after: Option, #[serde(default)] pub before: Option, #[serde(default = "default_limit")] pub limit: usize, #[serde(default)] pub semantic: Option, } fn default_limit() -> usize { 10 } /// Build the OpenSearch query body from parsed SearchArgs. Extracted for testability. /// `allowed_room_ids` restricts results to rooms that pass the member overlap check. pub fn build_search_query(args: &SearchArgs, allowed_room_ids: &[String]) -> serde_json::Value { // Handle empty/wildcard queries as match_all let must = if args.query.is_empty() || args.query == "*" { vec![json!({ "match_all": {} })] } else { vec![json!({ "match": { "content": args.query } })] }; let mut filter = vec![json!({ "term": { "redacted": false } })]; // Restrict to rooms that pass the member overlap threshold. // This is a system-level security filter — Sol never sees results from excluded rooms. if !allowed_room_ids.is_empty() { filter.push(json!({ "terms": { "room_id": allowed_room_ids } })); } if let Some(ref room) = args.room { filter.push(json!({ "term": { "room_name": room } })); } if let Some(ref sender) = args.sender { filter.push(json!({ "term": { "sender_name": sender } })); } let mut range = serde_json::Map::new(); if let Some(ref after) = args.after { if let Ok(ts) = after.parse::() { range.insert("gte".into(), json!(ts)); } } if let Some(ref before) = args.before { if let Ok(ts) = before.parse::() { range.insert("lte".into(), json!(ts)); } } if !range.is_empty() { filter.push(json!({ "range": { "timestamp": range } })); } json!({ "size": args.limit, "query": { "bool": { "must": must, "filter": filter } }, "sort": [{ "timestamp": "desc" }], "_source": ["event_id", "room_name", "sender_name", "timestamp", "content"] }) } pub async fn search_archive( client: &OpenSearch, index: &str, args_json: &str, allowed_room_ids: &[String], ) -> anyhow::Result { let args: SearchArgs = serde_json::from_str(args_json)?; let query_body = build_search_query(&args, allowed_room_ids); info!( query = args.query.as_str(), room = args.room.as_deref().unwrap_or("*"), sender = args.sender.as_deref().unwrap_or("*"), after = args.after.as_deref().unwrap_or("*"), before = args.before.as_deref().unwrap_or("*"), limit = args.limit, query_json = %query_body, "Executing search" ); let response = client .search(opensearch::SearchParts::Index(&[index])) .body(query_body) .send() .await?; let body: serde_json::Value = response.json().await?; let hit_count = body["hits"]["total"]["value"].as_i64().unwrap_or(0); info!(hit_count, "Search results"); let hits = &body["hits"]["hits"]; let Some(hits_arr) = hits.as_array() else { return Ok("No results found.".into()); }; if hits_arr.is_empty() { return Ok("No results found.".into()); } let mut output = String::new(); for (i, hit) in hits_arr.iter().enumerate() { let src = &hit["_source"]; let sender = src["sender_name"].as_str().unwrap_or("unknown"); let room = src["room_name"].as_str().unwrap_or("unknown"); let content = src["content"].as_str().unwrap_or(""); let ts = src["timestamp"].as_i64().unwrap_or(0); let dt = chrono::DateTime::from_timestamp_millis(ts) .map(|d| d.format("%Y-%m-%d %H:%M").to_string()) .unwrap_or_else(|| "unknown date".into()); output.push_str(&format!( "{}. [{dt}] #{room} — {sender}: {content}\n", i + 1 )); } Ok(output) } #[cfg(test)] mod tests { use super::*; fn parse_args(json: &str) -> SearchArgs { serde_json::from_str(json).unwrap() } #[test] fn test_parse_minimal_args() { let args = parse_args(r#"{"query": "hello"}"#); assert_eq!(args.query, "hello"); assert!(args.room.is_none()); assert!(args.sender.is_none()); assert!(args.after.is_none()); assert!(args.before.is_none()); assert_eq!(args.limit, 10); // default assert!(args.semantic.is_none()); } #[test] fn test_parse_full_args() { let args = parse_args(r#"{ "query": "meeting notes", "room": "general", "sender": "Alice", "after": "1710000000000", "before": "1710100000000", "limit": 25, "semantic": true }"#); assert_eq!(args.query, "meeting notes"); assert_eq!(args.room.as_deref(), Some("general")); assert_eq!(args.sender.as_deref(), Some("Alice")); assert_eq!(args.after.as_deref(), Some("1710000000000")); assert_eq!(args.before.as_deref(), Some("1710100000000")); assert_eq!(args.limit, 25); assert_eq!(args.semantic, Some(true)); } #[test] fn test_query_basic() { let args = parse_args(r#"{"query": "test"}"#); let q = build_search_query(&args, &[]); assert_eq!(q["size"], 10); assert_eq!(q["query"]["bool"]["must"][0]["match"]["content"], "test"); assert_eq!(q["query"]["bool"]["filter"][0]["term"]["redacted"], false); assert_eq!(q["sort"][0]["timestamp"], "desc"); } #[test] fn test_query_with_room_filter() { let args = parse_args(r#"{"query": "hello", "room": "design"}"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); assert_eq!(filters.len(), 2); assert_eq!(filters[1]["term"]["room_name"], "design"); } #[test] fn test_query_with_sender_filter() { let args = parse_args(r#"{"query": "hello", "sender": "Bob"}"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); assert_eq!(filters.len(), 2); assert_eq!(filters[1]["term"]["sender_name"], "Bob"); } #[test] fn test_query_with_room_and_sender() { let args = parse_args(r#"{"query": "hello", "room": "dev", "sender": "Carol"}"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); assert_eq!(filters.len(), 3); assert_eq!(filters[1]["term"]["room_name"], "dev"); assert_eq!(filters[2]["term"]["sender_name"], "Carol"); } #[test] fn test_query_with_date_range() { let args = parse_args(r#"{ "query": "hello", "after": "1710000000000", "before": "1710100000000" }"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); let range_filter = &filters[1]["range"]["timestamp"]; assert_eq!(range_filter["gte"], 1710000000000_i64); assert_eq!(range_filter["lte"], 1710100000000_i64); } #[test] fn test_query_with_after_only() { let args = parse_args(r#"{"query": "hello", "after": "1710000000000"}"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); let range_filter = &filters[1]["range"]["timestamp"]; assert_eq!(range_filter["gte"], 1710000000000_i64); assert!(range_filter.get("lte").is_none()); } #[test] fn test_query_with_custom_limit() { let args = parse_args(r#"{"query": "hello", "limit": 50}"#); let q = build_search_query(&args, &[]); assert_eq!(q["size"], 50); } #[test] fn test_query_all_filters_combined() { let args = parse_args(r#"{ "query": "architecture", "room": "engineering", "sender": "Sienna", "after": "1000", "before": "2000", "limit": 5 }"#); let q = build_search_query(&args, &[]); assert_eq!(q["size"], 5); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); // redacted=false, room, sender, range = 4 filters assert_eq!(filters.len(), 4); } #[test] fn test_invalid_timestamp_ignored() { let args = parse_args(r#"{"query": "hello", "after": "not-a-number"}"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); // Only the redacted filter, no range since parse failed assert_eq!(filters.len(), 1); } #[test] fn test_wildcard_query_uses_match_all() { let args = parse_args(r#"{"query": "*"}"#); let q = build_search_query(&args, &[]); assert!(q["query"]["bool"]["must"][0]["match_all"].is_object()); } #[test] fn test_empty_query_uses_match_all() { let args = parse_args(r#"{"query": ""}"#); let q = build_search_query(&args, &[]); assert!(q["query"]["bool"]["must"][0]["match_all"].is_object()); } #[test] fn test_room_filter_uses_keyword_field() { // room_name is mapped as "keyword" in OpenSearch — no .keyword subfield let args = parse_args(r#"{"query": "test", "room": "general"}"#); let q = build_search_query(&args, &[]); let filters = q["query"]["bool"]["filter"].as_array().unwrap(); // Should be room_name, NOT room_name.keyword assert_eq!(filters[1]["term"]["room_name"], "general"); } #[test] fn test_source_fields() { let args = parse_args(r#"{"query": "test"}"#); let q = build_search_query(&args, &[]); let source = q["_source"].as_array().unwrap(); let fields: Vec<&str> = source.iter().map(|v| v.as_str().unwrap()).collect(); assert!(fields.contains(&"event_id")); assert!(fields.contains(&"room_name")); assert!(fields.contains(&"sender_name")); assert!(fields.contains(&"timestamp")); assert!(fields.contains(&"content")); } }