room overlap access control for cross-room search

search_archive, get_room_context, and sol.search() (in run_script)
enforce a configurable member overlap threshold. results from a
room are only visible if >=25% of that room's members are also in
the requesting room.

system-level filter applied at the opensearch query layer — sol
never sees results from excluded rooms.
This commit is contained in:
2026-03-23 01:42:20 +00:00
parent 7324c10d25
commit 7dbc8a3121
4 changed files with 177 additions and 21 deletions

View File

@@ -23,7 +23,8 @@ pub struct SearchArgs {
fn default_limit() -> usize { 10 }
/// Build the OpenSearch query body from parsed SearchArgs. Extracted for testability.
pub fn build_search_query(args: &SearchArgs) -> serde_json::Value {
/// `allowed_room_ids` restricts results to rooms that pass the member overlap check.
pub fn build_search_query(args: &SearchArgs, allowed_room_ids: &[String]) -> serde_json::Value {
// Handle empty/wildcard queries as match_all
let must = if args.query.is_empty() || args.query == "*" {
vec![json!({ "match_all": {} })]
@@ -37,6 +38,12 @@ pub fn build_search_query(args: &SearchArgs) -> serde_json::Value {
"term": { "redacted": false }
})];
// Restrict to rooms that pass the member overlap threshold.
// This is a system-level security filter — Sol never sees results from excluded rooms.
if !allowed_room_ids.is_empty() {
filter.push(json!({ "terms": { "room_id": allowed_room_ids } }));
}
if let Some(ref room) = args.room {
filter.push(json!({ "term": { "room_name": room } }));
}
@@ -76,9 +83,10 @@ pub async fn search_archive(
client: &OpenSearch,
index: &str,
args_json: &str,
allowed_room_ids: &[String],
) -> anyhow::Result<String> {
let args: SearchArgs = serde_json::from_str(args_json)?;
let query_body = build_search_query(&args);
let query_body = build_search_query(&args, allowed_room_ids);
info!(
query = args.query.as_str(),
@@ -174,7 +182,7 @@ mod tests {
#[test]
fn test_query_basic() {
let args = parse_args(r#"{"query": "test"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
assert_eq!(q["size"], 10);
assert_eq!(q["query"]["bool"]["must"][0]["match"]["content"], "test");
@@ -185,7 +193,7 @@ mod tests {
#[test]
fn test_query_with_room_filter() {
let args = parse_args(r#"{"query": "hello", "room": "design"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
assert_eq!(filters.len(), 2);
@@ -195,7 +203,7 @@ mod tests {
#[test]
fn test_query_with_sender_filter() {
let args = parse_args(r#"{"query": "hello", "sender": "Bob"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
assert_eq!(filters.len(), 2);
@@ -205,7 +213,7 @@ mod tests {
#[test]
fn test_query_with_room_and_sender() {
let args = parse_args(r#"{"query": "hello", "room": "dev", "sender": "Carol"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
assert_eq!(filters.len(), 3);
@@ -220,7 +228,7 @@ mod tests {
"after": "1710000000000",
"before": "1710100000000"
}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
let range_filter = &filters[1]["range"]["timestamp"];
@@ -231,7 +239,7 @@ mod tests {
#[test]
fn test_query_with_after_only() {
let args = parse_args(r#"{"query": "hello", "after": "1710000000000"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
let range_filter = &filters[1]["range"]["timestamp"];
@@ -242,7 +250,7 @@ mod tests {
#[test]
fn test_query_with_custom_limit() {
let args = parse_args(r#"{"query": "hello", "limit": 50}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
assert_eq!(q["size"], 50);
}
@@ -256,7 +264,7 @@ mod tests {
"before": "2000",
"limit": 5
}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
assert_eq!(q["size"], 5);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
@@ -267,7 +275,7 @@ mod tests {
#[test]
fn test_invalid_timestamp_ignored() {
let args = parse_args(r#"{"query": "hello", "after": "not-a-number"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
// Only the redacted filter, no range since parse failed
@@ -277,14 +285,14 @@ mod tests {
#[test]
fn test_wildcard_query_uses_match_all() {
let args = parse_args(r#"{"query": "*"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
assert!(q["query"]["bool"]["must"][0]["match_all"].is_object());
}
#[test]
fn test_empty_query_uses_match_all() {
let args = parse_args(r#"{"query": ""}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
assert!(q["query"]["bool"]["must"][0]["match_all"].is_object());
}
@@ -292,7 +300,7 @@ mod tests {
fn test_room_filter_uses_keyword_field() {
// room_name is mapped as "keyword" in OpenSearch — no .keyword subfield
let args = parse_args(r#"{"query": "test", "room": "general"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let filters = q["query"]["bool"]["filter"].as_array().unwrap();
// Should be room_name, NOT room_name.keyword
assert_eq!(filters[1]["term"]["room_name"], "general");
@@ -301,7 +309,7 @@ mod tests {
#[test]
fn test_source_fields() {
let args = parse_args(r#"{"query": "test"}"#);
let q = build_search_query(&args);
let q = build_search_query(&args, &[]);
let source = q["_source"].as_array().unwrap();
let fields: Vec<&str> = source.iter().map(|v| v.as_str().unwrap()).collect();