use opensearch::OpenSearch; use serde::{Deserialize, Serialize}; use tracing::info; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ArchiveDocument { pub event_id: String, pub room_id: String, #[serde(skip_serializing_if = "Option::is_none")] pub room_name: Option, pub sender: String, #[serde(skip_serializing_if = "Option::is_none")] pub sender_name: Option, pub timestamp: i64, pub content: String, #[serde(skip_serializing_if = "Option::is_none")] pub reply_to: Option, #[serde(skip_serializing_if = "Option::is_none")] pub thread_id: Option, #[serde(default)] pub media_urls: Vec, pub event_type: String, #[serde(default)] pub edited: bool, #[serde(default)] pub redacted: bool, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub reactions: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Reaction { pub sender: String, pub emoji: String, pub timestamp: i64, } const INDEX_MAPPING: &str = r#"{ "settings": { "number_of_shards": 1, "number_of_replicas": 0 }, "mappings": { "properties": { "event_id": { "type": "keyword" }, "room_id": { "type": "keyword" }, "room_name": { "type": "keyword" }, "sender": { "type": "keyword" }, "sender_name": { "type": "keyword" }, "timestamp": { "type": "date", "format": "epoch_millis" }, "content": { "type": "text", "analyzer": "standard" }, "reply_to": { "type": "keyword" }, "thread_id": { "type": "keyword" }, "media_urls": { "type": "keyword" }, "event_type": { "type": "keyword" }, "edited": { "type": "boolean" }, "redacted": { "type": "boolean" }, "reactions": { "type": "nested", "properties": { "sender": { "type": "keyword" }, "emoji": { "type": "keyword" }, "timestamp": { "type": "date", "format": "epoch_millis" } } } } } }"#; pub fn index_mapping_json() -> &'static str { INDEX_MAPPING } pub async fn create_index_if_not_exists(client: &OpenSearch, index: &str) -> anyhow::Result<()> { let exists = client .indices() .exists(opensearch::indices::IndicesExistsParts::Index(&[index])) .send() .await?; if exists.status_code().is_success() { info!(index, "OpenSearch index already exists"); // Ensure reactions field exists (added after initial schema) let reactions_mapping = serde_json::json!({ "properties": { "reactions": { "type": "nested", "properties": { "sender": { "type": "keyword" }, "emoji": { "type": "keyword" }, "timestamp": { "type": "date", "format": "epoch_millis" } } } } }); let _ = client .indices() .put_mapping(opensearch::indices::IndicesPutMappingParts::Index(&[index])) .body(reactions_mapping) .send() .await; return Ok(()); } let mapping: serde_json::Value = serde_json::from_str(INDEX_MAPPING)?; let response = client .indices() .create(opensearch::indices::IndicesCreateParts::Index(index)) .body(mapping) .send() .await?; if !response.status_code().is_success() { let body = response.text().await?; anyhow::bail!("Failed to create index {index}: {body}"); } info!(index, "Created OpenSearch index"); Ok(()) } #[cfg(test)] mod tests { use super::*; fn sample_doc() -> ArchiveDocument { ArchiveDocument { event_id: "$abc123:sunbeam.pt".to_string(), room_id: "!room:sunbeam.pt".to_string(), room_name: Some("general".to_string()), sender: "@alice:sunbeam.pt".to_string(), sender_name: Some("Alice".to_string()), timestamp: 1710000000000, content: "hello world".to_string(), reply_to: None, thread_id: None, media_urls: vec![], event_type: "m.room.message".to_string(), edited: false, redacted: false, reactions: vec![], } } #[test] fn test_serialize_full_doc() { let doc = sample_doc(); let json = serde_json::to_value(&doc).unwrap(); assert_eq!(json["event_id"], "$abc123:sunbeam.pt"); assert_eq!(json["room_id"], "!room:sunbeam.pt"); assert_eq!(json["room_name"], "general"); assert_eq!(json["sender"], "@alice:sunbeam.pt"); assert_eq!(json["sender_name"], "Alice"); assert_eq!(json["timestamp"], 1710000000000_i64); assert_eq!(json["content"], "hello world"); assert_eq!(json["event_type"], "m.room.message"); assert_eq!(json["edited"], false); assert_eq!(json["redacted"], false); assert!(json["media_urls"].as_array().unwrap().is_empty()); } #[test] fn test_skip_none_fields() { let doc = sample_doc(); let json_str = serde_json::to_string(&doc).unwrap(); // reply_to and thread_id are None, should be omitted assert!(!json_str.contains("reply_to")); assert!(!json_str.contains("thread_id")); } #[test] fn test_serialize_with_optional_fields() { let mut doc = sample_doc(); doc.reply_to = Some("$parent:sunbeam.pt".to_string()); doc.thread_id = Some("$thread:sunbeam.pt".to_string()); doc.media_urls = vec!["mxc://sunbeam.pt/abc".to_string()]; doc.edited = true; let json = serde_json::to_value(&doc).unwrap(); assert_eq!(json["reply_to"], "$parent:sunbeam.pt"); assert_eq!(json["thread_id"], "$thread:sunbeam.pt"); assert_eq!(json["media_urls"][0], "mxc://sunbeam.pt/abc"); assert_eq!(json["edited"], true); } #[test] fn test_deserialize_roundtrip() { let doc = sample_doc(); let json_str = serde_json::to_string(&doc).unwrap(); let deserialized: ArchiveDocument = serde_json::from_str(&json_str).unwrap(); assert_eq!(deserialized.event_id, doc.event_id); assert_eq!(deserialized.room_id, doc.room_id); assert_eq!(deserialized.room_name, doc.room_name); assert_eq!(deserialized.sender, doc.sender); assert_eq!(deserialized.content, doc.content); assert_eq!(deserialized.timestamp, doc.timestamp); assert_eq!(deserialized.edited, doc.edited); assert_eq!(deserialized.redacted, doc.redacted); } #[test] fn test_deserialize_with_defaults() { // Simulate a document missing optional/default fields let json = r#"{ "event_id": "$x:s", "room_id": "!r:s", "sender": "@a:s", "timestamp": 1000, "content": "test", "event_type": "m.room.message" }"#; let doc: ArchiveDocument = serde_json::from_str(json).unwrap(); assert!(doc.room_name.is_none()); assert!(doc.sender_name.is_none()); assert!(doc.reply_to.is_none()); assert!(doc.thread_id.is_none()); assert!(doc.media_urls.is_empty()); assert!(!doc.edited); assert!(!doc.redacted); } #[test] fn test_index_mapping_is_valid_json() { let mapping: serde_json::Value = serde_json::from_str(index_mapping_json()).unwrap(); assert!(mapping["settings"]["number_of_shards"].is_number()); assert!(mapping["mappings"]["properties"]["event_id"]["type"] .as_str() .unwrap() == "keyword"); assert!(mapping["mappings"]["properties"]["content"]["type"] .as_str() .unwrap() == "text"); assert!(mapping["mappings"]["properties"]["timestamp"]["type"] .as_str() .unwrap() == "date"); } }