pub mod bridge; pub mod devtools; pub mod identity; pub mod research; pub mod room_history; pub mod web_search; pub mod room_info; pub mod script; pub mod search; use std::collections::HashSet; use std::sync::Arc; use matrix_sdk::Client as MatrixClient; use matrix_sdk::RoomMemberships; use mistralai_client::v1::tool::Tool; use opensearch::OpenSearch; use serde_json::json; use tracing::debug; use crate::config::Config; use crate::context::ResponseContext; use crate::orchestrator::event::ToolContext; use crate::persistence::Store; use crate::sdk::gitea::GiteaClient; use crate::sdk::kratos::KratosClient; pub struct ToolRegistry { opensearch: OpenSearch, matrix: MatrixClient, config: Arc, gitea: Option>, kratos: Option>, mistral: Option>, store: Option>, } impl ToolRegistry { pub fn new( opensearch: OpenSearch, matrix: MatrixClient, config: Arc, gitea: Option>, kratos: Option>, mistral: Option>, store: Option>, ) -> Self { Self { opensearch, matrix, config, gitea, kratos, mistral, store, } } pub fn has_gitea(&self) -> bool { self.gitea.is_some() } pub fn has_kratos(&self) -> bool { self.kratos.is_some() } pub fn tool_definitions(gitea_enabled: bool, kratos_enabled: bool) -> Vec { let mut tools = vec![ Tool::new( "search_archive".into(), "Search the message archive. Use this to find past conversations, \ messages from specific people, or about specific topics." .into(), json!({ "type": "object", "properties": { "query": { "type": "string", "description": "Search query for message content" }, "room": { "type": "string", "description": "Filter by room name (optional)" }, "sender": { "type": "string", "description": "Filter by sender display name (optional)" }, "after": { "type": "string", "description": "Unix timestamp in ms — only messages after this time (optional)" }, "before": { "type": "string", "description": "Unix timestamp in ms — only messages before this time (optional)" }, "limit": { "type": "integer", "description": "Max results to return (default 10)" }, "semantic": { "type": "boolean", "description": "Use semantic search instead of keyword (optional)" } }, "required": ["query"] }), ), Tool::new( "get_room_context".into(), "Get messages around a specific point in time or event in a room. \ Useful for understanding the context of a conversation." .into(), json!({ "type": "object", "properties": { "room_id": { "type": "string", "description": "The Matrix room ID" }, "around_timestamp": { "type": "integer", "description": "Unix timestamp in ms to center the context around" }, "around_event_id": { "type": "string", "description": "Event ID to center the context around" }, "before_count": { "type": "integer", "description": "Number of messages before the pivot (default 10)" }, "after_count": { "type": "integer", "description": "Number of messages after the pivot (default 10)" } }, "required": ["room_id"] }), ), Tool::new( "list_rooms".into(), "List all rooms Sol is currently in, with names and member counts.".into(), json!({ "type": "object", "properties": {} }), ), Tool::new( "get_room_members".into(), "Get the list of members in a specific room.".into(), json!({ "type": "object", "properties": { "room_id": { "type": "string", "description": "The Matrix room ID" } }, "required": ["room_id"] }), ), Tool::new( "run_script".into(), "Execute a TypeScript/JavaScript snippet in a sandboxed runtime. \ Use this for math, date calculations, data transformations, or any \ computation that needs precision. The script has access to:\n\ - sol.search(query, opts?) — search the message archive. opts: \ { room?, sender?, after?, before?, limit?, semantic? }\n\ - sol.rooms() — list joined rooms (returns array of {name, id, members})\n\ - sol.members(roomName) — get room members (returns array of {name, id})\n\ - sol.fetch(url) — HTTP GET (allowlisted domains only)\n\ - sol.memory.get(query?) — retrieve internal notes relevant to the query\n\ - sol.memory.set(content, category?) — save an internal note for later reference\n\ - sol.fs.read(path), sol.fs.write(path, content), sol.fs.list(path?) — \ sandboxed temp filesystem for intermediate files\n\ - console.log() to produce output\n\ All sol.* methods are async — use await. The last expression value is \ also captured. Output is truncated to 4096 chars." .into(), json!({ "type": "object", "properties": { "code": { "type": "string", "description": "TypeScript or JavaScript code to execute" } }, "required": ["code"] }), ), ]; if gitea_enabled { tools.extend(devtools::tool_definitions()); } if kratos_enabled { tools.extend(identity::tool_definitions()); } // Web search (SearXNG — free, self-hosted) tools.push(web_search::tool_definition()); // Research tool (depth 0 — orchestrator level) if let Some(def) = research::tool_definition(4, 0) { tools.push(def); } tools } /// Convert Sol's tool definitions to Mistral AgentTool format /// for use with the Agents API (orchestrator agent creation). pub fn agent_tool_definitions(gitea_enabled: bool, kratos_enabled: bool) -> Vec { Self::tool_definitions(gitea_enabled, kratos_enabled) .into_iter() .map(|t| { mistralai_client::v1::agents::AgentTool::function( t.function.name, t.function.description, t.function.parameters, ) }) .collect() } /// Compute the set of room IDs whose search results are visible from /// the requesting room, based on member overlap. /// /// A room's results are visible if at least ROOM_OVERLAP_THRESHOLD of /// its members are also members of the requesting room. This is enforced /// at the query level — Sol never sees filtered-out results. async fn allowed_room_ids(&self, requesting_room_id: &str) -> Vec { let rooms = self.matrix.joined_rooms(); // Get requesting room's member set let requesting_room = rooms.iter().find(|r| r.room_id().as_str() == requesting_room_id); let requesting_members: HashSet = match requesting_room { Some(room) => match room.members(RoomMemberships::JOIN).await { Ok(members) => members.iter().map(|m| m.user_id().to_string()).collect(), Err(_) => return vec![requesting_room_id.to_string()], }, None => return vec![requesting_room_id.to_string()], }; let mut allowed = Vec::new(); for room in &rooms { let room_id = room.room_id().to_string(); // Always allow the requesting room itself if room_id == requesting_room_id { allowed.push(room_id); continue; } let members: HashSet = match room.members(RoomMemberships::JOIN).await { Ok(m) => m.iter().map(|m| m.user_id().to_string()).collect(), Err(_) => continue, }; if members.is_empty() { continue; } let overlap = members.intersection(&requesting_members).count(); let ratio = overlap as f64 / members.len() as f64; if ratio >= self.config.behavior.room_overlap_threshold as f64 { debug!( source_room = room_id.as_str(), overlap_pct = format!("{:.0}%", ratio * 100.0).as_str(), "Room passes overlap threshold" ); allowed.push(room_id); } } allowed } /// Execute a tool with transport-agnostic context (used by orchestrator). pub async fn execute_with_context( &self, name: &str, arguments: &str, ctx: &ToolContext, ) -> anyhow::Result { // Delegate to the existing execute with a shim ResponseContext let response_ctx = ResponseContext { matrix_user_id: String::new(), user_id: ctx.user_id.clone(), display_name: None, is_dm: ctx.is_direct, is_reply: false, room_id: ctx.scope_key.clone(), }; self.execute(name, arguments, &response_ctx).await } pub async fn execute( &self, name: &str, arguments: &str, response_ctx: &ResponseContext, ) -> anyhow::Result { match name { "search_archive" => { let allowed = self.allowed_room_ids(&response_ctx.room_id).await; search::search_archive( &self.opensearch, &self.config.opensearch.index, arguments, &allowed, ) .await } "get_room_context" => { let allowed = self.allowed_room_ids(&response_ctx.room_id).await; room_history::get_room_context( &self.opensearch, &self.config.opensearch.index, arguments, &allowed, ) .await } "list_rooms" => room_info::list_rooms(&self.matrix).await, "get_room_members" => room_info::get_room_members(&self.matrix, arguments).await, "run_script" => { let allowed = self.allowed_room_ids(&response_ctx.room_id).await; script::run_script( &self.opensearch, &self.matrix, &self.config, arguments, response_ctx, allowed, ) .await } name if name.starts_with("gitea_") => { if let Some(ref gitea) = self.gitea { devtools::execute(gitea, name, arguments, response_ctx).await } else { anyhow::bail!("Gitea integration not configured") } } name if name.starts_with("identity_") => { if let Some(ref kratos) = self.kratos { identity::execute(kratos, name, arguments).await } else { anyhow::bail!("Identity (Kratos) integration not configured") } } "search_web" => { if let Some(ref searxng) = self.config.services.searxng { web_search::search(&searxng.url, arguments).await } else { anyhow::bail!("Web search not configured (missing [services.searxng])") } } "research" => { if let (Some(ref mistral), Some(ref store)) = (&self.mistral, &self.store) { anyhow::bail!("research tool requires execute_research() — call with room + event_id context") } else { anyhow::bail!("Research not configured (missing mistral client or store)") } } _ => anyhow::bail!("Unknown tool: {name}"), } } /// Execute a research tool call with full context (room, event_id for threads). pub async fn execute_research( self: &Arc, arguments: &str, response_ctx: &ResponseContext, room: &matrix_sdk::room::Room, event_id: &ruma::OwnedEventId, current_depth: usize, ) -> anyhow::Result { let mistral = self .mistral .as_ref() .ok_or_else(|| anyhow::anyhow!("Research not configured: missing Mistral client"))?; let store = self .store .as_ref() .ok_or_else(|| anyhow::anyhow!("Research not configured: missing store"))?; research::execute( arguments, &self.config, mistral, self, response_ctx, room, event_id, store, current_depth, ) .await } }