diff --git a/src/agents/definitions.rs b/src/agents/definitions.rs
index 91cde42..7a1af52 100644
--- a/src/agents/definitions.rs
+++ b/src/agents/definitions.rs
@@ -69,11 +69,7 @@ pub fn orchestrator_request(
         name: ORCHESTRATOR_NAME.to_string(),
         description: Some(ORCHESTRATOR_DESCRIPTION.to_string()),
         instructions: Some(instructions),
-        tools: {
-            let mut all_tools = tools;
-            all_tools.push(AgentTool::web_search());
-            Some(all_tools)
-        },
+        tools: if tools.is_empty() { None } else { Some(tools) },
         handoffs: None,
         completion_args: Some(CompletionArgs {
             temperature: Some(0.5),
diff --git a/src/config.rs b/src/config.rs
index 956846e..f4921bb 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -155,5 +155,7 @@ pub struct ServicesConfig {
     #[serde(default)]
     pub kratos: Option<KratosConfig>,
+    #[serde(default)]
+    pub searxng: Option<SearxngConfig>,
 }
 
 #[derive(Debug, Clone, Deserialize)]
@@ -166,6 +168,11 @@ pub struct KratosConfig {
     pub admin_url: String,
 }
 
+#[derive(Debug, Clone, Deserialize)]
+pub struct SearxngConfig {
+    pub url: String,
+}
+
 #[derive(Debug, Clone, Deserialize)]
 pub struct VaultConfig {
     /// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200
diff --git a/src/tools/mod.rs b/src/tools/mod.rs
index d36a66c..cc4e608 100644
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@@ -3,6 +3,7 @@ pub mod devtools;
 pub mod identity;
 pub mod research;
 pub mod room_history;
+pub mod web_search;
 pub mod room_info;
 pub mod script;
 pub mod search;
@@ -197,6 +198,9 @@ impl ToolRegistry {
             tools.extend(identity::tool_definitions());
         }
 
+        // Web search (SearXNG — free, self-hosted)
+        tools.push(web_search::tool_definition());
+
         // Research tool (depth 0 — orchestrator level)
         if let Some(def) = research::tool_definition(4, 0) {
             tools.push(def);
@@ -329,6 +333,13 @@ impl ToolRegistry {
                     anyhow::bail!("Identity (Kratos) integration not configured")
                 }
             }
+            "search_web" => {
+                if let Some(ref searxng) = self.config.services.searxng {
+                    web_search::search(&searxng.url, arguments).await
+                } else {
+                    anyhow::bail!("Web search not configured (missing [services.searxng])")
+                }
+            }
             "research" => {
                 if let (Some(ref mistral), Some(ref store)) = (&self.mistral, &self.store) {
                     anyhow::bail!("research tool requires execute_research() — call with room + event_id context")
diff --git a/src/tools/web_search.rs b/src/tools/web_search.rs
new file mode 100644
--- /dev/null
+++ b/src/tools/web_search.rs
@@ -0,0 +1,256 @@
+//! `search_web` tool: web search through a self-hosted SearXNG instance.
+
+use std::fmt::Write as _;
+use std::time::Duration;
+
+use reqwest::Client as HttpClient;
+use serde::{Deserialize, Serialize};
+use serde_json::json;
+use tracing::{debug, info};
+
+/// Number of results returned when the caller does not pass `limit`.
+const DEFAULT_LIMIT: usize = 5;
+
+/// Upper bound on `limit`, so a single call cannot flood the model's context.
+const MAX_LIMIT: usize = 20;
+
+/// Cap on the whole HTTP round-trip; without it a stalled SearXNG instance
+/// would block the tool call indefinitely.
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(15);
+
+/// Subset of the SearXNG JSON response that this tool reads.
+#[derive(Debug, Deserialize)]
+struct SearxngResponse {
+    #[serde(default)]
+    results: Vec<SearxngResult>,
+    #[serde(default)]
+    number_of_results: f64,
+}
+
+/// One search hit; every field falls back to empty when SearXNG omits it.
+#[derive(Debug, Deserialize, Serialize)]
+struct SearxngResult {
+    #[serde(default)]
+    title: String,
+    #[serde(default)]
+    url: String,
+    #[serde(default)]
+    content: String,
+    #[serde(default)]
+    engine: String,
+}
+
+/// Arguments of a `search_web` tool call.
+#[derive(Debug, Deserialize)]
+struct SearchArgs {
+    query: String,
+    #[serde(default = "default_limit")]
+    limit: usize,
+}
+
+fn default_limit() -> usize {
+    DEFAULT_LIMIT
+}
+
+/// Parse the tool-call arguments, trimming the query and clamping `limit`
+/// into `1..=MAX_LIMIT`.
+///
+/// A blank query is rejected before any request is made, and `limit: 0` no
+/// longer yields a results header with nothing under it.
+fn parse_args(args_json: &str) -> anyhow::Result<SearchArgs> {
+    let mut args: SearchArgs = serde_json::from_str(args_json)?;
+    args.query = args.query.trim().to_owned();
+    if args.query.is_empty() {
+        anyhow::bail!("Web search requires a non-empty query");
+    }
+    args.limit = args.limit.clamp(1, MAX_LIMIT);
+    Ok(args)
+}
+
+/// Render hits as the numbered plain-text list handed back to the model.
+fn format_results(query: &str, results: &[SearxngResult]) -> String {
+    let mut output = format!("Web search results for \"{query}\":\n\n");
+    for (i, r) in results.iter().enumerate() {
+        let snippet = if r.content.is_empty() {
+            "(no snippet)"
+        } else {
+            r.content.as_str()
+        };
+        // Writing into a `String` cannot fail, so the `fmt::Result` is dropped.
+        let _ = write!(
+            output,
+            "{}. **{}**\n   {}\n   {}\n\n",
+            i + 1,
+            r.title,
+            r.url,
+            snippet,
+        );
+    }
+    output
+}
+
+/// Execute a web search via SearXNG.
+///
+/// `args_json` is the tool-call argument object: a required `query` string and
+/// an optional `limit` (default 5, clamped to `1..=MAX_LIMIT`).
+///
+/// Returns the formatted hit list, or a fixed "no results" message when
+/// SearXNG returns nothing.
+///
+/// # Errors
+///
+/// Fails when the arguments do not parse, the query is blank, the request
+/// fails or times out, SearXNG answers with a non-success status, or the
+/// response body is not the expected JSON.
+pub async fn search(searxng_url: &str, args_json: &str) -> anyhow::Result<String> {
+    let args = parse_args(args_json)?;
+    let query = args.query.as_str();
+
+    let query_encoded: String = url::form_urlencoded::byte_serialize(query.as_bytes()).collect();
+    let url = format!(
+        "{}/search?q={}&format=json&language=en",
+        searxng_url.trim_end_matches('/'),
+        query_encoded,
+    );
+
+    info!(query, limit = args.limit, "Web search via SearXNG");
+
+    let resp = HttpClient::new()
+        .get(&url)
+        .header("Accept", "application/json")
+        .timeout(REQUEST_TIMEOUT)
+        .send()
+        .await
+        .map_err(|e| anyhow::anyhow!("SearXNG request failed: {e}"))?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let text = resp.text().await.unwrap_or_default();
+        anyhow::bail!("SearXNG search failed (HTTP {status}): {text}");
+    }
+
+    let data: SearxngResponse = resp
+        .json()
+        .await
+        .map_err(|e| anyhow::anyhow!("Failed to parse SearXNG response: {e}"))?;
+
+    if data.results.is_empty() {
+        return Ok("No web search results found.".into());
+    }
+
+    // `min` keeps the slice bound within what SearXNG actually sent back.
+    let results = &data.results[..args.limit.min(data.results.len())];
+
+    debug!(
+        query,
+        total = data.number_of_results as u64,
+        returned = results.len(),
+        "SearXNG results"
+    );
+
+    Ok(format_results(query, results))
+}
+
+/// Tool schema advertised to the model for `search_web`.
+pub fn tool_definition() -> mistralai_client::v1::tool::Tool {
+    let limit_description =
+        format!("Max results to return (default {DEFAULT_LIMIT}, max {MAX_LIMIT})");
+    mistralai_client::v1::tool::Tool::new(
+        "search_web".into(),
+        "Search the web via SearXNG. Returns titles, URLs, and snippets from \
+         DuckDuckGo, Wikipedia, StackOverflow, GitHub, and other free engines. \
+         Use for current events, product info, documentation, or anything you're \
+         not certain about. Free and self-hosted — use liberally."
+            .into(),
+        json!({
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Search query"
+                },
+                "limit": {
+                    "type": "integer",
+                    "minimum": 1,
+                    "maximum": MAX_LIMIT,
+                    "description": limit_description
+                }
+            },
+            "required": ["query"]
+        }),
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_search_args() {
+        let args: SearchArgs =
+            serde_json::from_str(r#"{"query": "mistral vibe"}"#).unwrap();
+        assert_eq!(args.query, "mistral vibe");
+        assert_eq!(args.limit, 5);
+    }
+
+    #[test]
+    fn test_parse_search_args_with_limit() {
+        let args: SearchArgs =
+            serde_json::from_str(r#"{"query": "rust async", "limit": 10}"#).unwrap();
+        assert_eq!(args.limit, 10);
+    }
+
+    #[test]
+    fn test_parse_args_clamps_limit() {
+        let low = parse_args(r#"{"query": "rust", "limit": 0}"#).unwrap();
+        assert_eq!(low.limit, 1);
+        let high = parse_args(r#"{"query": "rust", "limit": 500}"#).unwrap();
+        assert_eq!(high.limit, MAX_LIMIT);
+    }
+
+    #[test]
+    fn test_parse_args_rejects_blank_query() {
+        assert!(parse_args(r#"{"query": "   "}"#).is_err());
+    }
+
+    #[test]
+    fn test_format_results_without_snippet() {
+        let hits = [SearxngResult {
+            title: "Mistral AI".into(),
+            url: "https://mistral.ai".into(),
+            content: String::new(),
+            engine: "duckduckgo".into(),
+        }];
+        assert_eq!(
+            format_results("mistral", &hits),
+            "Web search results for \"mistral\":\n\n\
+             1. **Mistral AI**\n   https://mistral.ai\n   (no snippet)\n\n"
+        );
+    }
+
+    #[test]
+    fn test_searxng_result_deserialize() {
+        let json = serde_json::json!({
+            "title": "Mistral AI",
+            "url": "https://mistral.ai",
+            "content": "A leading AI company",
+            "engine": "duckduckgo"
+        });
+        let result: SearxngResult = serde_json::from_value(json).unwrap();
+        assert_eq!(result.title, "Mistral AI");
+        assert_eq!(result.engine, "duckduckgo");
+    }
+
+    #[test]
+    fn test_searxng_response_empty() {
+        let json = serde_json::json!({"results": [], "number_of_results": 0.0});
+        let resp: SearxngResponse = serde_json::from_value(json).unwrap();
+        assert!(resp.results.is_empty());
+    }
+
+    #[test]
+    fn test_tool_definition() {
+        let def = tool_definition();
+        assert_eq!(def.function.name, "search_web");
+    }
+}