add self-hosted web search via SearXNG

new search_web tool calls SearXNG (cluster-internal, free, no tracking)
instead of Mistral's built-in web_search ($0.03/query + rate limits).

returns structured results from DuckDuckGo, Wikipedia, StackOverflow,
GitHub, arXiv, and Brave. no API keys, no cost, no rate limits.

removed Mistral AgentTool::web_search() from orchestrator — replaced
by the custom tool which goes through Sol's normal tool dispatch.
This commit is contained in:
2026-03-23 09:52:56 +00:00
parent 567d4c1171
commit 1ba4e016ba
4 changed files with 195 additions and 5 deletions

View File

@@ -69,11 +69,7 @@ pub fn orchestrator_request(
name: ORCHESTRATOR_NAME.to_string(),
description: Some(ORCHESTRATOR_DESCRIPTION.to_string()),
instructions: Some(instructions),
tools: {
let mut all_tools = tools;
all_tools.push(AgentTool::web_search());
Some(all_tools)
},
tools: if tools.is_empty() { None } else { Some(tools) },
handoffs: None,
completion_args: Some(CompletionArgs {
temperature: Some(0.5),

View File

@@ -154,6 +154,8 @@ pub struct ServicesConfig {
pub gitea: Option<GiteaConfig>,
#[serde(default)]
pub kratos: Option<KratosConfig>,
#[serde(default)]
pub searxng: Option<SearxngConfig>,
}
#[derive(Debug, Clone, Deserialize)]
@@ -166,6 +168,11 @@ pub struct KratosConfig {
pub admin_url: String,
}
/// Settings for the SearXNG instance used by the `search_web` tool.
///
/// Held as an optional `searxng` entry on the services config; when it is
/// absent the tool dispatch reports that `[services.searxng]` is missing.
#[derive(Debug, Clone, Deserialize)]
pub struct SearxngConfig {
/// URL of the SearXNG instance, passed as-is to the web search tool.
pub url: String,
}
#[derive(Debug, Clone, Deserialize)]
pub struct VaultConfig {
/// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200

View File

@@ -3,6 +3,7 @@ pub mod devtools;
pub mod identity;
pub mod research;
pub mod room_history;
pub mod web_search;
pub mod room_info;
pub mod script;
pub mod search;
@@ -197,6 +198,9 @@ impl ToolRegistry {
tools.extend(identity::tool_definitions());
}
// Web search (SearXNG — free, self-hosted)
tools.push(web_search::tool_definition());
// Research tool (depth 0 — orchestrator level)
if let Some(def) = research::tool_definition(4, 0) {
tools.push(def);
@@ -329,6 +333,13 @@ impl ToolRegistry {
anyhow::bail!("Identity (Kratos) integration not configured")
}
}
"search_web" => {
if let Some(ref searxng) = self.config.services.searxng {
web_search::search(&searxng.url, arguments).await
} else {
anyhow::bail!("Web search not configured (missing [services.searxng])")
}
}
"research" => {
if let (Some(ref mistral), Some(ref store)) = (&self.mistral, &self.store) {
anyhow::bail!("research tool requires execute_research() — call with room + event_id context")

176
src/tools/web_search.rs Normal file
View File

@@ -0,0 +1,176 @@
use reqwest::Client as HttpClient;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tracing::{debug, info};
/// The parts of a SearXNG JSON search response that this module reads.
///
/// Both fields fall back to their `Default` value when missing from the
/// payload, so a response without them still deserializes.
#[derive(Debug, Deserialize)]
struct SearxngResponse {
/// Individual search hits, in the order SearXNG returned them.
#[serde(default)]
results: Vec<SearxngResult>,
/// Result count reported by SearXNG; only used for debug logging, where it
/// is cast to `u64`.
#[serde(default)]
number_of_results: f64,
}
/// A single search hit from the SearXNG `results` array.
///
/// Every field defaults to an empty string when it is absent from the JSON.
#[derive(Debug, Deserialize, Serialize)]
struct SearxngResult {
/// Title of the result; rendered in bold in the formatted output.
#[serde(default)]
title: String,
/// Link to the result.
#[serde(default)]
url: String,
/// Text snippet; an empty value is rendered as "(no snippet)".
#[serde(default)]
content: String,
/// Name of the engine that produced the hit (e.g. "duckduckgo").
/// Not included in the formatted output.
#[serde(default)]
engine: String,
}
/// Arguments of the `search_web` tool call, parsed from the raw JSON string.
#[derive(Debug, Deserialize)]
struct SearchArgs {
/// Search query text (required).
query: String,
/// Maximum number of results to include in the output; falls back to
/// `default_limit()` when the caller omits it.
#[serde(default = "default_limit")]
limit: usize,
}
/// Serde fallback for `SearchArgs::limit` when the tool call omits it.
fn default_limit() -> usize {
    // Matches the "default 5" advertised in the tool's JSON schema.
    const DEFAULT_RESULT_LIMIT: usize = 5;
    DEFAULT_RESULT_LIMIT
}
/// Execute a web search via SearXNG and format the hits for the LLM.
///
/// `searxng_url` is the base URL of the SearXNG instance (a trailing `/` is
/// tolerated). `args_json` is the raw tool-call argument object: a required
/// `query` string and an optional `limit` (defaults to 5).
///
/// Returns a numbered list of title / URL / snippet entries, or a short
/// "no results" message when SearXNG returns nothing.
///
/// # Errors
///
/// Fails when the arguments do not parse, the query is blank, the HTTP
/// request fails or times out, SearXNG answers with a non-success status, or
/// the response body is not the expected JSON.
pub async fn search(
    searxng_url: &str,
    args_json: &str,
) -> anyhow::Result<String> {
    // reqwest applies no timeout by default; without one an unresponsive
    // SearXNG instance would stall the tool call indefinitely.
    const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(15);

    let args: SearchArgs = serde_json::from_str(args_json)?;

    // Reject blank queries up front instead of sending a pointless request.
    if args.query.trim().is_empty() {
        anyhow::bail!("Web search query must not be empty");
    }

    let query_encoded = url::form_urlencoded::byte_serialize(args.query.as_bytes())
        .collect::<String>();

    let url = format!(
        "{}/search?q={}&format=json&language=en",
        searxng_url.trim_end_matches('/'),
        query_encoded,
    );

    info!(query = args.query.as_str(), limit = args.limit, "Web search via SearXNG");

    let client = HttpClient::new();
    let resp = client
        .get(&url)
        .header("Accept", "application/json")
        // Covers connecting, the response headers and reading the body.
        .timeout(REQUEST_TIMEOUT)
        .send()
        .await
        .map_err(|e| anyhow::anyhow!("SearXNG request failed: {e}"))?;

    if !resp.status().is_success() {
        let status = resp.status();
        // Best effort: the body is only used to enrich the error message.
        let text = resp.text().await.unwrap_or_default();
        anyhow::bail!("SearXNG search failed (HTTP {status}): {text}");
    }

    let data: SearxngResponse = resp
        .json()
        .await
        .map_err(|e| anyhow::anyhow!("Failed to parse SearXNG response: {e}"))?;

    if data.results.is_empty() {
        return Ok("No web search results found.".into());
    }

    // `results` is non-empty here, so the clamp bounds are always valid.
    // A requested limit of 0 is raised to 1 so the output never consists of
    // a header with no entries underneath it.
    let limit = args.limit.clamp(1, data.results.len());
    let results = &data.results[..limit];

    debug!(
        query = args.query.as_str(),
        total = data.number_of_results as u64,
        returned = results.len(),
        "SearXNG results"
    );

    // Format results for the LLM
    let mut output = format!("Web search results for \"{}\":\n\n", args.query);
    for (i, r) in results.iter().enumerate() {
        // Borrow the snippet instead of cloning it for every result.
        let snippet = if r.content.is_empty() {
            "(no snippet)"
        } else {
            r.content.as_str()
        };
        output.push_str(&format!(
            "{}. **{}**\n   {}\n   {}\n\n",
            i + 1,
            r.title,
            r.url,
            snippet,
        ));
    }

    Ok(output)
}
/// Build the tool definition that advertises `search_web` to the model.
///
/// The schema declares a required `query` string and an optional integer
/// `limit`.
pub fn tool_definition() -> mistralai_client::v1::tool::Tool {
    let name = "search_web";

    let description = "Search the web via SearXNG. Returns titles, URLs, and snippets from \
        DuckDuckGo, Wikipedia, StackOverflow, GitHub, and other free engines. \
        Use for current events, product info, documentation, or anything you're \
        not certain about. Free and self-hosted — use liberally.";

    // JSON Schema for the tool arguments; mirrors `SearchArgs`.
    let parameters = json!({
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query"
            },
            "limit": {
                "type": "integer",
                "description": "Max results to return (default 5)"
            }
        },
        "required": ["query"]
    });

    mistralai_client::v1::tool::Tool::new(name.into(), description.into(), parameters)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Omitting `limit` falls back to the default of 5.
    #[test]
    fn test_parse_search_args() {
        let raw = r#"{"query": "mistral vibe"}"#;
        let parsed = serde_json::from_str::<SearchArgs>(raw).unwrap();
        assert_eq!(parsed.query, "mistral vibe");
        assert_eq!(parsed.limit, 5);
    }

    /// An explicit `limit` overrides the default.
    #[test]
    fn test_parse_search_args_with_limit() {
        let raw = r#"{"query": "rust async", "limit": 10}"#;
        let parsed = serde_json::from_str::<SearchArgs>(raw).unwrap();
        assert_eq!(parsed.limit, 10);
    }

    /// A fully populated hit maps onto `SearxngResult` field by field.
    #[test]
    fn test_searxng_result_deserialize() {
        let payload = serde_json::json!({
            "title": "Mistral AI",
            "url": "https://mistral.ai",
            "content": "A leading AI company",
            "engine": "duckduckgo"
        });
        let hit = serde_json::from_value::<SearxngResult>(payload).unwrap();
        assert_eq!(hit.title, "Mistral AI");
        assert_eq!(hit.engine, "duckduckgo");
    }

    /// A response with an empty `results` array deserializes cleanly.
    #[test]
    fn test_searxng_response_empty() {
        let payload = serde_json::json!({"results": [], "number_of_results": 0.0});
        let response = serde_json::from_value::<SearxngResponse>(payload).unwrap();
        assert!(response.results.is_empty());
    }

    /// The tool is registered under the name the dispatcher matches on.
    #[test]
    fn test_tool_definition() {
        let tool = tool_definition();
        assert_eq!(tool.function.name, "search_web");
    }
}