add self-hosted web search via SearXNG
New search_web tool calls SearXNG (cluster-internal, free, no tracking) instead of Mistral's built-in web_search ($0.03/query + rate limits). It returns structured results from DuckDuckGo, Wikipedia, StackOverflow, GitHub, arXiv, and Brave, with no API keys, no cost, and no rate limits. Removed Mistral's AgentTool::web_search() from the orchestrator; it is replaced by the custom tool, which goes through Sol's normal tool dispatch.
This commit is contained in:
@@ -69,11 +69,7 @@ pub fn orchestrator_request(
|
||||
name: ORCHESTRATOR_NAME.to_string(),
|
||||
description: Some(ORCHESTRATOR_DESCRIPTION.to_string()),
|
||||
instructions: Some(instructions),
|
||||
tools: {
|
||||
let mut all_tools = tools;
|
||||
all_tools.push(AgentTool::web_search());
|
||||
Some(all_tools)
|
||||
},
|
||||
tools: if tools.is_empty() { None } else { Some(tools) },
|
||||
handoffs: None,
|
||||
completion_args: Some(CompletionArgs {
|
||||
temperature: Some(0.5),
|
||||
|
||||
@@ -154,6 +154,8 @@ pub struct ServicesConfig {
|
||||
pub gitea: Option<GiteaConfig>,
|
||||
#[serde(default)]
|
||||
pub kratos: Option<KratosConfig>,
|
||||
#[serde(default)]
|
||||
pub searxng: Option<SearxngConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
@@ -166,6 +168,11 @@ pub struct KratosConfig {
|
||||
pub admin_url: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct SearxngConfig {
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct VaultConfig {
|
||||
/// OpenBao/Vault URL. Default: http://openbao.data.svc.cluster.local:8200
|
||||
|
||||
@@ -3,6 +3,7 @@ pub mod devtools;
|
||||
pub mod identity;
|
||||
pub mod research;
|
||||
pub mod room_history;
|
||||
pub mod web_search;
|
||||
pub mod room_info;
|
||||
pub mod script;
|
||||
pub mod search;
|
||||
@@ -197,6 +198,9 @@ impl ToolRegistry {
|
||||
tools.extend(identity::tool_definitions());
|
||||
}
|
||||
|
||||
// Web search (SearXNG — free, self-hosted)
|
||||
tools.push(web_search::tool_definition());
|
||||
|
||||
// Research tool (depth 0 — orchestrator level)
|
||||
if let Some(def) = research::tool_definition(4, 0) {
|
||||
tools.push(def);
|
||||
@@ -329,6 +333,13 @@ impl ToolRegistry {
|
||||
anyhow::bail!("Identity (Kratos) integration not configured")
|
||||
}
|
||||
}
|
||||
"search_web" => {
|
||||
if let Some(ref searxng) = self.config.services.searxng {
|
||||
web_search::search(&searxng.url, arguments).await
|
||||
} else {
|
||||
anyhow::bail!("Web search not configured (missing [services.searxng])")
|
||||
}
|
||||
}
|
||||
"research" => {
|
||||
if let (Some(ref mistral), Some(ref store)) = (&self.mistral, &self.store) {
|
||||
anyhow::bail!("research tool requires execute_research() — call with room + event_id context")
|
||||
|
||||
176
src/tools/web_search.rs
Normal file
176
src/tools/web_search.rs
Normal file
@@ -0,0 +1,176 @@
|
||||
use reqwest::Client as HttpClient;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tracing::{debug, info};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct SearxngResponse {
|
||||
#[serde(default)]
|
||||
results: Vec<SearxngResult>,
|
||||
#[serde(default)]
|
||||
number_of_results: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
struct SearxngResult {
|
||||
#[serde(default)]
|
||||
title: String,
|
||||
#[serde(default)]
|
||||
url: String,
|
||||
#[serde(default)]
|
||||
content: String,
|
||||
#[serde(default)]
|
||||
engine: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct SearchArgs {
|
||||
query: String,
|
||||
#[serde(default = "default_limit")]
|
||||
limit: usize,
|
||||
}
|
||||
|
||||
/// Serde default for `SearchArgs::limit`: how many results to return when
/// the caller does not specify a limit.
fn default_limit() -> usize {
    const DEFAULT_RESULT_LIMIT: usize = 5;
    DEFAULT_RESULT_LIMIT
}
|
||||
|
||||
/// Execute a web search via SearXNG.
|
||||
pub async fn search(
|
||||
searxng_url: &str,
|
||||
args_json: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
let args: SearchArgs = serde_json::from_str(args_json)?;
|
||||
|
||||
let query_encoded = url::form_urlencoded::byte_serialize(args.query.as_bytes())
|
||||
.collect::<String>();
|
||||
|
||||
let url = format!(
|
||||
"{}/search?q={}&format=json&language=en",
|
||||
searxng_url.trim_end_matches('/'),
|
||||
query_encoded,
|
||||
);
|
||||
|
||||
info!(query = args.query.as_str(), limit = args.limit, "Web search via SearXNG");
|
||||
|
||||
let client = HttpClient::new();
|
||||
let resp = client
|
||||
.get(&url)
|
||||
.header("Accept", "application/json")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("SearXNG request failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
anyhow::bail!("SearXNG search failed (HTTP {status}): {text}");
|
||||
}
|
||||
|
||||
let data: SearxngResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Failed to parse SearXNG response: {e}"))?;
|
||||
|
||||
if data.results.is_empty() {
|
||||
return Ok("No web search results found.".into());
|
||||
}
|
||||
|
||||
let limit = args.limit.min(data.results.len());
|
||||
let results = &data.results[..limit];
|
||||
|
||||
debug!(
|
||||
query = args.query.as_str(),
|
||||
total = data.number_of_results as u64,
|
||||
returned = results.len(),
|
||||
"SearXNG results"
|
||||
);
|
||||
|
||||
// Format results for the LLM
|
||||
let mut output = format!("Web search results for \"{}\":\n\n", args.query);
|
||||
for (i, r) in results.iter().enumerate() {
|
||||
output.push_str(&format!(
|
||||
"{}. **{}**\n {}\n {}\n\n",
|
||||
i + 1,
|
||||
r.title,
|
||||
r.url,
|
||||
if r.content.is_empty() {
|
||||
"(no snippet)".to_string()
|
||||
} else {
|
||||
r.content.clone()
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub fn tool_definition() -> mistralai_client::v1::tool::Tool {
|
||||
mistralai_client::v1::tool::Tool::new(
|
||||
"search_web".into(),
|
||||
"Search the web via SearXNG. Returns titles, URLs, and snippets from \
|
||||
DuckDuckGo, Wikipedia, StackOverflow, GitHub, and other free engines. \
|
||||
Use for current events, product info, documentation, or anything you're \
|
||||
not certain about. Free and self-hosted — use liberally."
|
||||
.into(),
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query"
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "Max results to return (default 5)"
|
||||
}
|
||||
},
|
||||
"required": ["query"]
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Omitting `limit` falls back to the default of 5.
    #[test]
    fn test_parse_search_args() {
        let parsed = serde_json::from_str::<SearchArgs>(r#"{"query": "mistral vibe"}"#)
            .unwrap();

        assert_eq!(parsed.query, "mistral vibe");
        assert_eq!(parsed.limit, 5);
    }

    /// An explicit `limit` overrides the default.
    #[test]
    fn test_parse_search_args_with_limit() {
        let parsed =
            serde_json::from_str::<SearchArgs>(r#"{"query": "rust async", "limit": 10}"#)
                .unwrap();

        assert_eq!(parsed.limit, 10);
    }

    /// A fully populated result entry deserializes field by field.
    #[test]
    fn test_searxng_result_deserialize() {
        let payload = serde_json::json!({
            "title": "Mistral AI",
            "url": "https://mistral.ai",
            "content": "A leading AI company",
            "engine": "duckduckgo"
        });

        let hit = serde_json::from_value::<SearxngResult>(payload).unwrap();

        assert_eq!(hit.title, "Mistral AI");
        assert_eq!(hit.engine, "duckduckgo");
    }

    /// A response with no hits deserializes to an empty result list.
    #[test]
    fn test_searxng_response_empty() {
        let payload = serde_json::json!({"results": [], "number_of_results": 0.0});

        let parsed = serde_json::from_value::<SearxngResponse>(payload).unwrap();

        assert!(parsed.results.is_empty());
    }

    /// The tool is registered under the name the dispatcher matches on.
    #[test]
    fn test_tool_definition() {
        let definition = tool_definition();

        assert_eq!(definition.function.name, "search_web");
    }
}
|
||||
Reference in New Issue
Block a user