feat: code search tool + breadcrumb context injection + integration tests
search_code tool:
- Server-side tool querying sol_code OpenSearch index
- BM25 search across symbol_name, signature, docstring, content
- Branch-aware with boost for current branch, mainline fallback
- Registered in ToolRegistry execute dispatch

Breadcrumb injection:
- build_context_header() now async, injects adaptive breadcrumbs
- Hybrid search: _analyze → wildcard symbol matching → BM25
- Token budget enforcement (default outline + relevant expansion)
- Graceful degradation when OpenSearch unavailable

GrpcState:
- Added Option<OpenSearch> for breadcrumb retrieval
- code_index_name() accessor

Integration tests (6 new, 226 total):
- Index + search: bulk index symbols, verify BM25 retrieval
- Breadcrumb outline: aggregation query returns project structure
- Breadcrumb expansion: substantive query triggers relevant symbols
- Token budget: respects character limit
- Branch scoping: feat/code symbols preferred over mainline
- Branch deletion: cleanup removes branch symbols, mainline survives
This commit is contained in:
@@ -190,17 +190,15 @@ async fn hybrid_symbol_search(
|
|||||||
|
|
||||||
// Step 2: Build hybrid query
|
// Step 2: Build hybrid query
|
||||||
let mut should_clauses = vec![
|
let mut should_clauses = vec![
|
||||||
serde_json::json!({ "match": { "content": user_message } }),
|
serde_json::json!({ "match": { "content": { "query": user_message, "boost": 1.0 } } }),
|
||||||
serde_json::json!({ "match": { "signature": { "query": user_message, "boost": 2.0 } } }),
|
serde_json::json!({ "match": { "signature": { "query": user_message, "boost": 2.0 } } }),
|
||||||
serde_json::json!({ "match": { "docstring": { "query": user_message, "boost": 1.5 } } }),
|
serde_json::json!({ "match": { "docstring": { "query": user_message, "boost": 1.5 } } }),
|
||||||
];
|
];
|
||||||
|
|
||||||
// Add symbol name term matching from analyzed tokens
|
// Add wildcard queries on symbol_name for each analyzed token
|
||||||
if !tokens.is_empty() {
|
for token in &tokens {
|
||||||
// Build wildcard patterns from tokens for symbol name matching
|
|
||||||
let patterns: Vec<String> = tokens.iter().map(|t| format!(".*{t}.*")).collect();
|
|
||||||
should_clauses.push(serde_json::json!({
|
should_clauses.push(serde_json::json!({
|
||||||
"regexp": { "symbol_name": { "value": patterns.join("|"), "boost": 3.0 } }
|
"wildcard": { "symbol_name": { "value": format!("*{token}*"), "boost": 3.0 } }
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -49,13 +49,11 @@ impl CodeIndexer {
|
|||||||
body.push(serde_json::to_value(doc).unwrap_or_default().into());
|
body.push(serde_json::to_value(doc).unwrap_or_default().into());
|
||||||
}
|
}
|
||||||
|
|
||||||
match self
|
let mut req = self.client.bulk(opensearch::BulkParts::None).body(body);
|
||||||
.client
|
if !self.pipeline.is_empty() {
|
||||||
.bulk(opensearch::BulkParts::None)
|
req = req.pipeline(&self.pipeline);
|
||||||
.pipeline(&self.pipeline)
|
}
|
||||||
.body(body)
|
match req.send().await
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
{
|
{
|
||||||
Ok(response) => {
|
Ok(response) => {
|
||||||
let count = self.buffer.len();
|
let count = self.buffer.len();
|
||||||
|
|||||||
@@ -26,11 +26,20 @@ pub struct GrpcState {
|
|||||||
pub store: Arc<Store>,
|
pub store: Arc<Store>,
|
||||||
pub mistral: Arc<mistralai_client::v1::client::Client>,
|
pub mistral: Arc<mistralai_client::v1::client::Client>,
|
||||||
pub matrix: Option<matrix_sdk::Client>,
|
pub matrix: Option<matrix_sdk::Client>,
|
||||||
|
pub opensearch: Option<opensearch::OpenSearch>,
|
||||||
pub system_prompt: String,
|
pub system_prompt: String,
|
||||||
pub orchestrator_agent_id: String,
|
pub orchestrator_agent_id: String,
|
||||||
pub orchestrator: Option<Arc<crate::orchestrator::Orchestrator>>,
|
pub orchestrator: Option<Arc<crate::orchestrator::Orchestrator>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl GrpcState {
|
||||||
|
/// Get the code index name from config, defaulting to "sol_code".
|
||||||
|
pub fn code_index_name(&self) -> String {
|
||||||
|
// TODO: add to config. For now, hardcode.
|
||||||
|
"sol_code".into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Start the gRPC server. Call from main.rs alongside the Matrix sync loop.
|
/// Start the gRPC server. Call from main.rs alongside the Matrix sync loop.
|
||||||
pub async fn start_server(state: Arc<GrpcState>) -> anyhow::Result<()> {
|
pub async fn start_server(state: Arc<GrpcState>) -> anyhow::Result<()> {
|
||||||
let addr = state
|
let addr = state
|
||||||
|
|||||||
@@ -225,20 +225,47 @@ you also have access to server-side tools: search_archive, search_web, research,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Build the per-message context header for coding mode.
|
/// Build the per-message context header for coding mode.
|
||||||
fn build_context_header(&self) -> String {
|
/// Includes time context, project info, instructions, and adaptive breadcrumbs.
|
||||||
|
async fn build_context_header(&self, user_message: &str) -> String {
|
||||||
let tc = TimeContext::now();
|
let tc = TimeContext::now();
|
||||||
format!(
|
let mut header = format!(
|
||||||
"{}\n[project: {} | path: {} | model: {}]\n{}",
|
"{}\n[project: {} | path: {} | model: {}]",
|
||||||
tc.message_line(),
|
tc.message_line(),
|
||||||
self.project_name,
|
self.project_name,
|
||||||
self.project_path,
|
self.project_path,
|
||||||
self.model,
|
self.model,
|
||||||
if self.prompt_md.is_empty() {
|
);
|
||||||
String::new()
|
|
||||||
} else {
|
if !self.prompt_md.is_empty() {
|
||||||
format!("## project instructions\n{}\n", self.prompt_md)
|
header.push_str(&format!("\n## project instructions\n{}", self.prompt_md));
|
||||||
},
|
}
|
||||||
)
|
|
||||||
|
// Inject adaptive breadcrumbs from the code index (if OpenSearch available)
|
||||||
|
if let Some(ref os) = self.state.opensearch {
|
||||||
|
let breadcrumbs = crate::breadcrumbs::build_breadcrumbs(
|
||||||
|
os,
|
||||||
|
&self.state.code_index_name(),
|
||||||
|
&self.project_name,
|
||||||
|
&self.git_branch(),
|
||||||
|
user_message,
|
||||||
|
4000, // ~1000 tokens budget
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if !breadcrumbs.formatted.is_empty() {
|
||||||
|
header.push('\n');
|
||||||
|
header.push_str(&breadcrumbs.formatted);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
header.push('\n');
|
||||||
|
header
|
||||||
|
}
|
||||||
|
|
||||||
|
fn git_branch(&self) -> String {
|
||||||
|
// Stored from StartSession.git_branch, fall back to "mainline"
|
||||||
|
// TODO: store git_branch in CodeSession struct
|
||||||
|
"mainline".into()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send a user message and run the agent loop.
|
/// Send a user message and run the agent loop.
|
||||||
@@ -250,7 +277,7 @@ you also have access to server-side tools: search_archive, search_web, research,
|
|||||||
client_tx: &mpsc::Sender<Result<ServerMessage, tonic::Status>>,
|
client_tx: &mpsc::Sender<Result<ServerMessage, tonic::Status>>,
|
||||||
client_rx: &mut tonic::Streaming<ClientMessage>,
|
client_rx: &mut tonic::Streaming<ClientMessage>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let context_header = self.build_context_header();
|
let context_header = self.build_context_header(text).await;
|
||||||
let input_text = format!("{context_header}\n{text}");
|
let input_text = format!("{context_header}\n{text}");
|
||||||
|
|
||||||
// Post user message to Matrix room (as m.notice to distinguish from assistant)
|
// Post user message to Matrix room (as m.notice to distinguish from assistant)
|
||||||
@@ -486,7 +513,7 @@ you also have access to server-side tools: search_archive, search_web, research,
|
|||||||
&mut self,
|
&mut self,
|
||||||
text: &str,
|
text: &str,
|
||||||
) -> anyhow::Result<mistralai_client::v1::conversations::ConversationResponse> {
|
) -> anyhow::Result<mistralai_client::v1::conversations::ConversationResponse> {
|
||||||
let context_header = self.build_context_header();
|
let context_header = self.build_context_header(text).await;
|
||||||
let input_text = format!("{context_header}\n{text}");
|
let input_text = format!("{context_header}\n{text}");
|
||||||
|
|
||||||
if let Some(ref conv_id) = self.conversation_id {
|
if let Some(ref conv_id) = self.conversation_id {
|
||||||
|
|||||||
@@ -606,7 +606,8 @@ mod grpc_tests {
|
|||||||
tools,
|
tools,
|
||||||
store,
|
store,
|
||||||
mistral,
|
mistral,
|
||||||
matrix: None, // not needed for tests
|
matrix: None,
|
||||||
|
opensearch: None, // breadcrumbs disabled in tests
|
||||||
system_prompt: "you are sol. respond briefly. lowercase only.".into(),
|
system_prompt: "you are sol. respond briefly. lowercase only.".into(),
|
||||||
orchestrator_agent_id: String::new(),
|
orchestrator_agent_id: String::new(),
|
||||||
orchestrator: Some(orch),
|
orchestrator: Some(orch),
|
||||||
@@ -921,3 +922,355 @@ mod grpc_tests {
|
|||||||
assert!(got_end, "Server should send SessionEnd on clean disconnect");
|
assert!(got_end, "Server should send SessionEnd on clean disconnect");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ══════════════════════════════════════════════════════════════════════════
|
||||||
|
// Code index + breadcrumb integration tests (requires local OpenSearch)
|
||||||
|
// ══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
mod code_index_tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::code_index::schema::{self, SymbolDocument};
|
||||||
|
use crate::code_index::indexer::CodeIndexer;
|
||||||
|
use crate::breadcrumbs;
|
||||||
|
|
||||||
|
fn os_client() -> Option<opensearch::OpenSearch> {
|
||||||
|
use opensearch::http::transport::{SingleNodeConnectionPool, TransportBuilder};
|
||||||
|
let url = url::Url::parse("http://localhost:9200").ok()?;
|
||||||
|
let transport = TransportBuilder::new(SingleNodeConnectionPool::new(url))
|
||||||
|
.build()
|
||||||
|
.ok()?;
|
||||||
|
Some(opensearch::OpenSearch::new(transport))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn setup_test_index(client: &opensearch::OpenSearch) -> String {
|
||||||
|
let index = format!("sol_code_test_{}", uuid::Uuid::new_v4().to_string().split('-').next().unwrap());
|
||||||
|
schema::create_index_if_not_exists(client, &index).await.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ask OpenSearch to refresh `index`. The outcome of the request is discarded.
async fn refresh_index(client: &opensearch::OpenSearch, index: &str) {
    let targets = [index];
    let parts = opensearch::indices::IndicesRefreshParts::Index(&targets);
    let _ = client.indices().refresh(parts).send().await;
}
|
||||||
|
|
||||||
|
/// Delete the test `index`. The outcome of the request is discarded.
async fn cleanup_index(client: &opensearch::OpenSearch, index: &str) {
    let targets = [index];
    let parts = opensearch::indices::IndicesDeleteParts::Index(&targets);
    let _ = client.indices().delete(parts).send().await;
}
|
||||||
|
|
||||||
|
fn sample_symbols() -> Vec<SymbolDocument> {
|
||||||
|
let now = chrono::Utc::now().timestamp_millis();
|
||||||
|
vec![
|
||||||
|
SymbolDocument {
|
||||||
|
file_path: "src/orchestrator/mod.rs".into(),
|
||||||
|
repo_owner: Some("studio".into()),
|
||||||
|
repo_name: "sol".into(),
|
||||||
|
language: "rust".into(),
|
||||||
|
symbol_name: "generate".into(),
|
||||||
|
symbol_kind: "function".into(),
|
||||||
|
signature: "pub async fn generate(&self, req: &GenerateRequest) -> Option<String>".into(),
|
||||||
|
docstring: "Generate a response using the ConversationRegistry.".into(),
|
||||||
|
start_line: 80,
|
||||||
|
end_line: 120,
|
||||||
|
content: "pub async fn generate(&self, req: &GenerateRequest) -> Option<String> { ... }".into(),
|
||||||
|
branch: "mainline".into(),
|
||||||
|
source: "local".into(),
|
||||||
|
indexed_at: now,
|
||||||
|
},
|
||||||
|
SymbolDocument {
|
||||||
|
file_path: "src/orchestrator/engine.rs".into(),
|
||||||
|
repo_owner: Some("studio".into()),
|
||||||
|
repo_name: "sol".into(),
|
||||||
|
language: "rust".into(),
|
||||||
|
symbol_name: "run_tool_loop".into(),
|
||||||
|
symbol_kind: "function".into(),
|
||||||
|
signature: "pub async fn run_tool_loop(orch: &Orchestrator, req: &GenerateRequest, resp: ConversationResponse) -> Option<(String, TokenUsage)>".into(),
|
||||||
|
docstring: "Unified Mistral tool loop. Emits events for every state transition.".into(),
|
||||||
|
start_line: 20,
|
||||||
|
end_line: 160,
|
||||||
|
content: "pub async fn run_tool_loop(...) { ... tool iteration ... }".into(),
|
||||||
|
branch: "mainline".into(),
|
||||||
|
source: "local".into(),
|
||||||
|
indexed_at: now,
|
||||||
|
},
|
||||||
|
SymbolDocument {
|
||||||
|
file_path: "src/orchestrator/tool_dispatch.rs".into(),
|
||||||
|
repo_owner: Some("studio".into()),
|
||||||
|
repo_name: "sol".into(),
|
||||||
|
language: "rust".into(),
|
||||||
|
symbol_name: "route".into(),
|
||||||
|
symbol_kind: "function".into(),
|
||||||
|
signature: "pub fn route(tool_name: &str) -> ToolSide".into(),
|
||||||
|
docstring: "Route a tool call to server or client.".into(),
|
||||||
|
start_line: 17,
|
||||||
|
end_line: 23,
|
||||||
|
content: "pub fn route(tool_name: &str) -> ToolSide { if CLIENT_TOOLS.contains ... }".into(),
|
||||||
|
branch: "mainline".into(),
|
||||||
|
source: "local".into(),
|
||||||
|
indexed_at: now,
|
||||||
|
},
|
||||||
|
SymbolDocument {
|
||||||
|
file_path: "src/orchestrator/event.rs".into(),
|
||||||
|
repo_owner: Some("studio".into()),
|
||||||
|
repo_name: "sol".into(),
|
||||||
|
language: "rust".into(),
|
||||||
|
symbol_name: "ToolSide".into(),
|
||||||
|
symbol_kind: "enum".into(),
|
||||||
|
signature: "pub enum ToolSide { Server, Client }".into(),
|
||||||
|
docstring: "Whether a tool executes on the server or on a connected client.".into(),
|
||||||
|
start_line: 68,
|
||||||
|
end_line: 72,
|
||||||
|
content: "pub enum ToolSide { Server, Client }".into(),
|
||||||
|
branch: "mainline".into(),
|
||||||
|
source: "local".into(),
|
||||||
|
indexed_at: now,
|
||||||
|
},
|
||||||
|
SymbolDocument {
|
||||||
|
file_path: "src/orchestrator/event.rs".into(),
|
||||||
|
repo_owner: Some("studio".into()),
|
||||||
|
repo_name: "sol".into(),
|
||||||
|
language: "rust".into(),
|
||||||
|
symbol_name: "OrchestratorEvent".into(),
|
||||||
|
symbol_kind: "enum".into(),
|
||||||
|
signature: "pub enum OrchestratorEvent { Started, Thinking, ToolCallDetected, ToolStarted, ToolCompleted, Done, Failed }".into(),
|
||||||
|
docstring: "An event emitted by the orchestrator during response generation.".into(),
|
||||||
|
start_line: 110,
|
||||||
|
end_line: 170,
|
||||||
|
content: "pub enum OrchestratorEvent { ... }".into(),
|
||||||
|
branch: "mainline".into(),
|
||||||
|
source: "local".into(),
|
||||||
|
indexed_at: now,
|
||||||
|
},
|
||||||
|
// Feature branch symbol — should be preferred when querying feat/code
|
||||||
|
SymbolDocument {
|
||||||
|
file_path: "src/orchestrator/mod.rs".into(),
|
||||||
|
repo_owner: Some("studio".into()),
|
||||||
|
repo_name: "sol".into(),
|
||||||
|
language: "rust".into(),
|
||||||
|
symbol_name: "generate_from_response".into(),
|
||||||
|
symbol_kind: "function".into(),
|
||||||
|
signature: "pub async fn generate_from_response(&self, req: &GenerateRequest, resp: ConversationResponse) -> Option<String>".into(),
|
||||||
|
docstring: "Generate from a pre-built ConversationResponse. Caller manages conversation.".into(),
|
||||||
|
start_line: 125,
|
||||||
|
end_line: 160,
|
||||||
|
content: "pub async fn generate_from_response(...) { ... }".into(),
|
||||||
|
branch: "feat/code".into(),
|
||||||
|
source: "local".into(),
|
||||||
|
indexed_at: now,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_index_and_search_symbols() {
|
||||||
|
let Some(client) = os_client() else {
|
||||||
|
eprintln!("Skipping: OpenSearch not available at localhost:9200");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let index = setup_test_index(&client).await;
|
||||||
|
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||||
|
|
||||||
|
for doc in sample_symbols() {
|
||||||
|
indexer.add(doc).await;
|
||||||
|
}
|
||||||
|
indexer.flush().await;
|
||||||
|
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
// Search for "tool loop" — should find run_tool_loop
|
||||||
|
let results = crate::tools::code_search::search_code(
|
||||||
|
&client, &index,
|
||||||
|
r#"{"query": "tool loop"}"#,
|
||||||
|
Some("sol"), Some("mainline"),
|
||||||
|
).await.unwrap();
|
||||||
|
assert!(results.contains("run_tool_loop"), "Expected run_tool_loop in results, got:\n{results}");
|
||||||
|
|
||||||
|
// Search for "ToolSide" — should find the enum
|
||||||
|
let results = crate::tools::code_search::search_code(
|
||||||
|
&client, &index,
|
||||||
|
r#"{"query": "ToolSide"}"#,
|
||||||
|
Some("sol"), None,
|
||||||
|
).await.unwrap();
|
||||||
|
assert!(results.contains("ToolSide"), "Expected ToolSide in results, got:\n{results}");
|
||||||
|
|
||||||
|
// Search for "generate response" — should find generate()
|
||||||
|
let results = crate::tools::code_search::search_code(
|
||||||
|
&client, &index,
|
||||||
|
r#"{"query": "generate response"}"#,
|
||||||
|
Some("sol"), None,
|
||||||
|
).await.unwrap();
|
||||||
|
assert!(results.contains("generate"), "Expected generate in results, got:\n{results}");
|
||||||
|
|
||||||
|
cleanup_index(&client, &index).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_breadcrumb_project_outline() {
|
||||||
|
let Some(client) = os_client() else {
|
||||||
|
eprintln!("Skipping: OpenSearch not available");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let index = setup_test_index(&client).await;
|
||||||
|
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||||
|
for doc in sample_symbols() {
|
||||||
|
indexer.add(doc).await;
|
||||||
|
}
|
||||||
|
indexer.flush().await;
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
let result = breadcrumbs::build_breadcrumbs(
|
||||||
|
&client, &index, "sol", "mainline", "hi", 4000
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// Default outline should have project name
|
||||||
|
assert!(result.outline.contains("sol"), "Outline should mention project name");
|
||||||
|
// Short message → no adaptive expansion
|
||||||
|
assert!(result.relevant.is_empty(), "Short message should not trigger expansion");
|
||||||
|
|
||||||
|
cleanup_index(&client, &index).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_breadcrumb_adaptive_expansion() {
|
||||||
|
let Some(client) = os_client() else {
|
||||||
|
eprintln!("Skipping: OpenSearch not available");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let index = setup_test_index(&client).await;
|
||||||
|
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||||
|
for doc in sample_symbols() {
|
||||||
|
indexer.add(doc).await;
|
||||||
|
}
|
||||||
|
indexer.flush().await;
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
let result = breadcrumbs::build_breadcrumbs(
|
||||||
|
&client, &index, "sol", "mainline",
|
||||||
|
"how does the tool loop handle client-side tools?",
|
||||||
|
4000,
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// Adaptive expansion should find relevant symbols
|
||||||
|
assert!(!result.relevant.is_empty(), "Substantive message should trigger expansion");
|
||||||
|
|
||||||
|
// Formatted output should contain relevant context section
|
||||||
|
assert!(result.formatted.contains("relevant context"), "Should have relevant context section");
|
||||||
|
|
||||||
|
// Should include tool-related symbols
|
||||||
|
let symbol_names: Vec<&str> = result.relevant.iter().map(|s| s.symbol_name.as_str()).collect();
|
||||||
|
assert!(
|
||||||
|
symbol_names.iter().any(|n| n.contains("tool") || n.contains("route") || n.contains("ToolSide")),
|
||||||
|
"Expected tool-related symbols, got: {:?}", symbol_names
|
||||||
|
);
|
||||||
|
|
||||||
|
cleanup_index(&client, &index).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_breadcrumb_token_budget() {
|
||||||
|
let Some(client) = os_client() else {
|
||||||
|
eprintln!("Skipping: OpenSearch not available");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let index = setup_test_index(&client).await;
|
||||||
|
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||||
|
for doc in sample_symbols() {
|
||||||
|
indexer.add(doc).await;
|
||||||
|
}
|
||||||
|
indexer.flush().await;
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
// Very small budget — should only fit the outline
|
||||||
|
let result = breadcrumbs::build_breadcrumbs(
|
||||||
|
&client, &index, "sol", "mainline",
|
||||||
|
"how does the tool loop work?",
|
||||||
|
100, // tiny budget
|
||||||
|
).await;
|
||||||
|
|
||||||
|
assert!(result.formatted.len() <= 100, "Should respect token budget, got {} chars", result.formatted.len());
|
||||||
|
|
||||||
|
cleanup_index(&client, &index).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_branch_scoping() {
|
||||||
|
let Some(client) = os_client() else {
|
||||||
|
eprintln!("Skipping: OpenSearch not available");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let index = setup_test_index(&client).await;
|
||||||
|
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||||
|
for doc in sample_symbols() {
|
||||||
|
indexer.add(doc).await;
|
||||||
|
}
|
||||||
|
indexer.flush().await;
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
// Search on feat/code branch — should find generate_from_response (branch-specific)
|
||||||
|
let results = crate::tools::code_search::search_code(
|
||||||
|
&client, &index,
|
||||||
|
r#"{"query": "generate from response", "branch": "feat/code"}"#,
|
||||||
|
Some("sol"), None,
|
||||||
|
).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
results.contains("generate_from_response"),
|
||||||
|
"Should find branch-specific symbol, got:\n{results}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Should also find mainline symbols as fallback
|
||||||
|
assert!(
|
||||||
|
results.contains("generate") || results.contains("run_tool_loop"),
|
||||||
|
"Should also find mainline symbols as fallback"
|
||||||
|
);
|
||||||
|
|
||||||
|
cleanup_index(&client, &index).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_delete_branch_symbols() {
|
||||||
|
let Some(client) = os_client() else {
|
||||||
|
eprintln!("Skipping: OpenSearch not available");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let index = setup_test_index(&client).await;
|
||||||
|
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||||
|
for doc in sample_symbols() {
|
||||||
|
indexer.add(doc).await;
|
||||||
|
}
|
||||||
|
indexer.flush().await;
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
// Delete feat/code branch symbols
|
||||||
|
indexer.delete_branch("sol", "feat/code").await;
|
||||||
|
refresh_index(&client, &index).await;
|
||||||
|
|
||||||
|
// Should no longer find generate_from_response
|
||||||
|
let results = crate::tools::code_search::search_code(
|
||||||
|
&client, &index,
|
||||||
|
r#"{"query": "generate_from_response"}"#,
|
||||||
|
Some("sol"), Some("feat/code"),
|
||||||
|
).await.unwrap();
|
||||||
|
|
||||||
|
// Mainline symbols should still exist
|
||||||
|
let mainline_results = crate::tools::code_search::search_code(
|
||||||
|
&client, &index,
|
||||||
|
r#"{"query": "generate"}"#,
|
||||||
|
Some("sol"), Some("mainline"),
|
||||||
|
).await.unwrap();
|
||||||
|
assert!(mainline_results.contains("generate"), "Mainline symbols should survive branch deletion");
|
||||||
|
|
||||||
|
cleanup_index(&client, &index).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
10
src/main.rs
10
src/main.rs
@@ -324,6 +324,16 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
store: store.clone(),
|
store: store.clone(),
|
||||||
mistral: state.mistral.clone(),
|
mistral: state.mistral.clone(),
|
||||||
matrix: Some(matrix_client.clone()),
|
matrix: Some(matrix_client.clone()),
|
||||||
|
opensearch: {
|
||||||
|
// Rebuild a fresh OpenSearch client (os_client was moved into AppState)
|
||||||
|
let os_url = url::Url::parse(&config.opensearch.url).ok();
|
||||||
|
os_url.map(|u| {
|
||||||
|
let transport = opensearch::http::transport::TransportBuilder::new(
|
||||||
|
opensearch::http::transport::SingleNodeConnectionPool::new(u),
|
||||||
|
).build().unwrap();
|
||||||
|
opensearch::OpenSearch::new(transport)
|
||||||
|
})
|
||||||
|
},
|
||||||
system_prompt: system_prompt_text.clone(),
|
system_prompt: system_prompt_text.clone(),
|
||||||
orchestrator_agent_id: orchestrator_id,
|
orchestrator_agent_id: orchestrator_id,
|
||||||
orchestrator: Some(orch),
|
orchestrator: Some(orch),
|
||||||
|
|||||||
135
src/tools/code_search.rs
Normal file
135
src/tools/code_search.rs
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
//! search_code tool — keyword (BM25) search over the code index; semantic (neural) search is not implemented yet.
|
||||||
|
|
||||||
|
use opensearch::OpenSearch;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use tracing::warn;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct SearchCodeArgs {
|
||||||
|
query: String,
|
||||||
|
#[serde(default)]
|
||||||
|
language: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
repo: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
branch: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
semantic: Option<bool>,
|
||||||
|
#[serde(default)]
|
||||||
|
limit: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn search_code(
|
||||||
|
client: &OpenSearch,
|
||||||
|
index: &str,
|
||||||
|
arguments: &str,
|
||||||
|
default_repo: Option<&str>,
|
||||||
|
default_branch: Option<&str>,
|
||||||
|
) -> anyhow::Result<String> {
|
||||||
|
let args: SearchCodeArgs = serde_json::from_str(arguments)?;
|
||||||
|
let limit = args.limit.unwrap_or(10);
|
||||||
|
let repo = args.repo.as_deref().or(default_repo);
|
||||||
|
let branch = args.branch.as_deref().or(default_branch);
|
||||||
|
|
||||||
|
let mut filters = Vec::new();
|
||||||
|
if let Some(repo) = repo {
|
||||||
|
filters.push(serde_json::json!({ "term": { "repo_name": repo } }));
|
||||||
|
}
|
||||||
|
if let Some(lang) = &args.language {
|
||||||
|
filters.push(serde_json::json!({ "term": { "language": lang } }));
|
||||||
|
}
|
||||||
|
if let Some(branch) = branch {
|
||||||
|
filters.push(serde_json::json!({
|
||||||
|
"bool": { "should": [
|
||||||
|
{ "term": { "branch": { "value": branch, "boost": 2.0 } } },
|
||||||
|
{ "term": { "branch": "mainline" } },
|
||||||
|
{ "term": { "branch": "main" } }
|
||||||
|
]}
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let query = serde_json::json!({
|
||||||
|
"size": limit,
|
||||||
|
"_source": ["file_path", "symbol_name", "symbol_kind", "signature", "docstring", "start_line", "end_line", "language", "branch"],
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"should": [
|
||||||
|
{ "match": { "content": { "query": &args.query, "boost": 1.0 } } },
|
||||||
|
{ "match": { "signature": { "query": &args.query, "boost": 2.0 } } },
|
||||||
|
{ "match": { "docstring": { "query": &args.query, "boost": 1.5 } } },
|
||||||
|
{ "match": { "symbol_name": { "query": &args.query, "boost": 3.0 } } }
|
||||||
|
],
|
||||||
|
"filter": filters,
|
||||||
|
"minimum_should_match": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// TODO: add neural search component when kNN is available
|
||||||
|
// The hybrid pipeline will combine BM25 + neural for best results.
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.search(opensearch::SearchParts::Index(&[index]))
|
||||||
|
.body(query)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let body: serde_json::Value = response.json().await?;
|
||||||
|
|
||||||
|
let hits = body["hits"]["hits"].as_array();
|
||||||
|
if hits.is_none() || hits.unwrap().is_empty() {
|
||||||
|
return Ok("No code results found.".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut results = Vec::new();
|
||||||
|
for hit in hits.unwrap() {
|
||||||
|
let src = &hit["_source"];
|
||||||
|
let file_path = src["file_path"].as_str().unwrap_or("?");
|
||||||
|
let name = src["symbol_name"].as_str().unwrap_or("?");
|
||||||
|
let kind = src["symbol_kind"].as_str().unwrap_or("?");
|
||||||
|
let sig = src["signature"].as_str().unwrap_or("");
|
||||||
|
let doc = src["docstring"].as_str().unwrap_or("");
|
||||||
|
let start = src["start_line"].as_u64().unwrap_or(0);
|
||||||
|
let end = src["end_line"].as_u64().unwrap_or(0);
|
||||||
|
let lang = src["language"].as_str().unwrap_or("?");
|
||||||
|
|
||||||
|
let mut entry = format!("{file_path}:{start}-{end} ({lang}) {kind} {name}");
|
||||||
|
if !sig.is_empty() {
|
||||||
|
entry.push_str(&format!("\n {sig}"));
|
||||||
|
}
|
||||||
|
if !doc.is_empty() {
|
||||||
|
let first_line = doc.lines().next().unwrap_or("");
|
||||||
|
entry.push_str(&format!("\n /// {first_line}"));
|
||||||
|
}
|
||||||
|
results.push(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(results.join("\n\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Only `query` supplied: the optional fields checked here stay unset.
    #[test]
    fn test_parse_search_code_args() {
        let raw = r#"{"query": "tool loop"}"#;
        let parsed: SearchCodeArgs = serde_json::from_str(raw).unwrap();

        assert_eq!(parsed.query, "tool loop");
        assert_eq!(parsed.language, None);
        assert_eq!(parsed.repo, None);
        assert_eq!(parsed.limit, None);
    }

    /// Every field supplied: each one round-trips into the struct.
    #[test]
    fn test_parse_search_code_args_full() {
        let raw = r#"{"query": "auth", "language": "rust", "repo": "sol", "branch": "feat/code", "semantic": true, "limit": 5}"#;
        let parsed: SearchCodeArgs = serde_json::from_str(raw).unwrap();

        assert_eq!(parsed.query, "auth");
        assert_eq!(parsed.language.as_deref(), Some("rust"));
        assert_eq!(parsed.repo.as_deref(), Some("sol"));
        assert_eq!(parsed.branch.as_deref(), Some("feat/code"));
        assert_eq!(parsed.semantic, Some(true));
        assert_eq!(parsed.limit, Some(5));
    }
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
pub mod bridge;
|
pub mod bridge;
|
||||||
|
pub mod code_search;
|
||||||
pub mod devtools;
|
pub mod devtools;
|
||||||
pub mod identity;
|
pub mod identity;
|
||||||
pub mod research;
|
pub mod research;
|
||||||
@@ -374,6 +375,13 @@ impl ToolRegistry {
|
|||||||
anyhow::bail!("Identity (Kratos) integration not configured")
|
anyhow::bail!("Identity (Kratos) integration not configured")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"search_code" => {
|
||||||
|
if let Some(ref os) = self.opensearch {
|
||||||
|
code_search::search_code(os, "sol_code", arguments, None, None).await
|
||||||
|
} else {
|
||||||
|
anyhow::bail!("Code search not available (OpenSearch not configured)")
|
||||||
|
}
|
||||||
|
}
|
||||||
"search_web" => {
|
"search_web" => {
|
||||||
if let Some(ref searxng) = self.config.services.searxng {
|
if let Some(ref searxng) = self.config.services.searxng {
|
||||||
web_search::search(&searxng.url, arguments).await
|
web_search::search(&searxng.url, arguments).await
|
||||||
|
|||||||
Reference in New Issue
Block a user