feat: code search tool + breadcrumb context injection + integration tests

search_code tool:
- Server-side tool querying sol_code OpenSearch index
- BM25 search across symbol_name, signature, docstring, content
- Branch-aware with boost for current branch, mainline fallback
- Registered in ToolRegistry execute dispatch

Breadcrumb injection:
- build_context_header() now async, injects adaptive breadcrumbs
- Hybrid search: _analyze → wildcard symbol matching → BM25
- Token budget enforcement (default outline + relevant expansion)
- Graceful degradation when OpenSearch unavailable

GrpcState:
- Added Option<OpenSearch> for breadcrumb retrieval
- code_index_name() accessor

Integration tests (6 new, 226 total):
- Index + search: bulk index symbols, verify BM25 retrieval
- Breadcrumb outline: aggregation query returns project structure
- Breadcrumb expansion: substantive query triggers relevant symbols
- Token budget: respects character limit
- Branch scoping: feat/code symbols preferred over mainline
- Branch deletion: cleanup removes branch symbols, mainline survives
This commit is contained in:
2026-03-24 00:19:17 +00:00
parent 57f8d608a5
commit c213d74620
8 changed files with 563 additions and 25 deletions

View File

@@ -606,7 +606,8 @@ mod grpc_tests {
tools,
store,
mistral,
matrix: None, // not needed for tests
matrix: None,
opensearch: None, // breadcrumbs disabled in tests
system_prompt: "you are sol. respond briefly. lowercase only.".into(),
orchestrator_agent_id: String::new(),
orchestrator: Some(orch),
@@ -921,3 +922,355 @@ mod grpc_tests {
assert!(got_end, "Server should send SessionEnd on clean disconnect");
}
}
// ══════════════════════════════════════════════════════════════════════════
// Code index + breadcrumb integration tests (requires local OpenSearch)
// ══════════════════════════════════════════════════════════════════════════
mod code_index_tests {
use super::*;
use crate::code_index::schema::{self, SymbolDocument};
use crate::code_index::indexer::CodeIndexer;
use crate::breadcrumbs;
/// Returns a client for the OpenSearch node at `localhost:9200`, or `None`
/// when nothing accepts TCP connections on that port.
///
/// Building the transport only configures the HTTP client, so on its own it
/// cannot tell whether a server is running. The explicit probe makes the
/// callers' "Skipping: OpenSearch not available" branch reachable instead of
/// letting the first real request panic.
fn os_client() -> Option<opensearch::OpenSearch> {
    use opensearch::http::transport::{SingleNodeConnectionPool, TransportBuilder};
    use std::net::{TcpStream, ToSocketAddrs};
    use std::time::Duration;

    // `localhost` may resolve to both IPv4 and IPv6 loopback; accept whichever
    // one answers. A refused connection returns immediately, so the timeout
    // only matters for an unresponsive listener.
    let reachable = ("localhost", 9200)
        .to_socket_addrs()
        .ok()?
        .any(|addr| TcpStream::connect_timeout(&addr, Duration::from_millis(500)).is_ok());
    if !reachable {
        return None;
    }

    let url = url::Url::parse("http://localhost:9200").ok()?;
    let transport = TransportBuilder::new(SingleNodeConnectionPool::new(url))
        .build()
        .ok()?;
    Some(opensearch::OpenSearch::new(transport))
}
/// Creates a uniquely named `sol_code_test_<suffix>` index and returns its name.
///
/// The suffix is the first hyphen-separated group of a fresh v4 UUID.
/// Panics if index creation fails.
async fn setup_test_index(client: &opensearch::OpenSearch) -> String {
    let unique = uuid::Uuid::new_v4().to_string();
    let suffix = unique.split('-').next().unwrap();
    let name = format!("sol_code_test_{suffix}");
    schema::create_index_if_not_exists(client, &name).await.unwrap();
    name
}
/// Issues an index refresh for `index`.
///
/// The outcome of the request is deliberately discarded.
async fn refresh_index(client: &opensearch::OpenSearch, index: &str) {
    use opensearch::indices::IndicesRefreshParts;

    let targets = [index];
    let _ = client
        .indices()
        .refresh(IndicesRefreshParts::Index(&targets))
        .send()
        .await;
}
/// Deletes the test index `index`.
///
/// Best effort: the outcome of the delete request is discarded.
async fn cleanup_index(client: &opensearch::OpenSearch, index: &str) {
    use opensearch::indices::IndicesDeleteParts;

    let targets = [index];
    let _ = client
        .indices()
        .delete(IndicesDeleteParts::Index(&targets))
        .send()
        .await;
}
/// Fixture data: five symbols on `mainline` and one symbol that exists only
/// on `feat/code`, all belonging to repo `studio/sol` and stamped with the
/// same `indexed_at` value.
fn sample_symbols() -> Vec<SymbolDocument> {
    let indexed_at = chrono::Utc::now().timestamp_millis();

    // Repo, language, source and timestamp are identical for every fixture;
    // only the per-symbol fields are passed in.
    let symbol = |file_path: &'static str,
                  symbol_name: &'static str,
                  symbol_kind: &'static str,
                  signature: &'static str,
                  docstring: &'static str,
                  start_line,
                  end_line,
                  content: &'static str,
                  branch: &'static str| SymbolDocument {
        file_path: file_path.into(),
        repo_owner: Some("studio".into()),
        repo_name: "sol".into(),
        language: "rust".into(),
        symbol_name: symbol_name.into(),
        symbol_kind: symbol_kind.into(),
        signature: signature.into(),
        docstring: docstring.into(),
        start_line,
        end_line,
        content: content.into(),
        branch: branch.into(),
        source: "local".into(),
        indexed_at,
    };

    vec![
        symbol(
            "src/orchestrator/mod.rs",
            "generate",
            "function",
            "pub async fn generate(&self, req: &GenerateRequest) -> Option<String>",
            "Generate a response using the ConversationRegistry.",
            80,
            120,
            "pub async fn generate(&self, req: &GenerateRequest) -> Option<String> { ... }",
            "mainline",
        ),
        symbol(
            "src/orchestrator/engine.rs",
            "run_tool_loop",
            "function",
            "pub async fn run_tool_loop(orch: &Orchestrator, req: &GenerateRequest, resp: ConversationResponse) -> Option<(String, TokenUsage)>",
            "Unified Mistral tool loop. Emits events for every state transition.",
            20,
            160,
            "pub async fn run_tool_loop(...) { ... tool iteration ... }",
            "mainline",
        ),
        symbol(
            "src/orchestrator/tool_dispatch.rs",
            "route",
            "function",
            "pub fn route(tool_name: &str) -> ToolSide",
            "Route a tool call to server or client.",
            17,
            23,
            "pub fn route(tool_name: &str) -> ToolSide { if CLIENT_TOOLS.contains ... }",
            "mainline",
        ),
        symbol(
            "src/orchestrator/event.rs",
            "ToolSide",
            "enum",
            "pub enum ToolSide { Server, Client }",
            "Whether a tool executes on the server or on a connected client.",
            68,
            72,
            "pub enum ToolSide { Server, Client }",
            "mainline",
        ),
        symbol(
            "src/orchestrator/event.rs",
            "OrchestratorEvent",
            "enum",
            "pub enum OrchestratorEvent { Started, Thinking, ToolCallDetected, ToolStarted, ToolCompleted, Done, Failed }",
            "An event emitted by the orchestrator during response generation.",
            110,
            170,
            "pub enum OrchestratorEvent { ... }",
            "mainline",
        ),
        // Feature branch symbol — should be preferred when querying feat/code
        symbol(
            "src/orchestrator/mod.rs",
            "generate_from_response",
            "function",
            "pub async fn generate_from_response(&self, req: &GenerateRequest, resp: ConversationResponse) -> Option<String>",
            "Generate from a pre-built ConversationResponse. Caller manages conversation.",
            125,
            160,
            "pub async fn generate_from_response(...) { ... }",
            "feat/code",
        ),
    ]
}
/// Indexes the sample symbols, then runs three `search_code` queries and
/// checks that each result text mentions the expected symbol name.
#[tokio::test]
async fn test_index_and_search_symbols() {
    let client = match os_client() {
        Some(client) => client,
        None => {
            eprintln!("Skipping: OpenSearch not available at localhost:9200");
            return;
        }
    };

    let index = setup_test_index(&client).await;
    let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
    for symbol in sample_symbols() {
        indexer.add(symbol).await;
    }
    indexer.flush().await;
    refresh_index(&client, &index).await;

    // (tool arguments as JSON, branch passed to search_code, name expected in the output)
    let cases: [(&str, Option<&str>, &str); 3] = [
        // Search for "tool loop" — should find run_tool_loop
        (r#"{"query": "tool loop"}"#, Some("mainline"), "run_tool_loop"),
        // Search for "ToolSide" — should find the enum
        (r#"{"query": "ToolSide"}"#, None, "ToolSide"),
        // Search for "generate response" — should find generate()
        (r#"{"query": "generate response"}"#, None, "generate"),
    ];

    for (arguments, branch, expected) in cases {
        let results = crate::tools::code_search::search_code(
            &client,
            &index,
            arguments,
            Some("sol"),
            branch,
        )
        .await
        .unwrap();
        assert!(
            results.contains(expected),
            "Expected {expected} in results, got:\n{results}"
        );
    }

    cleanup_index(&client, &index).await;
}
/// With the trivial message "hi", `build_breadcrumbs` should produce an
/// outline that mentions the project and no relevant-symbol expansion.
#[tokio::test]
async fn test_breadcrumb_project_outline() {
    let client = match os_client() {
        Some(client) => client,
        None => {
            eprintln!("Skipping: OpenSearch not available");
            return;
        }
    };

    let index = setup_test_index(&client).await;
    let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
    for symbol in sample_symbols() {
        indexer.add(symbol).await;
    }
    indexer.flush().await;
    refresh_index(&client, &index).await;

    let crumbs =
        breadcrumbs::build_breadcrumbs(&client, &index, "sol", "mainline", "hi", 4000).await;

    // Default outline should have project name
    let names_project = crumbs.outline.contains("sol");
    assert!(names_project, "Outline should mention project name");

    // Short message → no adaptive expansion
    let expanded = !crumbs.relevant.is_empty();
    assert!(!expanded, "Short message should not trigger expansion");

    cleanup_index(&client, &index).await;
}
/// A substantive question about the tool loop should yield a non-empty
/// `relevant` list, a "relevant context" section in the formatted output,
/// and at least one tool-related symbol name.
#[tokio::test]
async fn test_breadcrumb_adaptive_expansion() {
    let client = match os_client() {
        Some(client) => client,
        None => {
            eprintln!("Skipping: OpenSearch not available");
            return;
        }
    };

    let index = setup_test_index(&client).await;
    let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
    for symbol in sample_symbols() {
        indexer.add(symbol).await;
    }
    indexer.flush().await;
    refresh_index(&client, &index).await;

    let crumbs = breadcrumbs::build_breadcrumbs(
        &client,
        &index,
        "sol",
        "mainline",
        "how does the tool loop handle client-side tools?",
        4000,
    )
    .await;

    // Adaptive expansion should find relevant symbols
    assert!(
        !crumbs.relevant.is_empty(),
        "Substantive message should trigger expansion"
    );

    // Formatted output should contain relevant context section
    assert!(
        crumbs.formatted.contains("relevant context"),
        "Should have relevant context section"
    );

    // Should include tool-related symbols
    let symbol_names: Vec<&str> = crumbs
        .relevant
        .iter()
        .map(|hit| hit.symbol_name.as_str())
        .collect();
    let mentions_tooling = symbol_names.iter().any(|name| {
        ["tool", "route", "ToolSide"]
            .iter()
            .any(|needle| name.contains(*needle))
    });
    assert!(
        mentions_tooling,
        "Expected tool-related symbols, got: {:?}",
        symbol_names
    );

    cleanup_index(&client, &index).await;
}
/// With a budget of 100, the formatted breadcrumb text must not exceed
/// 100 in `len()`.
#[tokio::test]
async fn test_breadcrumb_token_budget() {
    let client = match os_client() {
        Some(client) => client,
        None => {
            eprintln!("Skipping: OpenSearch not available");
            return;
        }
    };

    let index = setup_test_index(&client).await;
    let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
    for symbol in sample_symbols() {
        indexer.add(symbol).await;
    }
    indexer.flush().await;
    refresh_index(&client, &index).await;

    // Very small budget — should only fit the outline
    let crumbs = breadcrumbs::build_breadcrumbs(
        &client,
        &index,
        "sol",
        "mainline",
        "how does the tool loop work?",
        100, // tiny budget
    )
    .await;

    let rendered_len = crumbs.formatted.len();
    assert!(
        rendered_len <= 100,
        "Should respect token budget, got {rendered_len} chars"
    );

    cleanup_index(&client, &index).await;
}
/// Searching with `"branch": "feat/code"` must surface the branch-only symbol
/// `generate_from_response` and still return mainline symbols as a fallback.
#[tokio::test]
async fn test_branch_scoping() {
    let Some(client) = os_client() else {
        eprintln!("Skipping: OpenSearch not available");
        return;
    };

    let index = setup_test_index(&client).await;
    let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
    for doc in sample_symbols() {
        indexer.add(doc).await;
    }
    indexer.flush().await;
    refresh_index(&client, &index).await;

    // Search on feat/code branch — should find generate_from_response (branch-specific)
    let results = crate::tools::code_search::search_code(
        &client,
        &index,
        r#"{"query": "generate from response", "branch": "feat/code"}"#,
        Some("sol"),
        None,
    )
    .await
    .unwrap();
    assert!(
        results.contains("generate_from_response"),
        "Should find branch-specific symbol, got:\n{results}"
    );

    // Should also find mainline symbols as fallback.
    // "generate_from_response" itself contains "generate", so checking the raw
    // text would always pass once the assertion above holds. Strip the branch
    // symbol's name first so only a mainline hit can satisfy the check.
    // NOTE(review): assumes the rendered results include symbol names or
    // signatures verbatim — confirm against search_code's output format.
    let without_branch_symbol = results.replace("generate_from_response", "");
    assert!(
        without_branch_symbol.contains("generate") || without_branch_symbol.contains("run_tool_loop"),
        "Should also find mainline symbols as fallback"
    );

    cleanup_index(&client, &index).await;
}
/// After `delete_branch("sol", "feat/code")`, the branch-only symbol must no
/// longer be returned while mainline symbols remain searchable.
#[tokio::test]
async fn test_delete_branch_symbols() {
    let Some(client) = os_client() else {
        eprintln!("Skipping: OpenSearch not available");
        return;
    };

    let index = setup_test_index(&client).await;
    let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
    for doc in sample_symbols() {
        indexer.add(doc).await;
    }
    indexer.flush().await;
    refresh_index(&client, &index).await;

    // Delete feat/code branch symbols
    indexer.delete_branch("sol", "feat/code").await;
    refresh_index(&client, &index).await;

    // Should no longer find generate_from_response.
    // Query by words rather than by the exact symbol name, so that an
    // implementation which echoes the query back in its output cannot trip
    // the absence check below.
    let results = crate::tools::code_search::search_code(
        &client,
        &index,
        r#"{"query": "generate from response"}"#,
        Some("sol"),
        Some("feat/code"),
    )
    .await
    .unwrap();
    assert!(
        !results.contains("generate_from_response"),
        "Branch symbols should be gone after branch deletion, got:\n{results}"
    );

    // Mainline symbols should still exist
    let mainline_results = crate::tools::code_search::search_code(
        &client,
        &index,
        r#"{"query": "generate"}"#,
        Some("sol"),
        Some("mainline"),
    )
    .await
    .unwrap();
    assert!(
        mainline_results.contains("generate"),
        "Mainline symbols should survive branch deletion"
    );

    cleanup_index(&client, &index).await;
}
}