feat: code search tool + breadcrumb context injection + integration tests
search_code tool:
- Server-side tool querying sol_code OpenSearch index
- BM25 search across symbol_name, signature, docstring, content
- Branch-aware with boost for current branch, mainline fallback
- Registered in ToolRegistry execute dispatch

Breadcrumb injection:
- build_context_header() now async, injects adaptive breadcrumbs
- Hybrid search: _analyze → wildcard symbol matching → BM25
- Token budget enforcement (default outline + relevant expansion)
- Graceful degradation when OpenSearch unavailable

GrpcState:
- Added Option<OpenSearch> for breadcrumb retrieval
- code_index_name() accessor

Integration tests (6 new, 226 total):
- Index + search: bulk index symbols, verify BM25 retrieval
- Breadcrumb outline: aggregation query returns project structure
- Breadcrumb expansion: substantive query triggers relevant symbols
- Token budget: respects character limit
- Branch scoping: feat/code symbols preferred over mainline
- Branch deletion: cleanup removes branch symbols, mainline survives
This commit is contained in:
@@ -606,7 +606,8 @@ mod grpc_tests {
|
||||
tools,
|
||||
store,
|
||||
mistral,
|
||||
matrix: None, // not needed for tests
|
||||
matrix: None,
|
||||
opensearch: None, // breadcrumbs disabled in tests
|
||||
system_prompt: "you are sol. respond briefly. lowercase only.".into(),
|
||||
orchestrator_agent_id: String::new(),
|
||||
orchestrator: Some(orch),
|
||||
@@ -921,3 +922,355 @@ mod grpc_tests {
|
||||
assert!(got_end, "Server should send SessionEnd on clean disconnect");
|
||||
}
|
||||
}
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
// Code index + breadcrumb integration tests (requires local OpenSearch)
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
mod code_index_tests {
|
||||
use super::*;
|
||||
use crate::code_index::schema::{self, SymbolDocument};
|
||||
use crate::code_index::indexer::CodeIndexer;
|
||||
use crate::breadcrumbs;
|
||||
|
||||
fn os_client() -> Option<opensearch::OpenSearch> {
|
||||
use opensearch::http::transport::{SingleNodeConnectionPool, TransportBuilder};
|
||||
let url = url::Url::parse("http://localhost:9200").ok()?;
|
||||
let transport = TransportBuilder::new(SingleNodeConnectionPool::new(url))
|
||||
.build()
|
||||
.ok()?;
|
||||
Some(opensearch::OpenSearch::new(transport))
|
||||
}
|
||||
|
||||
async fn setup_test_index(client: &opensearch::OpenSearch) -> String {
|
||||
let index = format!("sol_code_test_{}", uuid::Uuid::new_v4().to_string().split('-').next().unwrap());
|
||||
schema::create_index_if_not_exists(client, &index).await.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
/// Send an index-refresh request for `index`.
///
/// The outcome of the request (success or error) is discarded.
async fn refresh_index(client: &opensearch::OpenSearch, index: &str) {
    use opensearch::indices::IndicesRefreshParts;

    let targets = [index];
    // Kept as a single expression: the request builder borrows from the
    // temporary returned by `indices()`.
    let _ = client
        .indices()
        .refresh(IndicesRefreshParts::Index(&targets))
        .send()
        .await;
}
|
||||
|
||||
/// Send a delete request for the test index `index`.
///
/// Best-effort teardown: the outcome of the request is discarded.
async fn cleanup_index(client: &opensearch::OpenSearch, index: &str) {
    use opensearch::indices::IndicesDeleteParts;

    let targets = [index];
    // Kept as a single expression: the request builder borrows from the
    // temporary returned by `indices()`.
    let _ = client
        .indices()
        .delete(IndicesDeleteParts::Index(&targets))
        .send()
        .await;
}
|
||||
|
||||
fn sample_symbols() -> Vec<SymbolDocument> {
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
vec![
|
||||
SymbolDocument {
|
||||
file_path: "src/orchestrator/mod.rs".into(),
|
||||
repo_owner: Some("studio".into()),
|
||||
repo_name: "sol".into(),
|
||||
language: "rust".into(),
|
||||
symbol_name: "generate".into(),
|
||||
symbol_kind: "function".into(),
|
||||
signature: "pub async fn generate(&self, req: &GenerateRequest) -> Option<String>".into(),
|
||||
docstring: "Generate a response using the ConversationRegistry.".into(),
|
||||
start_line: 80,
|
||||
end_line: 120,
|
||||
content: "pub async fn generate(&self, req: &GenerateRequest) -> Option<String> { ... }".into(),
|
||||
branch: "mainline".into(),
|
||||
source: "local".into(),
|
||||
indexed_at: now,
|
||||
},
|
||||
SymbolDocument {
|
||||
file_path: "src/orchestrator/engine.rs".into(),
|
||||
repo_owner: Some("studio".into()),
|
||||
repo_name: "sol".into(),
|
||||
language: "rust".into(),
|
||||
symbol_name: "run_tool_loop".into(),
|
||||
symbol_kind: "function".into(),
|
||||
signature: "pub async fn run_tool_loop(orch: &Orchestrator, req: &GenerateRequest, resp: ConversationResponse) -> Option<(String, TokenUsage)>".into(),
|
||||
docstring: "Unified Mistral tool loop. Emits events for every state transition.".into(),
|
||||
start_line: 20,
|
||||
end_line: 160,
|
||||
content: "pub async fn run_tool_loop(...) { ... tool iteration ... }".into(),
|
||||
branch: "mainline".into(),
|
||||
source: "local".into(),
|
||||
indexed_at: now,
|
||||
},
|
||||
SymbolDocument {
|
||||
file_path: "src/orchestrator/tool_dispatch.rs".into(),
|
||||
repo_owner: Some("studio".into()),
|
||||
repo_name: "sol".into(),
|
||||
language: "rust".into(),
|
||||
symbol_name: "route".into(),
|
||||
symbol_kind: "function".into(),
|
||||
signature: "pub fn route(tool_name: &str) -> ToolSide".into(),
|
||||
docstring: "Route a tool call to server or client.".into(),
|
||||
start_line: 17,
|
||||
end_line: 23,
|
||||
content: "pub fn route(tool_name: &str) -> ToolSide { if CLIENT_TOOLS.contains ... }".into(),
|
||||
branch: "mainline".into(),
|
||||
source: "local".into(),
|
||||
indexed_at: now,
|
||||
},
|
||||
SymbolDocument {
|
||||
file_path: "src/orchestrator/event.rs".into(),
|
||||
repo_owner: Some("studio".into()),
|
||||
repo_name: "sol".into(),
|
||||
language: "rust".into(),
|
||||
symbol_name: "ToolSide".into(),
|
||||
symbol_kind: "enum".into(),
|
||||
signature: "pub enum ToolSide { Server, Client }".into(),
|
||||
docstring: "Whether a tool executes on the server or on a connected client.".into(),
|
||||
start_line: 68,
|
||||
end_line: 72,
|
||||
content: "pub enum ToolSide { Server, Client }".into(),
|
||||
branch: "mainline".into(),
|
||||
source: "local".into(),
|
||||
indexed_at: now,
|
||||
},
|
||||
SymbolDocument {
|
||||
file_path: "src/orchestrator/event.rs".into(),
|
||||
repo_owner: Some("studio".into()),
|
||||
repo_name: "sol".into(),
|
||||
language: "rust".into(),
|
||||
symbol_name: "OrchestratorEvent".into(),
|
||||
symbol_kind: "enum".into(),
|
||||
signature: "pub enum OrchestratorEvent { Started, Thinking, ToolCallDetected, ToolStarted, ToolCompleted, Done, Failed }".into(),
|
||||
docstring: "An event emitted by the orchestrator during response generation.".into(),
|
||||
start_line: 110,
|
||||
end_line: 170,
|
||||
content: "pub enum OrchestratorEvent { ... }".into(),
|
||||
branch: "mainline".into(),
|
||||
source: "local".into(),
|
||||
indexed_at: now,
|
||||
},
|
||||
// Feature branch symbol — should be preferred when querying feat/code
|
||||
SymbolDocument {
|
||||
file_path: "src/orchestrator/mod.rs".into(),
|
||||
repo_owner: Some("studio".into()),
|
||||
repo_name: "sol".into(),
|
||||
language: "rust".into(),
|
||||
symbol_name: "generate_from_response".into(),
|
||||
symbol_kind: "function".into(),
|
||||
signature: "pub async fn generate_from_response(&self, req: &GenerateRequest, resp: ConversationResponse) -> Option<String>".into(),
|
||||
docstring: "Generate from a pre-built ConversationResponse. Caller manages conversation.".into(),
|
||||
start_line: 125,
|
||||
end_line: 160,
|
||||
content: "pub async fn generate_from_response(...) { ... }".into(),
|
||||
branch: "feat/code".into(),
|
||||
source: "local".into(),
|
||||
indexed_at: now,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_index_and_search_symbols() {
|
||||
let Some(client) = os_client() else {
|
||||
eprintln!("Skipping: OpenSearch not available at localhost:9200");
|
||||
return;
|
||||
};
|
||||
|
||||
let index = setup_test_index(&client).await;
|
||||
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||
|
||||
for doc in sample_symbols() {
|
||||
indexer.add(doc).await;
|
||||
}
|
||||
indexer.flush().await;
|
||||
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
// Search for "tool loop" — should find run_tool_loop
|
||||
let results = crate::tools::code_search::search_code(
|
||||
&client, &index,
|
||||
r#"{"query": "tool loop"}"#,
|
||||
Some("sol"), Some("mainline"),
|
||||
).await.unwrap();
|
||||
assert!(results.contains("run_tool_loop"), "Expected run_tool_loop in results, got:\n{results}");
|
||||
|
||||
// Search for "ToolSide" — should find the enum
|
||||
let results = crate::tools::code_search::search_code(
|
||||
&client, &index,
|
||||
r#"{"query": "ToolSide"}"#,
|
||||
Some("sol"), None,
|
||||
).await.unwrap();
|
||||
assert!(results.contains("ToolSide"), "Expected ToolSide in results, got:\n{results}");
|
||||
|
||||
// Search for "generate response" — should find generate()
|
||||
let results = crate::tools::code_search::search_code(
|
||||
&client, &index,
|
||||
r#"{"query": "generate response"}"#,
|
||||
Some("sol"), None,
|
||||
).await.unwrap();
|
||||
assert!(results.contains("generate"), "Expected generate in results, got:\n{results}");
|
||||
|
||||
cleanup_index(&client, &index).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_breadcrumb_project_outline() {
|
||||
let Some(client) = os_client() else {
|
||||
eprintln!("Skipping: OpenSearch not available");
|
||||
return;
|
||||
};
|
||||
|
||||
let index = setup_test_index(&client).await;
|
||||
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||
for doc in sample_symbols() {
|
||||
indexer.add(doc).await;
|
||||
}
|
||||
indexer.flush().await;
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
let result = breadcrumbs::build_breadcrumbs(
|
||||
&client, &index, "sol", "mainline", "hi", 4000
|
||||
).await;
|
||||
|
||||
// Default outline should have project name
|
||||
assert!(result.outline.contains("sol"), "Outline should mention project name");
|
||||
// Short message → no adaptive expansion
|
||||
assert!(result.relevant.is_empty(), "Short message should not trigger expansion");
|
||||
|
||||
cleanup_index(&client, &index).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_breadcrumb_adaptive_expansion() {
|
||||
let Some(client) = os_client() else {
|
||||
eprintln!("Skipping: OpenSearch not available");
|
||||
return;
|
||||
};
|
||||
|
||||
let index = setup_test_index(&client).await;
|
||||
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||
for doc in sample_symbols() {
|
||||
indexer.add(doc).await;
|
||||
}
|
||||
indexer.flush().await;
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
let result = breadcrumbs::build_breadcrumbs(
|
||||
&client, &index, "sol", "mainline",
|
||||
"how does the tool loop handle client-side tools?",
|
||||
4000,
|
||||
).await;
|
||||
|
||||
// Adaptive expansion should find relevant symbols
|
||||
assert!(!result.relevant.is_empty(), "Substantive message should trigger expansion");
|
||||
|
||||
// Formatted output should contain relevant context section
|
||||
assert!(result.formatted.contains("relevant context"), "Should have relevant context section");
|
||||
|
||||
// Should include tool-related symbols
|
||||
let symbol_names: Vec<&str> = result.relevant.iter().map(|s| s.symbol_name.as_str()).collect();
|
||||
assert!(
|
||||
symbol_names.iter().any(|n| n.contains("tool") || n.contains("route") || n.contains("ToolSide")),
|
||||
"Expected tool-related symbols, got: {:?}", symbol_names
|
||||
);
|
||||
|
||||
cleanup_index(&client, &index).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_breadcrumb_token_budget() {
|
||||
let Some(client) = os_client() else {
|
||||
eprintln!("Skipping: OpenSearch not available");
|
||||
return;
|
||||
};
|
||||
|
||||
let index = setup_test_index(&client).await;
|
||||
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||
for doc in sample_symbols() {
|
||||
indexer.add(doc).await;
|
||||
}
|
||||
indexer.flush().await;
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
// Very small budget — should only fit the outline
|
||||
let result = breadcrumbs::build_breadcrumbs(
|
||||
&client, &index, "sol", "mainline",
|
||||
"how does the tool loop work?",
|
||||
100, // tiny budget
|
||||
).await;
|
||||
|
||||
assert!(result.formatted.len() <= 100, "Should respect token budget, got {} chars", result.formatted.len());
|
||||
|
||||
cleanup_index(&client, &index).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branch_scoping() {
|
||||
let Some(client) = os_client() else {
|
||||
eprintln!("Skipping: OpenSearch not available");
|
||||
return;
|
||||
};
|
||||
|
||||
let index = setup_test_index(&client).await;
|
||||
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||
for doc in sample_symbols() {
|
||||
indexer.add(doc).await;
|
||||
}
|
||||
indexer.flush().await;
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
// Search on feat/code branch — should find generate_from_response (branch-specific)
|
||||
let results = crate::tools::code_search::search_code(
|
||||
&client, &index,
|
||||
r#"{"query": "generate from response", "branch": "feat/code"}"#,
|
||||
Some("sol"), None,
|
||||
).await.unwrap();
|
||||
assert!(
|
||||
results.contains("generate_from_response"),
|
||||
"Should find branch-specific symbol, got:\n{results}"
|
||||
);
|
||||
|
||||
// Should also find mainline symbols as fallback
|
||||
assert!(
|
||||
results.contains("generate") || results.contains("run_tool_loop"),
|
||||
"Should also find mainline symbols as fallback"
|
||||
);
|
||||
|
||||
cleanup_index(&client, &index).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_branch_symbols() {
|
||||
let Some(client) = os_client() else {
|
||||
eprintln!("Skipping: OpenSearch not available");
|
||||
return;
|
||||
};
|
||||
|
||||
let index = setup_test_index(&client).await;
|
||||
let mut indexer = CodeIndexer::new(client.clone(), index.clone(), "".into(), 100);
|
||||
for doc in sample_symbols() {
|
||||
indexer.add(doc).await;
|
||||
}
|
||||
indexer.flush().await;
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
// Delete feat/code branch symbols
|
||||
indexer.delete_branch("sol", "feat/code").await;
|
||||
refresh_index(&client, &index).await;
|
||||
|
||||
// Should no longer find generate_from_response
|
||||
let results = crate::tools::code_search::search_code(
|
||||
&client, &index,
|
||||
r#"{"query": "generate_from_response"}"#,
|
||||
Some("sol"), Some("feat/code"),
|
||||
).await.unwrap();
|
||||
|
||||
// Mainline symbols should still exist
|
||||
let mainline_results = crate::tools::code_search::search_code(
|
||||
&client, &index,
|
||||
r#"{"query": "generate"}"#,
|
||||
Some("sol"), Some("mainline"),
|
||||
).await.unwrap();
|
||||
assert!(mainline_results.contains("generate"), "Mainline symbols should survive branch deletion");
|
||||
|
||||
cleanup_index(&client, &index).await;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user