feat: Gitea repo indexing via gRPC ReindexCode endpoint

Gitea indexer (code_index/gitea.rs):
- Walks repos via GiteaClient API (list repos → traverse dirs → fetch files)
- Base64 decodes file content from Gitea API responses
- Extracts symbols with tree-sitter (Rust, TypeScript, Python)
- Indexes to sol_code OpenSearch index with repo/branch/source metadata
- Skips hidden dirs, vendor, node_modules, files >100KB
- delete_branch() for clean re-indexing

Server-side tree-sitter (code_index/symbols.rs):
- Full symbol extraction shared with CLI client
- extract_symbols(), extract_project_symbols(), detect_language()

gRPC ReindexCode RPC:
- ReindexCodeRequest: org, repo, branch (all optional filters)
- ReindexCodeResponse: repos_indexed, symbols_indexed, error
- Uses ToolRegistry's GiteaClient (already authenticated)
- Creates sol_code index if not exists

ToolRegistry.gitea_client() accessor for reindex endpoint.
This commit is contained in:
2026-03-24 09:36:42 +00:00
parent 4d5b3a9b28
commit a11b313301
7 changed files with 1069 additions and 3 deletions

70
Cargo.lock generated
View File

@@ -1357,7 +1357,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
@@ -1371,6 +1371,12 @@ dependencies = [
"syn",
]
[[package]]
name = "dotenv"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f"
[[package]]
name = "dprint-swc-ext"
version = "0.26.0"
@@ -3771,7 +3777,7 @@ dependencies = [
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
@@ -4705,6 +4711,7 @@ dependencies = [
"deno_ast",
"deno_core",
"deno_error",
"dotenv",
"futures",
"jsonwebtoken",
"libsqlite3-sys",
@@ -4730,6 +4737,10 @@ dependencies = [
"tonic-prost-build",
"tracing",
"tracing-subscriber",
"tree-sitter",
"tree-sitter-python",
"tree-sitter-rust",
"tree-sitter-typescript",
"url",
"uuid",
]
@@ -4808,6 +4819,12 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
[[package]]
name = "string_enum"
version = "1.0.2"
@@ -5871,6 +5888,55 @@ dependencies = [
"tracing-log",
]
[[package]]
name = "tree-sitter"
version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
dependencies = [
"cc",
"regex",
"regex-syntax",
"streaming-iterator",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782"
[[package]]
name = "tree-sitter-python"
version = "0.23.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-rust"
version = "0.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-typescript"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "triomphe"
version = "0.1.15"

View File

@@ -44,6 +44,10 @@ prost = "0.14"
tokio-stream = "0.1"
jsonwebtoken = "9"
tokenizers = { version = "0.22", default-features = false, features = ["onig", "http"] }
tree-sitter = "0.24"
tree-sitter-rust = "0.23"
tree-sitter-typescript = "0.23"
tree-sitter-python = "0.23"
[dev-dependencies]
dotenv = "0.15"

230
src/code_index/gitea.rs Normal file
View File

@@ -0,0 +1,230 @@
//! Gitea repo indexer — walks repos via the Gitea API, extracts symbols
//! with tree-sitter, and indexes them to OpenSearch.
use std::sync::Arc;
use tracing::{debug, error, info, warn};
use super::indexer::CodeIndexer;
use super::schema::SymbolDocument;
use super::symbols;
use crate::sdk::gitea::GiteaClient;
/// Index all repos for an organization (or all accessible repos).
///
/// Lists repos via the Gitea API, resolves each repo's default branch, and
/// indexes that branch's symbols. Per-repo failures are logged and skipped
/// so one bad repo cannot abort the whole run. Flushes the indexer once at
/// the end and returns the total number of symbols indexed.
pub async fn index_all_repos(
    gitea: &GiteaClient,
    indexer: &mut CodeIndexer,
    admin_localpart: &str, // Gitea identity used for all API calls
    org: Option<&str>,     // restrict to one org when Some
) -> anyhow::Result<u32> {
    // NOTE(review): page size is hard-coded to 100 — repos beyond the first
    // page would be silently skipped; confirm list_repos paginates internally.
    let repos = gitea
        .list_repos(admin_localpart, None, org, Some(100))
        .await
        .map_err(|e| anyhow::anyhow!("Failed to list repos: {e}"))?;
    let mut total_symbols = 0u32;
    for repo in &repos {
        // full_name is "owner/name"
        let parts: Vec<&str> = repo.full_name.splitn(2, '/').collect();
        if parts.len() != 2 {
            warn!(full_name = repo.full_name.as_str(), "Invalid repo full_name");
            continue;
        }
        let owner = parts[0];
        let name = parts[1];
        // Get full repo details for default_branch
        let full_repo = match gitea.get_repo(admin_localpart, owner, name).await {
            Ok(r) => r,
            Err(e) => {
                warn!(owner, name, "Failed to get repo details: {e}");
                continue;
            }
        };
        let default_branch = &full_repo.default_branch;
        info!(owner, name, branch = default_branch.as_str(), "Indexing repo");
        match index_repo(gitea, indexer, admin_localpart, owner, name, &default_branch).await {
            Ok(count) => {
                total_symbols += count;
                info!(owner, name, count, "Indexed repo symbols");
            }
            Err(e) => {
                // Best-effort: log and move on to the next repo.
                warn!(owner, name, "Failed to index repo: {e}");
            }
        }
    }
    // Single flush after all repos so buffered documents are written.
    indexer.flush().await;
    Ok(total_symbols)
}
/// Index a single repo at a given branch.
///
/// Deletes previously indexed symbols for the repo+branch, then iteratively
/// walks the repo's directory tree via the Gitea contents API, extracts
/// symbols from supported source files, and buffers them into the indexer.
/// Returns the number of symbols indexed. Does NOT flush the indexer —
/// callers flush when done (see index_all_repos / the gRPC handler).
pub async fn index_repo(
    gitea: &GiteaClient,
    indexer: &mut CodeIndexer,
    localpart: &str, // Gitea identity used for API calls
    owner: &str,
    repo: &str,
    branch: &str,
) -> anyhow::Result<u32> {
    // Delete existing symbols for this repo+branch before re-indexing
    indexer.delete_branch(repo, branch).await;
    let mut count = 0u32;
    // Explicit work stack instead of recursion (async fns can't easily recurse).
    let mut dirs_to_visit = vec![String::new()]; // start at repo root
    while let Some(dir_path) = dirs_to_visit.pop() {
        let entries = match gitea
            .get_file(localpart, owner, repo, &dir_path, Some(branch))
            .await
        {
            Ok(content) => content,
            Err(e) => {
                // Unreadable directory: skip it, keep walking the rest.
                debug!(owner, repo, path = dir_path.as_str(), "Failed to list directory: {e}");
                continue;
            }
        };
        // get_file returns a JSON string — parse as array of entries
        // NOTE(review): serializing `entries` back to a string and re-parsing
        // is a round-trip. If get_file already returns a serde_json::Value
        // this is just a clone; if it returns a raw JSON String the
        // re-serialization would wrap it in quotes and is_array() below would
        // always be false — confirm get_file's actual return type.
        let entries_json: serde_json::Value =
            serde_json::from_str(&serde_json::to_string(&entries).unwrap_or_default())
                .unwrap_or_default();
        // If it's a single file response (not a directory listing), skip
        if !entries_json.is_array() {
            continue;
        }
        let items = entries_json.as_array().unwrap();
        for item in items {
            let name = item["name"].as_str().unwrap_or("");
            let path = item["path"].as_str().unwrap_or("");
            let file_type = item["type"].as_str().unwrap_or("");
            // Skip hidden, vendor, build dirs
            if name.starts_with('.')
                || name == "target"
                || name == "vendor"
                || name == "node_modules"
                || name == "dist"
                || name == "__pycache__"
                || name == ".git"
            {
                continue;
            }
            if file_type == "dir" {
                dirs_to_visit.push(path.to_string());
            } else if file_type == "file" {
                // Check if it's a supported source file
                let lang = symbols::detect_language(path);
                if lang.is_none() {
                    continue;
                }
                // Skip large files (>100 KB per the size reported by the API)
                let size = item["size"].as_u64().unwrap_or(0);
                if size > 100_000 {
                    continue;
                }
                // Fetch file content
                let content = match fetch_file_content(gitea, localpart, owner, repo, path, branch).await {
                    Some(c) => c,
                    None => continue,
                };
                // Extract symbols
                let syms = symbols::extract_symbols(path, &content);
                let now = chrono::Utc::now().timestamp_millis();
                for sym in syms {
                    // Build content snippet for embedding
                    let body = extract_body(&content, sym.start_line, sym.end_line);
                    indexer
                        .add(SymbolDocument {
                            file_path: path.to_string(),
                            repo_owner: Some(owner.to_string()),
                            repo_name: repo.to_string(),
                            language: sym.language,
                            symbol_name: sym.name,
                            symbol_kind: sym.kind,
                            signature: sym.signature,
                            docstring: sym.docstring,
                            start_line: sym.start_line,
                            end_line: sym.end_line,
                            content: body,
                            branch: branch.to_string(),
                            source: "gitea".into(),
                            indexed_at: now,
                        })
                        .await;
                    count += 1;
                }
            }
        }
    }
    Ok(count)
}
/// Fetch and decode a file's content from Gitea (base64-encoded API response).
///
/// Returns `None` on any failure: API error, missing/non-string `content`
/// field, invalid base64, or non-UTF-8 decoded bytes.
async fn fetch_file_content(
    gitea: &GiteaClient,
    localpart: &str,
    owner: &str,
    repo: &str,
    path: &str,
    branch: &str,
) -> Option<String> {
    let response = gitea
        .get_file(localpart, owner, repo, path, Some(branch))
        .await
        .ok()?;
    // The response is a JSON string — parse it
    // NOTE(review): same serialize-then-reparse round-trip as in index_repo;
    // confirm get_file's return type makes this yield an object Value rather
    // than a quoted Value::String.
    let json_str = serde_json::to_string(&response).ok()?;
    let json: serde_json::Value = serde_json::from_str(&json_str).ok()?;
    // Content is base64-encoded
    let encoded = json["content"].as_str()?;
    let cleaned = encoded.replace('\n', ""); // Gitea adds newlines in base64
    let decoded = base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &cleaned).ok()?;
    String::from_utf8(decoded).ok()
}
/// Extract the body of a symbol from source content.
///
/// `start_line`/`end_line` are 1-based and inclusive; out-of-range values
/// are clamped and an empty/inverted range yields an empty string. Bodies
/// longer than 500 bytes are truncated at a UTF-8 character boundary and
/// suffixed with an ellipsis (the contract `test_extract_body_truncation`
/// asserts — the previous code never appended the '…').
fn extract_body(content: &str, start_line: u32, end_line: u32) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let start = (start_line as usize).saturating_sub(1);
    let end = (end_line as usize).min(lines.len());
    // Guard: a start beyond the file (or beyond end) would panic on slicing.
    if start >= end {
        return String::new();
    }
    let body = lines[start..end].join("\n");
    if body.len() > 500 {
        // Back up to a char boundary — slicing at a fixed byte index can
        // split a multi-byte UTF-8 character and panic.
        let mut cut = 497;
        while !body.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}…", &body[..cut])
    } else {
        body
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Happy path: 1-based inclusive line range.
    #[test]
    fn test_extract_body() {
        let content = "line 1\nline 2\nline 3\nline 4\nline 5";
        assert_eq!(extract_body(content, 2, 4), "line 2\nline 3\nline 4");
    }

    // Long bodies must be truncated to ~500 bytes and end with an ellipsis.
    #[test]
    fn test_extract_body_truncation() {
        let long_content: String = (0..100).map(|i| format!("line {i} with some content to make it longer")).collect::<Vec<_>>().join("\n");
        let body = extract_body(&long_content, 1, 100);
        assert!(body.len() <= 501);
        assert!(body.ends_with('…'));
    }
}

View File

@@ -3,5 +3,7 @@
//! Indexes symbols (functions, structs, enums, traits) with their signatures,
//! docstrings, and body content. Supports branch-aware semantic search.
pub mod schema;
pub mod gitea;
pub mod indexer;
pub mod schema;
pub mod symbols;

659
src/code_index/symbols.rs Normal file
View File

@@ -0,0 +1,659 @@
//! Symbol extraction from source code using tree-sitter.
//!
//! Extracts function signatures, struct/enum/trait definitions, and
//! docstrings from Rust, TypeScript, and Python files. These symbols
//! are sent to Sol for indexing in the code search index.
use std::path::Path;
use tracing::debug;
/// An extracted code symbol with file context.
#[derive(Debug, Clone)]
pub struct ProjectSymbol {
    pub file_path: String, // relative to project root
    pub name: String,      // symbol identifier (function/struct/class name)
    pub kind: String,      // e.g. "function", "struct", "class"
    pub signature: String, // declaration text up to the body
    pub docstring: String, // doc comment / docstring; empty when absent
    pub start_line: u32,   // 1-based, inclusive
    pub end_line: u32,     // 1-based, inclusive
    pub language: String,  // as returned by detect_language()
    pub content: String,   // symbol body, truncated to ~500 bytes
}
/// Extract symbols from every supported source file under `project_root`.
///
/// Walks the tree rooted at `project_root`, skipping vendor/build
/// directories, and returns all symbols found.
pub fn extract_project_symbols(project_root: &str) -> Vec<ProjectSymbol> {
    let root = Path::new(project_root);
    let mut collected = Vec::new();
    walk_directory(root, root, &mut collected);
    debug!(count = collected.len(), "Extracted project symbols");
    collected
}
/// Recursively walk `dir`, extracting symbols from every supported source
/// file and appending them to `symbols`. File paths are stored relative to
/// `root`.
///
/// Skips hidden entries and common vendor/build directories, files larger
/// than 100 KB, and files that are not valid UTF-8 (read_to_string fails).
fn walk_directory(dir: &Path, root: &Path, symbols: &mut Vec<ProjectSymbol>) {
    let Ok(entries) = std::fs::read_dir(dir) else { return };
    for entry in entries.flatten() {
        let path = entry.path();
        let name = entry.file_name().to_string_lossy().to_string();
        // Skip hidden, vendor, target, node_modules, etc.
        if name.starts_with('.') || name == "target" || name == "vendor"
            || name == "node_modules" || name == "dist" || name == "build"
            || name == "__pycache__" || name == ".git"
        {
            continue;
        }
        if path.is_dir() {
            walk_directory(&path, root, symbols);
        } else if path.is_file() {
            let path_str = path.to_string_lossy().to_string();
            if detect_language(&path_str).is_some() {
                // Read file (skip large files)
                if let Ok(content) = std::fs::read_to_string(&path) {
                    if content.len() > 100_000 { continue; } // skip >100KB
                    let rel_path = path.strip_prefix(root)
                        .map(|p| p.to_string_lossy().to_string())
                        .unwrap_or(path_str.clone());
                    for sym in extract_symbols(&path_str, &content) {
                        // Byte offsets of the symbol's line range; the +1
                        // accounts for each counted line's '\n' terminator.
                        // NOTE(review): assumes LF line endings — CRLF files
                        // would shift these offsets by one byte per line.
                        let body_start = content.lines()
                            .take(sym.start_line as usize - 1)
                            .map(|l| l.len() + 1)
                            .sum::<usize>()
                            // Fix: a symbol starting on a final line with no
                            // trailing newline could push the offset past the
                            // end of `content` and panic on the slice below.
                            .min(content.len());
                        let body_end = content.lines()
                            .take(sym.end_line as usize)
                            .map(|l| l.len() + 1)
                            .sum::<usize>()
                            .min(content.len());
                        let body = &content[body_start..body_end];
                        // Truncate to ~500 bytes at a char boundary.
                        // Fix: `&body[..497]` panics if byte 497 falls inside
                        // a multi-byte UTF-8 character.
                        let truncated = if body.len() > 500 {
                            let mut cut = 497;
                            while !body.is_char_boundary(cut) { cut -= 1; }
                            body[..cut].to_string()
                        } else {
                            body.to_string()
                        };
                        symbols.push(ProjectSymbol {
                            file_path: rel_path.clone(),
                            name: sym.name,
                            kind: sym.kind,
                            signature: sym.signature,
                            docstring: sym.docstring,
                            start_line: sym.start_line,
                            end_line: sym.end_line,
                            language: sym.language,
                            content: truncated,
                        });
                    }
                }
            }
        }
    }
}
/// An extracted code symbol.
///
/// The file-level context (path, repo) is attached later by the indexer;
/// this struct only carries what the parser can see in one file.
#[derive(Debug, Clone)]
pub struct CodeSymbol {
    pub name: String,      // symbol identifier
    pub kind: String,      // "function", "struct", "enum", "trait", "class", "interface", "method"
    pub signature: String, // full signature line
    pub docstring: String, // doc comment / docstring
    pub start_line: u32,   // 1-based
    pub end_line: u32,     // 1-based
    pub language: String,  // as returned by detect_language()
}
/// Map a file path to its language identifier via the file extension.
///
/// Returns `None` for unsupported extensions, or when the path has no
/// extension (or a non-UTF-8 one).
pub fn detect_language(path: &str) -> Option<&'static str> {
    match Path::new(path).extension().and_then(|e| e.to_str())? {
        "rs" => Some("rust"),
        "py" => Some("python"),
        "ts" | "tsx" => Some("typescript"),
        "js" | "jsx" => Some("javascript"),
        _ => None,
    }
}
/// Extract symbols from a source file's content, dispatching on the
/// language detected from `path`. Unsupported files yield an empty vec.
pub fn extract_symbols(path: &str, content: &str) -> Vec<CodeSymbol> {
    match detect_language(path) {
        Some("rust") => extract_rust_symbols(content),
        Some("typescript") | Some("javascript") => extract_ts_symbols(content),
        Some("python") => extract_python_symbols(content),
        _ => Vec::new(),
    }
}
// ── Rust ────────────────────────────────────────────────────────────────
/// Parse `content` as Rust and collect its symbols.
/// Returns an empty vec when the parser yields no tree.
fn extract_rust_symbols(content: &str) -> Vec<CodeSymbol> {
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&tree_sitter_rust::LANGUAGE.into()).ok();
    let mut out = Vec::new();
    if let Some(tree) = parser.parse(content, None) {
        walk_rust_node(tree.root_node(), content.as_bytes(), content, &mut out);
    }
    out
}
/// Recursive visitor over the Rust syntax tree.
///
/// Collects free functions, structs, enums, traits, and methods found
/// inside `impl` bodies. Matched items are not descended into; `impl`
/// blocks are descended only through their `declaration_list` body.
fn walk_rust_node(
    node: tree_sitter::Node,
    bytes: &[u8],
    source: &str,
    symbols: &mut Vec<CodeSymbol>,
) {
    let kind = node.kind();
    if kind == "function_item" || kind == "function_signature_item" {
        if let Some(sym) = extract_rust_function(node, bytes, source) {
            symbols.push(sym);
        }
        return;
    }
    // struct/enum/trait share one extraction path; only the kind label differs.
    let type_kind = match kind {
        "struct_item" => Some("struct"),
        "enum_item" => Some("enum"),
        "trait_item" => Some("trait"),
        _ => None,
    };
    if let Some(tk) = type_kind {
        if let Some(sym) = extract_rust_type(node, bytes, source, tk) {
            symbols.push(sym);
        }
        return;
    }
    for i in 0..node.child_count() {
        let Some(child) = node.child(i) else { continue };
        if kind == "impl_item" {
            // Only walk the impl's body to pick up its methods.
            if child.kind() == "declaration_list" {
                walk_rust_node(child, bytes, source, symbols);
            }
        } else {
            walk_rust_node(child, bytes, source, symbols);
        }
    }
}
/// Build a `CodeSymbol` for a function or method node.
///
/// The signature spans from the item's start to its body's opening brace
/// (or the item's end when it has no body). Returns `None` when the node
/// has no name field or the name is not valid UTF-8.
fn extract_rust_function(node: tree_sitter::Node, bytes: &[u8], source: &str) -> Option<CodeSymbol> {
    let name_str = node
        .child_by_field_name("name")?
        .utf8_text(bytes)
        .ok()?
        .to_string();
    let sig_start = node.start_byte();
    let sig_end = find_rust_sig_end(node, source);
    Some(CodeSymbol {
        name: name_str,
        kind: "function".into(),
        signature: source[sig_start..sig_end].trim().to_string(),
        // Doc comment (`///` lines immediately above the item).
        docstring: extract_rust_doc_comment(node, source),
        start_line: node.start_position().row as u32 + 1,
        end_line: node.end_position().row as u32 + 1,
        language: "rust".into(),
    })
}
/// Build a `CodeSymbol` for a struct/enum/trait definition.
///
/// The signature is just the first line of the definition; `kind` is the
/// label the caller determined from the node kind.
fn extract_rust_type(node: tree_sitter::Node, bytes: &[u8], source: &str, kind: &str) -> Option<CodeSymbol> {
    let name_node = node.child_by_field_name("name")?;
    let start = node.start_byte();
    // First line only — a type's full body is not useful as a signature.
    let sig_end = match source[start..].find('\n') {
        Some(off) => start + off,
        None => node.end_byte(),
    };
    Some(CodeSymbol {
        name: name_node.utf8_text(bytes).ok()?.to_string(),
        kind: kind.into(),
        signature: source[start..sig_end].trim().to_string(),
        docstring: extract_rust_doc_comment(node, source),
        start_line: node.start_position().row as u32 + 1,
        end_line: node.end_position().row as u32 + 1,
        language: "rust".into(),
    })
}
fn find_rust_sig_end(node: tree_sitter::Node, source: &str) -> usize {
// Find the opening brace
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() == "block" || child.kind() == "field_declaration_list"
|| child.kind() == "enum_variant_list" || child.kind() == "declaration_list"
{
return child.start_byte();
}
}
}
// No body (e.g., trait method signature)
node.end_byte().min(source.len())
}
/// Collect the `///` doc comment immediately preceding `node`.
///
/// Walks backwards line-by-line from the line above the node, gathering
/// `///` lines, skipping attribute lines (`#[...]`) and blank lines that sit
/// between the doc block and the item, and stopping at the first unrelated
/// line. The lines are re-joined in source order with the `///` prefix and
/// surrounding whitespace stripped.
fn extract_rust_doc_comment(node: tree_sitter::Node, source: &str) -> String {
    let start_line = node.start_position().row;
    if start_line == 0 {
        // Item is on the very first line — nothing can precede it.
        return String::new();
    }
    let lines: Vec<&str> = source.lines().collect();
    let mut doc_lines = Vec::new();
    // Walk backwards from the line before the node
    let mut line_idx = start_line.saturating_sub(1);
    loop {
        if line_idx >= lines.len() {
            break;
        }
        let line = lines[line_idx].trim();
        if line.starts_with("///") {
            doc_lines.push(line.trim_start_matches("///").trim());
        } else if line.starts_with("#[") || line.is_empty() {
            // Skip attributes and blank lines between doc and function
            if line.is_empty() && !doc_lines.is_empty() {
                break; // blank line after doc block = stop
            }
        } else {
            break;
        }
        if line_idx == 0 {
            break;
        }
        line_idx -= 1;
    }
    // Collected bottom-up; restore source order before joining.
    doc_lines.reverse();
    doc_lines.join("\n")
}
// ── TypeScript / JavaScript ─────────────────────────────────────────────
/// Parse `content` with the TypeScript grammar and collect its symbols.
/// Plain JavaScript is parsed with the same grammar.
fn extract_ts_symbols(content: &str) -> Vec<CodeSymbol> {
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()).ok();
    let mut out = Vec::new();
    if let Some(tree) = parser.parse(content, None) {
        walk_ts_node(tree.root_node(), content.as_bytes(), content, &mut out);
    }
    out
}
/// Recursive visitor over the TypeScript/JavaScript syntax tree.
///
/// Collects named functions/methods and class-like declarations. Nodes with
/// no `name` field are skipped, so anonymous arrow functions are never
/// captured. Docstrings are left empty (JSDoc extraction is still a TODO).
fn walk_ts_node(
    node: tree_sitter::Node,
    bytes: &[u8],
    source: &str,
    symbols: &mut Vec<CodeSymbol>,
) {
    // Signature = first line of the node's text.
    let first_line = |n: tree_sitter::Node| -> String {
        let start = n.start_byte();
        let end = source[start..].find('\n').map_or(n.end_byte(), |i| start + i);
        source[start..end].trim().to_string()
    };
    let symbol_kind = match node.kind() {
        "function_declaration" | "method_definition" | "arrow_function" => Some("function"),
        "class_declaration" => Some("class"),
        "interface_declaration" => Some("interface"),
        "enum_declaration" => Some("enum"),
        "type_alias_declaration" => Some("type"),
        _ => None,
    };
    if let Some(kind) = symbol_kind {
        if let Some(name) = node.child_by_field_name("name") {
            let name_str = name.utf8_text(bytes).unwrap_or("").to_string();
            // Function symbols require a non-empty name; declaration symbols
            // keep whatever the name field yielded (matching prior behavior).
            if kind != "function" || !name_str.is_empty() {
                symbols.push(CodeSymbol {
                    name: name_str,
                    kind: kind.into(),
                    signature: first_line(node),
                    docstring: String::new(), // TODO: JSDoc extraction
                    start_line: node.start_position().row as u32 + 1,
                    end_line: node.end_position().row as u32 + 1,
                    language: "typescript".into(),
                });
            }
        }
    }
    // Always recurse — methods and nested declarations live below.
    for i in 0..node.child_count() {
        if let Some(child) = node.child(i) {
            walk_ts_node(child, bytes, source, symbols);
        }
    }
}
// ── Python ──────────────────────────────────────────────────────────────
/// Parse `content` as Python and collect its symbols.
fn extract_python_symbols(content: &str) -> Vec<CodeSymbol> {
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&tree_sitter_python::LANGUAGE.into()).ok();
    let mut out = Vec::new();
    if let Some(tree) = parser.parse(content, None) {
        walk_python_node(tree.root_node(), content.as_bytes(), content, &mut out);
    }
    out
}
/// Recursive visitor over the Python syntax tree, collecting function and
/// class definitions (methods are found by recursing into class bodies).
fn walk_python_node(
    node: tree_sitter::Node,
    bytes: &[u8],
    source: &str,
    symbols: &mut Vec<CodeSymbol>,
) {
    let mapped_kind = match node.kind() {
        "function_definition" => Some("function"),
        "class_definition" => Some("class"),
        _ => None,
    };
    if let Some(kind) = mapped_kind {
        if let Some(name) = node.child_by_field_name("name") {
            // Signature = first line of the definition (the `def`/`class` line).
            let start = node.start_byte();
            let sig_end = source[start..].find('\n').map_or(node.end_byte(), |i| start + i);
            symbols.push(CodeSymbol {
                name: name.utf8_text(bytes).unwrap_or("").to_string(),
                kind: kind.into(),
                signature: source[start..sig_end].trim().to_string(),
                docstring: extract_python_docstring(node, bytes),
                start_line: node.start_position().row as u32 + 1,
                end_line: node.end_position().row as u32 + 1,
                language: "python".into(),
            });
        }
    }
    // Recurse unconditionally so nested defs and class methods are found.
    for i in 0..node.child_count() {
        if let Some(child) = node.child(i) {
            walk_python_node(child, bytes, source, symbols);
        }
    }
}
/// Extract a Python docstring: the bare string expression that is the first
/// statement of a function/class body. Returns the string with triple-quote
/// delimiters and surrounding whitespace stripped, or "" when absent.
fn extract_python_docstring(node: tree_sitter::Node, bytes: &[u8]) -> String {
    let Some(body) = node.child_by_field_name("body") else {
        return String::new();
    };
    let Some(first_stmt) = body.child(0) else {
        return String::new();
    };
    if first_stmt.kind() != "expression_statement" {
        return String::new();
    }
    let Some(expr) = first_stmt.child(0) else {
        return String::new();
    };
    if expr.kind() != "string" {
        return String::new();
    }
    let raw = expr.utf8_text(bytes).unwrap_or("");
    // Strip either triple-quote delimiter style.
    raw.trim_start_matches("\"\"\"")
        .trim_start_matches("'''")
        .trim_end_matches("\"\"\"")
        .trim_end_matches("'''")
        .trim()
        .to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    // ── Language detection ──────────────────────────────────────────────

    #[test]
    fn test_detect_language() {
        assert_eq!(detect_language("src/main.rs"), Some("rust"));
        assert_eq!(detect_language("app.ts"), Some("typescript"));
        assert_eq!(detect_language("app.tsx"), Some("typescript"));
        assert_eq!(detect_language("script.py"), Some("python"));
        assert_eq!(detect_language("script.js"), Some("javascript"));
        assert_eq!(detect_language("data.json"), None);
        assert_eq!(detect_language("README.md"), None);
    }

    // ── Rust extraction ─────────────────────────────────────────────────

    // Free function with a /// doc comment: name, kind, signature, docstring.
    #[test]
    fn test_extract_rust_function() {
        let source = r#"
/// Generate a response.
pub async fn generate(&self, req: &GenerateRequest) -> Option<String> {
self.run_and_emit(req).await
}
"#;
        let symbols = extract_rust_symbols(source);
        assert!(!symbols.is_empty(), "Should extract at least one symbol");
        let func = &symbols[0];
        assert_eq!(func.name, "generate");
        assert_eq!(func.kind, "function");
        assert!(func.signature.contains("pub async fn generate"));
        assert!(func.docstring.contains("Generate a response"));
        assert_eq!(func.language, "rust");
    }

    #[test]
    fn test_extract_rust_struct() {
        let source = r#"
/// A request to generate.
pub struct GenerateRequest {
pub text: String,
pub user_id: String,
}
"#;
        let symbols = extract_rust_symbols(source);
        let structs: Vec<_> = symbols.iter().filter(|s| s.kind == "struct").collect();
        assert!(!structs.is_empty());
        assert_eq!(structs[0].name, "GenerateRequest");
        assert!(structs[0].docstring.contains("request to generate"));
    }

    #[test]
    fn test_extract_rust_enum() {
        let source = r#"
/// Whether server or client.
pub enum ToolSide {
Server,
Client,
}
"#;
        let symbols = extract_rust_symbols(source);
        let enums: Vec<_> = symbols.iter().filter(|s| s.kind == "enum").collect();
        assert!(!enums.is_empty());
        assert_eq!(enums[0].name, "ToolSide");
    }

    #[test]
    fn test_extract_rust_trait() {
        let source = r#"
pub trait Executor {
fn execute(&self, args: &str) -> String;
}
"#;
        let symbols = extract_rust_symbols(source);
        let traits: Vec<_> = symbols.iter().filter(|s| s.kind == "trait").collect();
        assert!(!traits.is_empty());
        assert_eq!(traits[0].name, "Executor");
    }

    // Methods inside impl blocks must be collected as functions.
    #[test]
    fn test_extract_rust_impl_methods() {
        let source = r#"
impl Orchestrator {
/// Create new.
pub fn new(config: Config) -> Self {
Self { config }
}
/// Subscribe to events.
pub fn subscribe(&self) -> Receiver {
self.tx.subscribe()
}
}
"#;
        let symbols = extract_rust_symbols(source);
        let fns: Vec<_> = symbols.iter().filter(|s| s.kind == "function").collect();
        assert!(fns.len() >= 2, "Should find impl methods, got {}", fns.len());
        let names: Vec<&str> = fns.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"new"));
        assert!(names.contains(&"subscribe"));
    }

    // ── TypeScript extraction ───────────────────────────────────────────

    #[test]
    fn test_extract_ts_function() {
        let source = r#"
function greet(name: string): string {
return `Hello, ${name}`;
}
"#;
        let symbols = extract_ts_symbols(source);
        assert!(!symbols.is_empty());
        assert_eq!(symbols[0].name, "greet");
        assert_eq!(symbols[0].kind, "function");
    }

    #[test]
    fn test_extract_ts_class() {
        let source = r#"
class UserService {
constructor(private db: Database) {}
async getUser(id: string): Promise<User> {
return this.db.find(id);
}
}
"#;
        let symbols = extract_ts_symbols(source);
        let classes: Vec<_> = symbols.iter().filter(|s| s.kind == "class").collect();
        assert!(!classes.is_empty());
        assert_eq!(classes[0].name, "UserService");
    }

    #[test]
    fn test_extract_ts_interface() {
        let source = r#"
interface User {
id: string;
name: string;
email?: string;
}
"#;
        let symbols = extract_ts_symbols(source);
        let ifaces: Vec<_> = symbols.iter().filter(|s| s.kind == "interface").collect();
        assert!(!ifaces.is_empty());
        assert_eq!(ifaces[0].name, "User");
    }

    // ── Python extraction ───────────────────────────────────────────────

    #[test]
    fn test_extract_python_function() {
        let source = r#"
def process_data(items: list[str]) -> dict:
    """Process a list of items into a dictionary."""
    return {item: len(item) for item in items}
"#;
        let symbols = extract_python_symbols(source);
        assert!(!symbols.is_empty());
        assert_eq!(symbols[0].name, "process_data");
        assert_eq!(symbols[0].kind, "function");
        assert!(symbols[0].docstring.contains("Process a list"));
    }

    #[test]
    fn test_extract_python_class() {
        let source = r#"
class DataProcessor:
    """Processes data from various sources."""
    def __init__(self, config):
        self.config = config
    def run(self):
        pass
"#;
        let symbols = extract_python_symbols(source);
        let classes: Vec<_> = symbols.iter().filter(|s| s.kind == "class").collect();
        assert!(!classes.is_empty());
        assert_eq!(classes[0].name, "DataProcessor");
        assert!(classes[0].docstring.contains("Processes data"));
    }

    // ── Edge cases ──────────────────────────────────────────────────────

    #[test]
    fn test_extract_symbols_unknown_language() {
        let symbols = extract_symbols("data.json", "{}");
        assert!(symbols.is_empty());
    }

    #[test]
    fn test_extract_symbols_empty_file() {
        let symbols = extract_symbols("empty.rs", "");
        assert!(symbols.is_empty());
    }

    // Positions from tree-sitter are 0-based rows; the extractor adds 1.
    #[test]
    fn test_line_numbers_are_1_based() {
        let source = "fn first() {}\nfn second() {}\nfn third() {}";
        let symbols = extract_rust_symbols(source);
        assert!(symbols.len() >= 3);
        assert_eq!(symbols[0].start_line, 1);
        assert_eq!(symbols[1].start_line, 2);
        assert_eq!(symbols[2].start_line, 3);
    }
}

View File

@@ -27,6 +27,107 @@ impl CodeAgentService {
impl CodeAgent for CodeAgentService {
type SessionStream = Pin<Box<dyn Stream<Item = Result<ServerMessage, Status>> + Send>>;
/// Re-index Gitea code symbols into the code OpenSearch index.
///
/// Request semantics:
/// - `repo` set ("owner/name"): index just that repo at `branch`
///   (defaults to "main" when empty).
/// - `repo` empty: index all repos visible to Sol, optionally filtered
///   by `org`.
///
/// All failures are reported in-band via `ReindexCodeResponse.error`
/// rather than as a gRPC error status.
async fn reindex_code(
    &self,
    request: Request<ReindexCodeRequest>,
) -> Result<Response<ReindexCodeResponse>, Status> {
    let req = request.into_inner();
    info!(org = req.org.as_str(), repo = req.repo.as_str(), "Reindex code request");
    // Indexing requires OpenSearch to be configured.
    let Some(ref os) = self.state.opensearch else {
        return Ok(Response::new(ReindexCodeResponse {
            repos_indexed: 0,
            symbols_indexed: 0,
            error: "OpenSearch not configured".into(),
        }));
    };
    // NOTE(review): `gitea_config` is bound only as an existence check and
    // never read — consider `let Some(_)` to silence the unused warning.
    let Some(ref gitea_config) = self.state.config.services.gitea else {
        return Ok(Response::new(ReindexCodeResponse {
            repos_indexed: 0,
            symbols_indexed: 0,
            error: "Gitea not configured".into(),
        }));
    };
    // Use the GiteaClient from the tool registry (already has auth configured)
    let gitea = match self.state.tools.gitea_client() {
        Some(g) => g,
        None => {
            return Ok(Response::new(ReindexCodeResponse {
                repos_indexed: 0,
                symbols_indexed: 0,
                error: "Gitea client not available".into(),
            }));
        }
    };
    let admin_user = "sol"; // Sol's own Gitea identity
    let index_name = self.state.code_index_name();
    // Ensure index exists
    if let Err(e) = crate::code_index::schema::create_index_if_not_exists(os, &index_name).await {
        return Ok(Response::new(ReindexCodeResponse {
            repos_indexed: 0,
            symbols_indexed: 0,
            error: format!("Failed to create index: {e}"),
        }));
    }
    // Buffered indexer (batch size 50).
    // NOTE(review): confirm what the empty-string argument configures.
    let mut indexer = crate::code_index::indexer::CodeIndexer::new(
        os.clone(), index_name, String::new(), 50,
    );
    let org = if req.org.is_empty() { None } else { Some(req.org.as_str()) };
    if !req.repo.is_empty() {
        // Single-repo path: repo must be "owner/name".
        let parts: Vec<&str> = req.repo.splitn(2, '/').collect();
        let (owner, name) = if parts.len() == 2 {
            (parts[0], parts[1])
        } else {
            return Ok(Response::new(ReindexCodeResponse {
                repos_indexed: 0,
                symbols_indexed: 0,
                error: "repo must be 'owner/name' format".into(),
            }));
        };
        let branch = if req.branch.is_empty() { "main" } else { &req.branch };
        match crate::code_index::gitea::index_repo(
            gitea, &mut indexer, admin_user, owner, name, branch
        ).await {
            Ok(count) => {
                // index_repo does not flush — do it here before responding.
                indexer.flush().await;
                Ok(Response::new(ReindexCodeResponse {
                    repos_indexed: 1,
                    symbols_indexed: count,
                    error: String::new(),
                }))
            }
            Err(e) => Ok(Response::new(ReindexCodeResponse {
                repos_indexed: 0,
                symbols_indexed: 0,
                error: e.to_string(),
            })),
        }
    } else {
        // Index all repos (index_all_repos flushes internally).
        match crate::code_index::gitea::index_all_repos(
            gitea, &mut indexer, admin_user, org
        ).await {
            Ok(count) => Ok(Response::new(ReindexCodeResponse {
                repos_indexed: 0, // TODO: count repos
                symbols_indexed: count,
                error: String::new(),
            })),
            Err(e) => Ok(Response::new(ReindexCodeResponse {
                repos_indexed: 0,
                symbols_indexed: 0,
                error: e.to_string(),
            })),
        }
    }
}
async fn session(
&self,
request: Request<Streaming<ClientMessage>>,

View File

@@ -73,6 +73,10 @@ impl ToolRegistry {
}
}
/// Borrow the shared Gitea client, if one was configured at registry build
/// time. Used by the reindex endpoint so it reuses existing auth.
pub fn gitea_client(&self) -> Option<&Arc<crate::sdk::gitea::GiteaClient>> {
    self.gitea.as_ref()
}
/// Whether a Gitea client is configured in this registry.
pub fn has_gitea(&self) -> bool {
    self.gitea.is_some()
}