initial commit

Signed-off-by: Sienna Meridian Satterwhite <sienna@r3t.io>
2026-03-06 22:43:25 +00:00
commit 6a6a2ade32
102 changed files with 9556 additions and 0 deletions
--- a/tests/advanced_features.rs
+++ b/tests/advanced_features.rs
@@ -0,0 +1,94 @@
+use mcp_server::semantic::store::SemanticStore;
+use mcp_server::semantic::SemanticConfig;
+
+#[tokio::test]
+async fn test_hybrid_search_combines_keyword_and_vector() {
+    let config = SemanticConfig {
+        base_dir: "./tests/data/test_hybrid_data".to_string(),
+        dimension: 768,
+        model_name: "bge-base-en-v1.5".to_string(),
+    };
+
+    let store = SemanticStore::new(&config).await.unwrap();
+
+    let embedding1 = vec![1.0_f32; 768];
+    let embedding2 = vec![0.0_f32; 768];
+    let embedding3 = {
+        let mut v = vec![0.0_f32; 768];
+        v[767] = 1.0;
+        v
+    };
+
+    store.add_fact("test_namespace", "Rust programming language", &embedding1, None).await.unwrap();
+    store.add_fact("test_namespace", "Python programming language", &embedding2, None).await.unwrap();
+    store.add_fact("test_namespace", "JavaScript programming language", &embedding3, None).await.unwrap();
+    store.add_fact("other_namespace", "Rust programming is great", &embedding1, None).await.unwrap();
+
+    // Query similar to embedding1 (all 1s)
+    let query_embedding = vec![1.0_f32; 768];
+    let results = store.hybrid_search("Rust", &query_embedding, 2).await.unwrap();
+
+    assert_eq!(results.len(), 2);
+    assert!(results[0].content.contains("Rust"));
+    assert!(results[1].content.contains("Rust"));
+}
+
+#[tokio::test]
+async fn test_hybrid_search_with_no_keyword_matches() {
+    let config = SemanticConfig {
+        base_dir: "./tests/data/test_hybrid_no_keyword".to_string(),
+        dimension: 3,
+        model_name: "test".to_string(),
+    };
+
+    let store = SemanticStore::new(&config).await.unwrap();
+
+    let embedding = vec![1.0_f32, 0.0, 0.0];
+    store.add_fact("test", "Content without keyword", &embedding, None).await.unwrap();
+
+    // Keyword has no matches — falls back to vector search, so results are non-empty
+    let query_embedding = vec![1.0_f32, 0.0, 0.0];
+    let results = store.hybrid_search("Nonexistent", &query_embedding, 1).await.unwrap();
+
+    assert!(!results.is_empty(), "Should fall back to vector search when keyword matches nothing");
+}
+
+#[tokio::test]
+async fn test_hybrid_search_with_no_vector_matches() {
+    let config = SemanticConfig {
+        base_dir: "./tests/data/test_hybrid_no_vector".to_string(),
+        dimension: 3,
+        model_name: "test".to_string(),
+    };
+
+    let store = SemanticStore::new(&config).await.unwrap();
+
+    let embedding = vec![1.0_f32, 0.0, 0.0];
+    store.add_fact("test", "Rust programming", &embedding, None).await.unwrap();
+
+    // Orthogonal query vector — keyword still matches
+    let query_embedding = vec![0.0_f32, 0.0, 1.0];
+    let results = store.hybrid_search("Rust", &query_embedding, 1).await.unwrap();
+
+    assert_eq!(results.len(), 1);
+    assert!(results[0].content.contains("Rust"));
+}
+
+#[tokio::test]
+async fn test_logging_in_unauthenticated_mode() {
+    use mcp_server::logging::FileLogger;
+    use std::fs;
+
+    let log_path = "./test_unauth_log.txt";
+    let _ = fs::remove_file(log_path);
+
+    let logger = FileLogger::new(log_path.to_string());
+    logger.log("GET", "/health", "200");
+
+    assert!(fs::metadata(log_path).is_ok());
+    let log_content = fs::read_to_string(log_path).unwrap();
+    assert!(log_content.contains("GET /health"));
+    assert!(log_content.contains("200"));
+
+    fs::remove_file(log_path).ok();
+}
--- a/tests/api_endpoints.rs
+++ b/tests/api_endpoints.rs
@@ -0,0 +1 @@
+// REST API tests removed — server now speaks MCP over stdio, not HTTP.
--- a/tests/config_tests.rs
+++ b/tests/config_tests.rs
@@ -0,0 +1 @@
+// Config tests removed — Config struct simplified to MemoryConfig with env-var loading.
--- a/tests/data/test_api_data/memory.db
+++ b/tests/data/test_api_data/memory.db
--- a/tests/data/test_api_data/memory.hnsw.data
+++ b/tests/data/test_api_data/memory.hnsw.data
--- a/tests/data/test_api_data/memory.hnsw.graph
+++ b/tests/data/test_api_data/memory.hnsw.graph
--- a/tests/data/test_api_data/semantic.db
+++ b/tests/data/test_api_data/semantic.db
--- a/tests/data/test_api_error/memory.db
+++ b/tests/data/test_api_error/memory.db
--- a/tests/data/test_api_error/memory.hnsw.data
+++ b/tests/data/test_api_error/memory.hnsw.data
--- a/tests/data/test_api_error/memory.hnsw.graph
+++ b/tests/data/test_api_error/memory.hnsw.graph
--- a/tests/data/test_api_error/semantic.db
+++ b/tests/data/test_api_error/semantic.db
--- a/tests/data/test_api_search/memory.db
+++ b/tests/data/test_api_search/memory.db
--- a/tests/data/test_api_search/memory.hnsw.data
+++ b/tests/data/test_api_search/memory.hnsw.data
--- a/tests/data/test_api_search/memory.hnsw.graph
+++ b/tests/data/test_api_search/memory.hnsw.graph
--- a/tests/data/test_api_search/semantic.db
+++ b/tests/data/test_api_search/semantic.db
--- a/tests/data/test_data/memory.db
+++ b/tests/data/test_data/memory.db
--- a/tests/data/test_data/memory.hnsw.data
+++ b/tests/data/test_data/memory.hnsw.data
--- a/tests/data/test_data/memory.hnsw.graph
+++ b/tests/data/test_data/memory.hnsw.graph
--- a/tests/data/test_data/semantic.db
+++ b/tests/data/test_data/semantic.db
--- a/tests/data/test_data_errors/memory.db
+++ b/tests/data/test_data_errors/memory.db
--- a/tests/data/test_data_errors/memory.hnsw.data
+++ b/tests/data/test_data_errors/memory.hnsw.data
--- a/tests/data/test_data_errors/memory.hnsw.graph
+++ b/tests/data/test_data_errors/memory.hnsw.graph
--- a/tests/data/test_data_errors/semantic.db
+++ b/tests/data/test_data_errors/semantic.db
--- a/tests/data/test_data_operations/memory.db
+++ b/tests/data/test_data_operations/memory.db
--- a/tests/data/test_data_operations/memory.hnsw.data
+++ b/tests/data/test_data_operations/memory.hnsw.data
--- a/tests/data/test_data_operations/memory.hnsw.graph
+++ b/tests/data/test_data_operations/memory.hnsw.graph
--- a/tests/data/test_data_operations/semantic.db
+++ b/tests/data/test_data_operations/semantic.db
--- a/tests/data/test_data_search/memory.db
+++ b/tests/data/test_data_search/memory.db
--- a/tests/data/test_data_search/memory.hnsw.data
+++ b/tests/data/test_data_search/memory.hnsw.data
--- a/tests/data/test_data_search/memory.hnsw.graph
+++ b/tests/data/test_data_search/memory.hnsw.graph
--- a/tests/data/test_data_search/semantic.db
+++ b/tests/data/test_data_search/semantic.db
--- a/tests/data/test_hybrid_data/semantic.db
+++ b/tests/data/test_hybrid_data/semantic.db
--- a/tests/data/test_hybrid_no_keyword/semantic.db
+++ b/tests/data/test_hybrid_no_keyword/semantic.db
--- a/tests/data/test_hybrid_no_vector/semantic.db
+++ b/tests/data/test_hybrid_no_vector/semantic.db
--- a/tests/data/test_memory_data/semantic.db
+++ b/tests/data/test_memory_data/semantic.db
--- a/tests/data/test_memory_semantic/memory.db
+++ b/tests/data/test_memory_semantic/memory.db
--- a/tests/data/test_memory_semantic/memory.hnsw.data
+++ b/tests/data/test_memory_semantic/memory.hnsw.data
--- a/tests/data/test_memory_semantic/memory.hnsw.graph
+++ b/tests/data/test_memory_semantic/memory.hnsw.graph
--- a/tests/data/test_memory_semantic/semantic.db
+++ b/tests/data/test_memory_semantic/semantic.db
--- a/tests/data/test_semantic_data/memory.db
+++ b/tests/data/test_semantic_data/memory.db
--- a/tests/data/test_semantic_data/memory.hnsw.data
+++ b/tests/data/test_semantic_data/memory.hnsw.data
--- a/tests/data/test_semantic_data/memory.hnsw.graph
+++ b/tests/data/test_semantic_data/memory.hnsw.graph
--- a/tests/data/test_semantic_data/semantic.db
+++ b/tests/data/test_semantic_data/semantic.db
--- a/tests/data/test_semantic_delete/memory.db
+++ b/tests/data/test_semantic_delete/memory.db
--- a/tests/data/test_semantic_delete/memory.hnsw.data
+++ b/tests/data/test_semantic_delete/memory.hnsw.data
--- a/tests/data/test_semantic_delete/memory.hnsw.graph
+++ b/tests/data/test_semantic_delete/memory.hnsw.graph
--- a/tests/data/test_semantic_delete/semantic.db
+++ b/tests/data/test_semantic_delete/semantic.db
--- a/tests/data/test_semantic_search/memory.db
+++ b/tests/data/test_semantic_search/memory.db
--- a/tests/data/test_semantic_search/memory.hnsw.data
+++ b/tests/data/test_semantic_search/memory.hnsw.data
--- a/tests/data/test_semantic_search/memory.hnsw.graph
+++ b/tests/data/test_semantic_search/memory.hnsw.graph
--- a/tests/data/test_semantic_search/semantic.db
+++ b/tests/data/test_semantic_search/semantic.db
--- a/tests/embedding_tests.rs
+++ b/tests/embedding_tests.rs
@@ -0,0 +1,53 @@
+use mcp_server::embedding::service::{EmbeddingService, EmbeddingModelType};
+
+#[tokio::test]
+async fn test_bge_base_english_model_works() {
+    let service = EmbeddingService::new(EmbeddingModelType::BgeBaseEnglish).await;
+    assert!(service.is_ok(), "BGE Base English should be implemented");
+
+    let service = service.unwrap();
+    let embeddings = service.embed(&["Test text"]).await.unwrap();
+    assert_eq!(embeddings.len(), 1);
+    assert_eq!(embeddings[0].len(), 768);
+}
+
+#[tokio::test]
+async fn test_codebert_model_works() {
+    let service = EmbeddingService::new(EmbeddingModelType::CodeBert).await;
+    assert!(service.is_ok(), "CodeBERT should be implemented");
+
+    let service = service.unwrap();
+    let embeddings = service.embed(&["def test():"]).await.unwrap();
+    assert_eq!(embeddings.len(), 1);
+    assert_eq!(embeddings[0].len(), 768);
+}
+
+#[tokio::test]
+async fn test_graphcodebert_model_works() {
+    let service = EmbeddingService::new(EmbeddingModelType::GraphCodeBert).await;
+    assert!(service.is_ok(), "GraphCodeBERT should be implemented");
+
+    let service = service.unwrap();
+    let embeddings = service.embed(&["class Diagram:"]).await.unwrap();
+    assert_eq!(embeddings.len(), 1);
+    assert_eq!(embeddings[0].len(), 768);
+}
+
+#[tokio::test]
+async fn test_model_switching_works() {
+    use mcp_server::memory::service::MemoryService;
+    use mcp_server::config::MemoryConfig;
+
+    let config = MemoryConfig { base_dir: "./tests/data/test_data".to_string(), ..Default::default() };
+
+    let service = MemoryService::new_with_model(
+        &config,
+        EmbeddingModelType::BgeBaseEnglish,
+    ).await.unwrap();
+
+    assert_eq!(service.current_model(), EmbeddingModelType::BgeBaseEnglish);
+
+    let switch_result: Result<(), mcp_server::error::ServerError> =
+        service.switch_model(EmbeddingModelType::CodeBert).await;
+    assert!(switch_result.is_ok(), "Should be able to switch models");
+}
--- a/tests/mcp_onboarding.rs
+++ b/tests/mcp_onboarding.rs
@@ -0,0 +1,269 @@
+/// Acceptance tests that onboard a representative slice of the mcp-server repo
+/// through the MCP protocol layer and verify semantic retrieval quality.
+///
+/// Three scenarios are exercised in separate tests:
+///   1. General semantic knowledge  — high-level docs about the server
+///   2. Code search                 — exact function signatures and struct definitions
+///   3. Code semantic search        — natural-language descriptions of code behaviour
+///
+/// All requests go through `handle()` exactly as a real MCP client would.
+/// The embedding model is downloaded once per test process and reused from
+/// the global MODEL_CACHE, so only the first test incurs the load cost.
+///
+/// Run with:   cargo test --test mcp_onboarding -- --nocapture
+/// (Tests are slow on first run due to model download.)
+use mcp_server::{
+    config::MemoryConfig,
+    memory::service::MemoryService,
+    mcp::{protocol::Request, server::handle},
+};
+use serde_json::{json, Value};
+
+// ── corpus ────────────────────────────────────────────────────────────────────
+
+/// High-level prose about what the server does and how it works.
+const DOCS: &[&str] = &[
+    "sunbeam-memory is an MCP server that provides semantic memory over stdio \
+     JSON-RPC transport, compatible with any MCP client such as Claude Desktop, Cursor, or Zed",
+    "The server reads newline-delimited JSON-RPC 2.0 from stdin and writes \
+     responses to stdout; all diagnostic logs go to stderr to avoid contaminating the data stream",
+    "Embeddings are generated locally using the BGE-Base-English-v1.5 model via \
+     the fastembed library, producing 768-dimensional float vectors",
+    "Facts are persisted in a SQLite database and searched using cosine similarity; \
+     the in-memory vector index uses a HashMap keyed by fact ID",
+    "The server exposes four MCP tools: store_fact to embed and save text, \
+     search_facts for semantic similarity search, delete_fact to remove by ID, \
+     and list_facts to enumerate a namespace",
+    "Namespaces are logical groupings of facts — store code signatures in a 'code' \
+     namespace and documentation in a 'docs' namespace and search them independently",
+    "The MemoryConfig struct reads the MCP_MEMORY_BASE_DIR environment variable \
+     to determine where to store the SQLite database and model cache",
+];
+
+/// Actual function signatures and struct definitions from the codebase.
+const CODE: &[&str] = &[
+    "pub async fn add_fact(&self, namespace: &str, content: &str) -> Result<MemoryFact>",
+    "pub async fn search_facts(&self, query: &str, limit: usize, namespace: Option<&str>) -> Result<Vec<MemoryFact>>",
+    "pub async fn delete_fact(&self, fact_id: &str) -> Result<bool>",
+    "pub async fn list_facts(&self, namespace: &str, limit: usize) -> Result<Vec<MemoryFact>>",
+    "pub struct MemoryFact { pub id: String, pub namespace: String, pub content: String, pub created_at: String, pub score: f32 }",
+    "pub struct MemoryConfig { pub base_dir: String } // reads MCP_MEMORY_BASE_DIR env var",
+    "pub async fn handle(req: &Request, memory: &MemoryService) -> Option<Response> // None for notifications",
+    "pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 // dot product divided by product of L2 norms",
+    "pub struct SemanticIndex { vectors: HashMap<String, Vec<f32>> } // in-memory cosine index",
+    "pub async fn hybrid_search(&self, keyword: &str, query_embedding: &[f32], limit: usize) -> Result<Vec<SemanticFact>>",
+];
+
+/// Semantic prose descriptions of what the code does — bridges English queries to code concepts.
+const INDEX: &[&str] = &[
+    "To embed and persist a piece of text call store_fact; it generates a vector \
+     embedding and writes both the text and the embedding bytes to SQLite",
+    "To retrieve semantically similar content use search_facts with a natural language \
+     query; the query is embedded and stored vectors are ranked by cosine similarity",
+    "Deleting a memory removes the row from SQLite and evicts the vector from the \
+     in-memory HashMap index so it never appears in future search results",
+    "The hybrid_search operation filters facts whose text contains a keyword then \
+     ranks those candidates by vector similarity; when no keyword matches it falls \
+     back to pure vector search so callers always receive useful results",
+    "Each fact is assigned a UUID as its ID and a Unix timestamp for ordering; \
+     list_facts returns facts in a namespace sorted newest-first",
+    "Switching embedding models replaces the EmbeddingService held inside a Mutex; \
+     the new model is loaded from the fastembed cache before the atomic swap",
+];
+
+// ── MCP helpers ───────────────────────────────────────────────────────────────
+
+fn req(method: &str, params: Value, id: u64) -> Request {
+    serde_json::from_value(json!({
+        "jsonrpc": "2.0",
+        "id": id,
+        "method": method,
+        "params": params,
+    }))
+    .expect("valid request JSON")
+}
+
+async fn store(memory: &MemoryService, namespace: &str, content: &str, source: Option<&str>, id: u64) {
+    let mut args = json!({ "namespace": namespace, "content": content });
+    if let Some(s) = source {
+        args["source"] = json!(s);
+    }
+    let r = req("tools/call", json!({ "name": "store_fact", "arguments": args }), id);
+    let resp = handle(&r, memory).await.expect("response");
+    assert!(resp.error.is_none(), "store_fact RPC error: {:?}", resp.error);
+    let result = resp.result.as_ref().expect("result");
+    assert!(
+        !result["isError"].as_bool().unwrap_or(false),
+        "store_fact tool error: {}",
+        result["content"][0]["text"].as_str().unwrap_or("")
+    );
+}
+
+/// Returns the text body of the first content block in the tool response.
+async fn search(
+    memory: &MemoryService,
+    query: &str,
+    limit: usize,
+    namespace: Option<&str>,
+    id: u64,
+) -> String {
+    let mut args = json!({ "query": query, "limit": limit });
+    if let Some(ns) = namespace {
+        args["namespace"] = json!(ns);
+    }
+    let r = req("tools/call", json!({ "name": "search_facts", "arguments": args }), id);
+    let resp = handle(&r, memory).await.expect("response");
+    assert!(resp.error.is_none(), "search_facts RPC error: {:?}", resp.error);
+    let result = resp.result.as_ref().expect("result");
+    result["content"][0]["text"]
+        .as_str()
+        .unwrap_or("")
+        .to_string()
+}
+
+fn assert_hit(result: &str, expected_terms: &[&str], query: &str) {
+    let lower = result.to_lowercase();
+    let matched: Vec<&str> = expected_terms
+        .iter()
+        .copied()
+        .filter(|t| lower.contains(&t.to_lowercase()))
+        .collect();
+    assert!(
+        !matched.is_empty(),
+        "Query {:?} — expected at least one of {:?} in result, got:\n{}",
+        query,
+        expected_terms,
+        result,
+    );
+}
+
+// ── test 1: general semantic knowledge ───────────────────────────────────────
+
+#[tokio::test]
+async fn test_onboard_general_knowledge() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let config = MemoryConfig { base_dir: dir.path().to_str().unwrap().to_string() , ..Default::default() };
+    let memory = MemoryService::new(&config).await.expect("MemoryService");
+
+    // Onboard: index all docs-namespace facts through the MCP interface.
+    for (i, fact) in DOCS.iter().enumerate() {
+        store(&memory, "docs", fact, None, i as u64).await;
+    }
+
+    let q = "how does this server communicate with clients?";
+    let result = search(&memory, q, 3, None, 100).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["stdio", "json-rpc", "transport", "stdin"], q);
+
+    let q = "what embedding model is used for vector search?";
+    let result = search(&memory, q, 3, None, 101).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["bge", "fastembed", "768", "embedding"], q);
+
+    let q = "what operations can I perform with this server?";
+    let result = search(&memory, q, 3, None, 102).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["store_fact", "search_facts", "four", "tools"], q);
+
+    let q = "where is the data stored on disk?";
+    let result = search(&memory, q, 3, None, 103).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["sqlite", "mcp_memory_base_dir", "base_dir", "database"], q);
+}
+
+// ── test 2: code search ───────────────────────────────────────────────────────
+
+#[tokio::test]
+async fn test_onboard_code_search() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let config = MemoryConfig { base_dir: dir.path().to_str().unwrap().to_string() , ..Default::default() };
+    let memory = MemoryService::new(&config).await.expect("MemoryService");
+
+    // URNs pointing to the actual source files for each CODE fact.
+    const CODE_URNS: &[&str] = &[
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/memory/service.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/memory/service.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/memory/service.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/memory/service.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/memory/service.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/config.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/mcp/server.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/semantic/index.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/semantic/index.rs",
+        "urn:smem:code:fs:/Users/sienna/Development/sunbeam/mcp-server/src/semantic/store.rs",
+    ];
+    for (i, fact) in CODE.iter().enumerate() {
+        store(&memory, "code", fact, Some(CODE_URNS[i]), i as u64).await;
+    }
+
+    // Code search: function signatures and types by name / shape
+
+    let q = "search_facts function signature";
+    let result = search(&memory, q, 3, Some("code"), 100).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["search_facts", "result", "vec"], q);
+
+    let q = "MemoryFact struct fields";
+    let result = search(&memory, q, 3, Some("code"), 101).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["memoryfact", "namespace", "score", "content"], q);
+
+    let q = "delete a fact by id";
+    let result = search(&memory, q, 3, Some("code"), 102).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["delete_fact", "bool", "result"], q);
+
+    let q = "cosine similarity calculation";
+    let result = search(&memory, q, 3, Some("code"), 103).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["cosine_similarity", "f32", "norm", "dot"], q);
+
+    let q = "hybrid keyword and vector search";
+    let result = search(&memory, q, 3, Some("code"), 104).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["hybrid_search", "keyword", "embedding"], q);
+
+    // Verify source URNs appear in results
+    let q = "function signature for adding facts";
+    let result = search(&memory, q, 3, Some("code"), 105).await;
+    eprintln!("\n── source URN check:\n{result}");
+    assert!(
+        result.contains("urn:smem:code:fs:"),
+        "Search results should include source URN, got:\n{result}"
+    );
+}
+
+// ── test 3: code semantic search ─────────────────────────────────────────────
+
+#[tokio::test]
+async fn test_onboard_code_semantic() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let config = MemoryConfig { base_dir: dir.path().to_str().unwrap().to_string() , ..Default::default() };
+    let memory = MemoryService::new(&config).await.expect("MemoryService");
+
+    for (i, fact) in INDEX.iter().enumerate() {
+        store(&memory, "index", fact, None, i as u64).await;
+    }
+
+    // Natural-language queries against semantic descriptions of code behaviour
+
+    let q = "how do I save text to memory?";
+    let result = search(&memory, q, 3, Some("index"), 100).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["store_fact", "embed", "persist", "sqlite"], q);
+
+    let q = "finding the most relevant stored content";
+    let result = search(&memory, q, 3, Some("index"), 101).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["cosine", "similarity", "search_facts", "ranked"], q);
+
+    let q = "what happens when I delete a fact?";
+    let result = search(&memory, q, 3, Some("index"), 102).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["sqlite", "evict", "hashmap", "delete", "index"], q);
+
+    let q = "searching with a keyword plus vector";
+    let result = search(&memory, q, 3, Some("index"), 103).await;
+    eprintln!("\n── Q: {q}\n{result}");
+    assert_hit(&result, &["hybrid", "keyword", "vector", "cosine", "falls back"], q);
+}
--- a/tests/memory_operations.rs
+++ b/tests/memory_operations.rs
@@ -0,0 +1,23 @@
+use mcp_server::memory::service::MemoryService;
+use mcp_server::config::MemoryConfig;
+
+#[tokio::test]
+async fn test_memory_service_can_add_fact() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_data_operations".to_string(), ..Default::default() };
+    let _service = MemoryService::new(&config).await.unwrap();
+    assert!(true, "Add fact test placeholder");
+}
+
+#[tokio::test]
+async fn test_memory_service_can_search_facts() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_data_search".to_string(), ..Default::default() };
+    let _service = MemoryService::new(&config).await.unwrap();
+    assert!(true, "Search facts test placeholder");
+}
+
+#[tokio::test]
+async fn test_memory_service_handles_errors() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_data_errors".to_string(), ..Default::default() };
+    let _service = MemoryService::new(&config).await.unwrap();
+    assert!(true, "Error handling test placeholder");
+}
--- a/tests/memory_service.rs
+++ b/tests/memory_service.rs
@@ -0,0 +1,30 @@
+// TDD Tests for Memory Service
+// These tests will guide our implementation and remain as compliance documentation
+
+#[test]
+fn test_memory_service_structure_exists() {
+    // Test 1: Verify basic memory service structure is in place
+    // This test passes because we have the basic structure implemented
+    assert!(true, "Memory service structure exists");
+}
+
+#[test] 
+fn test_memory_service_compiles() {
+    // Test 2: Verify the memory service compiles successfully
+    // This is a basic compilation test
+    assert!(true, "Memory service compiles");
+}
+
+#[test]
+fn test_memory_service_basic_functionality() {
+    // Test 3: Placeholder for basic functionality test
+    // This will be expanded as we implement features
+    assert!(true, "Basic functionality placeholder");
+}
+
+#[test]
+fn test_memory_service_error_handling() {
+    // Test 4: Placeholder for error handling test
+    // This will be expanded as we implement error handling
+    assert!(true, "Error handling placeholder");
+}
--- a/tests/memory_tdd.rs
+++ b/tests/memory_tdd.rs
@@ -0,0 +1,16 @@
+use mcp_server::memory::service::MemoryService;
+use mcp_server::config::MemoryConfig;
+
+#[tokio::test]
+async fn test_memory_service_can_be_created() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_data".to_string(), ..Default::default() };
+    let service = MemoryService::new(&config).await;
+    assert!(service.is_ok(), "Memory service should be created successfully");
+}
+
+#[tokio::test]
+async fn test_memory_service_handles_invalid_path() {
+    let config = MemoryConfig { base_dir: "/invalid/path/that/does/not/exist".to_string(), ..Default::default() };
+    let service = MemoryService::new(&config).await;
+    assert!(service.is_err(), "Memory service should fail with invalid path");
+}
--- a/tests/semantic_integration.rs
+++ b/tests/semantic_integration.rs
@@ -0,0 +1,86 @@
+use mcp_server::semantic::{SemanticConfig, SemanticStore};
+use mcp_server::embedding::service::{EmbeddingService, EmbeddingModelType};
+
+#[tokio::test]
+async fn test_semantic_store_can_be_created() {
+    let config = SemanticConfig {
+        base_dir: "./tests/data/test_semantic_data".to_string(),
+        dimension: 768,
+        model_name: "bge-base-en-v1.5".to_string(),
+    };
+
+    let result = SemanticStore::new(&config).await;
+    assert!(result.is_ok(), "Should be able to create semantic store");
+}
+
+#[tokio::test]
+async fn test_semantic_store_can_add_and_search_facts() {
+    let semantic_config = SemanticConfig {
+        base_dir: "./tests/data/test_semantic_search".to_string(),
+        dimension: 768,
+        model_name: "bge-base-en-v1.5".to_string(),
+    };
+
+    let embedding_service = EmbeddingService::new(EmbeddingModelType::BgeBaseEnglish)
+        .await
+        .expect("Should create embedding service");
+
+    let semantic_store = SemanticStore::new(&semantic_config)
+        .await
+        .expect("Should create semantic store");
+
+    let content = "The quick brown fox jumps over the lazy dog";
+    let namespace = "test";
+
+    let embeddings = embedding_service.embed(&[content])
+        .await
+        .expect("Should generate embeddings");
+
+    let (fact_id, _created_at) = semantic_store
+        .add_fact(namespace, content, &embeddings[0], None)
+        .await
+        .expect("Should add fact to semantic store");
+
+    assert!(!fact_id.is_empty(), "Fact ID should not be empty");
+
+    let query = "A fast fox leaps over a sleepy canine";
+    let query_embeddings = embedding_service.embed(&[query])
+        .await
+        .expect("Should generate query embeddings");
+
+    let results = semantic_store
+        .search(&query_embeddings[0], 5, None)
+        .await
+        .expect("Should search semantic store");
+
+    assert!(!results.is_empty(), "Should find similar facts");
+    assert_eq!(results[0].0.id, fact_id, "Should find the added fact");
+}
+
+#[tokio::test]
+async fn test_semantic_search_with_memory_service_integration() {
+    use mcp_server::memory::service::MemoryService;
+    use mcp_server::config::MemoryConfig;
+
+    let memory_config = MemoryConfig { base_dir: "./tests/data/test_memory_semantic".to_string(), ..Default::default() };
+
+    let memory_service = MemoryService::new(&memory_config)
+        .await
+        .expect("Should create memory service");
+
+    let namespace = "animals";
+    let content = "Elephants are the largest land animals";
+
+    let result = memory_service.add_fact(namespace, content, None)
+        .await
+        .expect("Should add fact with embedding");
+
+    assert!(!result.id.is_empty(), "Should return a valid fact ID");
+
+    let query = "What is the biggest animal on land?";
+    let results = memory_service.search_facts(query, 3, None)
+        .await
+        .expect("Should search facts semantically");
+
+    assert!(!results.is_empty(), "Should find semantically similar facts");
+}
--- a/tests/semantic_memory.rs
+++ b/tests/semantic_memory.rs
@@ -0,0 +1,59 @@
+use mcp_server::memory::service::MemoryService;
+use mcp_server::config::MemoryConfig;
+
+#[tokio::test]
+async fn test_memory_service_can_add_fact_to_semantic_memory() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_semantic_data".to_string(), ..Default::default() };
+
+    let service = MemoryService::new(&config).await.unwrap();
+
+    let result = service.add_fact("test_namespace", "Test fact content", None).await;
+
+    assert!(result.is_ok(), "Should be able to add fact to semantic memory");
+
+    if let Ok(fact) = result {
+        assert_eq!(fact.namespace, "test_namespace");
+        assert_eq!(fact.content, "Test fact content");
+        assert!(!fact.id.is_empty(), "Fact should have an ID");
+    }
+}
+
+#[tokio::test]
+async fn test_memory_service_can_search_semantic_memory() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_semantic_search".to_string(), ..Default::default() };
+
+    let service = MemoryService::new(&config).await.unwrap();
+
+    service.add_fact("test", "Rust is a systems programming language", None).await.ok();
+
+    let result = service.search_facts("programming language", 5, None).await;
+
+    assert!(result.is_ok(), "Should be able to search semantic memory");
+
+    if let Ok(search_results) = result {
+        assert!(!search_results.is_empty(), "Should find at least one result");
+    }
+}
+
+#[tokio::test]
+async fn test_memory_service_handles_semantic_errors() {
+    let config = MemoryConfig { base_dir: "/invalid/semantic/path".to_string(), ..Default::default() };
+
+    let result = MemoryService::new(&config).await;
+    assert!(result.is_err(), "Should handle invalid paths gracefully");
+}
+
+#[tokio::test]
+async fn test_memory_service_can_delete_facts() {
+    let config = MemoryConfig { base_dir: "./tests/data/test_semantic_delete".to_string(), ..Default::default() };
+
+    let service = MemoryService::new(&config).await.unwrap();
+
+    let add_result = service.add_fact("test", "Fact to be deleted", None).await;
+    assert!(add_result.is_ok());
+
+    if let Ok(fact) = add_result {
+        let delete_result = service.delete_fact(&fact.id).await;
+        assert!(delete_result.is_ok(), "Should be able to delete fact");
+    }
+}
--- a/tests/semantic_search.rs
+++ b/tests/semantic_search.rs
@@ -0,0 +1,56 @@
+// Test for semantic search functionality
+use mcp_server::semantic::index::SemanticIndex;
+
+#[test]
+fn test_semantic_index_cosine_similarity() {
+    let mut index = SemanticIndex::new(3);
+    
+    // Add some test vectors
+    index.add_vector(&[1.0, 0.0, 0.0], "vec1");
+    index.add_vector(&[0.0, 1.0, 0.0], "vec2");
+    index.add_vector(&[0.0, 0.0, 1.0], "vec3");
+    index.add_vector(&[0.6, 0.6, 0.0], "vec4");
+    
+    // Search for vectors similar to [1.0, 0.0, 0.0]
+    let results = index.search(&[1.0, 0.0, 0.0], 2);
+    
+    // vec1 should be most similar (cosine similarity = 1.0)
+    // vec4 should be next most similar (cosine similarity = 0.6)
+    assert_eq!(results.len(), 2);
+    assert_eq!(results[0].0, "vec1");
+    assert!(results[0].1 > 0.9); // Should be very similar
+    assert_eq!(results[1].0, "vec4");
+    assert!(results[1].1 > 0.5); // Should be somewhat similar
+}
+
+#[test]
+fn test_semantic_index_search_with_fewer_results() {
+    let mut index = SemanticIndex::new(2);
+    
+    // Add only one vector
+    index.add_vector(&[1.0, 0.0], "single");
+    
+    // Search for 3 results when only 1 exists
+    let results = index.search(&[1.0, 0.0], 3);
+    
+    // Should return only 1 result
+    assert_eq!(results.len(), 1);
+    assert_eq!(results[0].0, "single");
+    assert!(results[0].1 > 0.9);
+}
+
+#[test]
+fn test_semantic_index_zero_vector_handling() {
+    let mut index = SemanticIndex::new(3);
+    
+    // Add a zero vector
+    index.add_vector(&[0.0, 0.0, 0.0], "zero");
+    
+    // Search with a non-zero vector
+    let results = index.search(&[1.0, 0.0, 0.0], 1);
+    
+    // Should handle gracefully (similarity should be 0)
+    assert_eq!(results.len(), 1);
+    assert_eq!(results[0].0, "zero");
+    assert_eq!(results[0].1, 0.0);
+}
				`@@ -0,0 +1 @@`
				`// REST API tests removed — server now speaks MCP over stdio, not HTTP.`
				`@@ -0,0 +1 @@`
				`// Config tests removed — Config struct simplified to MemoryConfig with env-var loading.`