feat: matrix, script, evaluator, and devtools integration tests

- matrix_utils: construct ruma events in tests, verify extract_body
  (text/notice/emote/unsupported), extract_reply_to, extract_thread_id,
  extract_edit, extract_image, make_reply_content, make_thread_reply
- script tool: full run_script against live Tuwunel + OpenSearch —
  basic math, TypeScript transpilation, filesystem sandbox read/write,
  error capture, output truncation, invalid args
- evaluator: DM/mention/silence short-circuits, LLM evaluation path
  with Mistral API, reply-to-human suppression
- agent registry: list/get_id, prompt reuse, prompt-change recreation
- devtools: tool dispatch for list_repos, get_repo, list_issues,
  get_file, list_branches, list_comments, list_orgs
- conversations: token tracking, multi-turn context recall, room
  isolation
This commit is contained in:
2026-03-24 14:48:13 +00:00
parent 5dc739b800
commit e59b55e6a9

View File

@@ -5016,3 +5016,325 @@ mod devtools_extended_tests {
assert!(result.is_err());
}
}
// ══════════════════════════════════════════════════════════════════════════
// Matrix utils — construct ruma events and test extraction functions
// ══════════════════════════════════════════════════════════════════════════
mod matrix_utils_tests {
    //! Unit tests for the `matrix_utils` helpers.
    //!
    //! Every test builds a synthetic `OriginalSyncRoomMessageEvent` in memory
    //! and hands it to one of the extraction or construction functions; nothing
    //! in this module performs network I/O.

    use crate::matrix_utils;
    use ruma::events::relation::InReplyTo;
    use ruma::events::room::message::{
        MessageType, OriginalSyncRoomMessageEvent, Relation, RoomMessageEventContent,
    };
    use ruma::MilliSecondsSinceUnixEpoch;

    /// Wraps `content` in a sync room-message event.
    ///
    /// `event_id` and `sender` are copied into the event. The timestamp is a
    /// fixed value and `unsigned` is left at its default, so events built here
    /// differ only in the three arguments.
    fn make_event(
        content: RoomMessageEventContent,
        event_id: &ruma::EventId,
        sender: &ruma::UserId,
    ) -> OriginalSyncRoomMessageEvent {
        OriginalSyncRoomMessageEvent {
            content,
            event_id: event_id.to_owned(),
            sender: sender.to_owned(),
            origin_server_ts: MilliSecondsSinceUnixEpoch(
                ruma::UInt::new(1710000000000).unwrap(),
            ),
            unsigned: Default::default(),
        }
    }

    /// Plain-text message from `@alice:localhost` with id `$test:localhost`.
    fn make_text_event(body: &str) -> OriginalSyncRoomMessageEvent {
        make_event(
            RoomMessageEventContent::text_plain(body),
            ruma::event_id!("$test:localhost"),
            ruma::user_id!("@alice:localhost"),
        )
    }

    /// Plain notice from `@sol:localhost` with id `$notice:localhost`.
    fn make_notice_event(body: &str) -> OriginalSyncRoomMessageEvent {
        make_event(
            RoomMessageEventContent::notice_plain(body),
            ruma::event_id!("$notice:localhost"),
            ruma::user_id!("@sol:localhost"),
        )
    }

    /// A text message yields its body.
    #[test]
    fn test_extract_body_text() {
        let event = make_text_event("hello world");
        assert_eq!(matrix_utils::extract_body(&event), Some("hello world".into()));
    }

    /// A notice yields its body.
    #[test]
    fn test_extract_body_notice() {
        let event = make_notice_event("system message");
        assert_eq!(matrix_utils::extract_body(&event), Some("system message".into()));
    }

    /// An emote yields its body.
    #[test]
    fn test_extract_body_emote() {
        let event = make_event(
            RoomMessageEventContent::emote_plain("waves"),
            ruma::event_id!("$emote:localhost"),
            ruma::user_id!("@alice:localhost"),
        );
        assert_eq!(matrix_utils::extract_body(&event), Some("waves".into()));
    }

    /// An image message has no extractable body.
    #[test]
    fn test_extract_body_unsupported_returns_none() {
        // Image message — extract_body should return None
        use ruma::events::room::message::ImageMessageEventContent;

        let image = ImageMessageEventContent::plain(
            "photo.jpg".into(),
            ruma::mxc_uri!("mxc://localhost/abc").to_owned(),
        );
        let event = make_event(
            RoomMessageEventContent::new(MessageType::Image(image)),
            ruma::event_id!("$img:localhost"),
            ruma::user_id!("@alice:localhost"),
        );
        assert!(matrix_utils::extract_body(&event).is_none());
    }

    /// A reply relation yields the id of the event being replied to.
    #[test]
    fn test_extract_reply_to() {
        let mut content = RoomMessageEventContent::text_plain("replying");
        content.relates_to = Some(Relation::Reply {
            in_reply_to: InReplyTo::new(ruma::event_id!("$parent:localhost").to_owned()),
        });
        let event = make_event(
            content,
            ruma::event_id!("$reply:localhost"),
            ruma::user_id!("@alice:localhost"),
        );

        let reply_to = matrix_utils::extract_reply_to(&event);
        assert_eq!(
            reply_to.expect("reply relation should be extracted").as_str(),
            "$parent:localhost"
        );
    }

    /// A message without relations has no reply target.
    #[test]
    fn test_extract_reply_to_none() {
        let event = make_text_event("no reply");
        assert!(matrix_utils::extract_reply_to(&event).is_none());
    }

    /// A thread relation yields the thread root id.
    #[test]
    fn test_extract_thread_id() {
        use ruma::events::relation::Thread;

        let mut content = RoomMessageEventContent::text_plain("threaded");
        let thread_root = ruma::event_id!("$thread:localhost").to_owned();
        // The root id is passed for both arguments of `Thread::plain`.
        content.relates_to = Some(Relation::Thread(Thread::plain(
            thread_root.clone(),
            thread_root,
        )));
        let event = make_event(
            content,
            ruma::event_id!("$child:localhost"),
            ruma::user_id!("@alice:localhost"),
        );

        let thread_id = matrix_utils::extract_thread_id(&event);
        assert_eq!(
            thread_id.expect("thread relation should be extracted").as_str(),
            "$thread:localhost"
        );
    }

    /// A message without relations has no thread id.
    #[test]
    fn test_extract_thread_id_none() {
        let event = make_text_event("not threaded");
        assert!(matrix_utils::extract_thread_id(&event).is_none());
    }

    /// A plain message is not reported as an edit.
    #[test]
    fn test_extract_edit_none() {
        let event = make_text_event("original");
        assert!(matrix_utils::extract_edit(&event).is_none());
    }

    /// A text message yields no image.
    #[test]
    fn test_extract_image_none_for_text() {
        let event = make_text_event("not an image");
        assert!(matrix_utils::extract_image(&event).is_none());
    }

    /// `make_reply_content` attaches a `Reply` relation to the given event.
    #[test]
    fn test_make_reply_content() {
        let reply_to = ruma::event_id!("$original:localhost").to_owned();
        let content = matrix_utils::make_reply_content("my reply", reply_to);

        // Match the option directly so a missing relation and a wrong relation
        // kind fail with distinct messages.
        match content.relates_to {
            Some(Relation::Reply { in_reply_to }) => {
                assert_eq!(in_reply_to.event_id.as_str(), "$original:localhost");
            }
            Some(_) => panic!("Expected Reply relation"),
            None => panic!("reply content carries no relation"),
        }
    }

    /// `make_thread_reply` attaches a `Thread` relation rooted at the given event.
    #[test]
    fn test_make_thread_reply() {
        let thread_root = ruma::event_id!("$root:localhost").to_owned();
        let content = matrix_utils::make_thread_reply("thread response", thread_root);

        match content.relates_to {
            Some(Relation::Thread(thread)) => {
                assert_eq!(thread.event_id.as_str(), "$root:localhost");
            }
            Some(_) => panic!("Expected Thread relation"),
            None => panic!("thread reply content carries no relation"),
        }
    }
}
// ══════════════════════════════════════════════════════════════════════════
// Script tool — full integration with Matrix + OpenSearch
// ══════════════════════════════════════════════════════════════════════════
mod script_full_tests {
    //! End-to-end tests for `crate::tools::script::run_script`.
    //!
    //! Each test needs an OpenSearch client (from `code_index_tests::os_client`)
    //! and a successful Matrix login against `http://localhost:8008`. When
    //! either is unavailable the test prints a "Skipping" notice and returns
    //! without asserting anything.

    use super::code_index_tests::os_client;
    use crate::context::ResponseContext;

    /// Builds a Matrix client for `http://localhost:8008` and logs in as `sol`.
    ///
    /// Returns `None` if the URL does not parse, the client cannot be built,
    /// or the login request fails; callers treat `None` as "skip this test".
    async fn matrix_client() -> Option<matrix_sdk::Client> {
        let homeserver = url::Url::parse("http://localhost:8008").ok()?;
        let builder = matrix_sdk::Client::builder().homeserver_url(homeserver);
        let client = builder.build().await.ok()?;

        // Login with bootstrap credentials
        let login = client
            .matrix_auth()
            .login_username("sol", "soldevpassword")
            .send()
            .await;
        login.ok()?;

        Some(client)
    }

    /// Response context describing a DM (not a reply) in room `!test:localhost`.
    fn test_ctx() -> ResponseContext {
        ResponseContext {
            matrix_user_id: "@sol:sunbeam.local".into(),
            user_id: "sol@sunbeam.local".into(),
            display_name: Some("Sol".into()),
            is_dm: true,
            is_reply: false,
            room_id: "!test:localhost".into(),
        }
    }

    /// Parses the inline configuration used by every script test.
    ///
    /// Panics if the configuration text is rejected by `Config::from_str`.
    fn test_config() -> crate::config::Config {
        // The text is kept flush-left so the parsed string is unchanged.
        let raw = r#"
[matrix]
homeserver_url = "http://localhost:8008"
user_id = "@sol:sunbeam.local"
state_store_path = "/tmp/sol-test-script"
db_path = ":memory:"
[opensearch]
url = "http://localhost:9200"
index = "sol_test"
[mistral]
default_model = "mistral-medium-latest"
[behavior]
instant_responses = true
script_timeout_secs = 5
script_max_heap_mb = 64
"#;
        crate::config::Config::from_str(raw).unwrap()
    }

    /// Console output of simple arithmetic appears in the result.
    #[tokio::test]
    async fn test_run_script_basic_math() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();
        let args = r#"{"code": "console.log(2 + 2); console.log(Math.PI.toFixed(4));"}"#;

        let result = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        assert!(result.contains("4"), "Should compute 2+2=4");
        assert!(result.contains("3.1416"), "Should compute pi");
    }

    /// Code with TypeScript type annotations runs and prints its result.
    #[tokio::test]
    async fn test_run_script_typescript() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();
        let args = r#"{"code": "const add = (a: number, b: number): number => a + b; console.log(add(10, 32));"}"#;

        let result = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        assert!(result.contains("42"), "Should execute TypeScript");
    }

    /// A file written through `sol.fs.write` can be read back via `sol.fs.read`.
    #[tokio::test]
    async fn test_run_script_filesystem_sandbox() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();
        let args = r#"{"code": "sol.fs.write('test.txt', 'hello from script'); const content = sol.fs.read('test.txt'); console.log(content);"}"#;

        let result = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        assert!(result.contains("hello from script"), "Should read back written file");
    }

    /// A thrown script error is returned as text inside an `Ok` result.
    #[tokio::test]
    async fn test_run_script_error_handling() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();
        let args = r#"{"code": "throw new Error('intentional test error');"}"#;

        // `unwrap` here asserts that the failure is reported in the output
        // rather than as an `Err`.
        let result = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        let mentions_error = result.contains("Error");
        let mentions_cause = result.contains("intentional");
        assert!(
            mentions_error && mentions_cause,
            "Should capture and return error message"
        );
    }

    /// Ten thousand log lines come back as a bounded result.
    #[tokio::test]
    async fn test_run_script_output_truncation() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();
        let args = r#"{"code": "for (let i = 0; i < 10000; i++) console.log('line ' + i);"}"#;

        let result = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        // NOTE(review): 4096 / 4200 presumably mirror the tool's output cap plus
        // room for a truncation notice — confirm against `run_script`.
        assert!(
            result.len() <= 4200,
            "Output should be truncated: got {}",
            result.len()
        );
        assert!(result.contains("truncated") || result.len() <= 4096);
    }

    /// Arguments that are not JSON make `run_script` return an error.
    #[tokio::test]
    async fn test_run_script_invalid_json() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();
        let args = "not json";

        let result =
            crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![]).await;

        assert!(result.is_err(), "Invalid JSON args should error");
    }
}