feat: matrix, script, evaluator, and devtools integration tests
- matrix_utils: construct ruma events in tests, verify extract_body (text/notice/emote/unsupported), extract_reply_to, extract_thread_id, extract_edit, extract_image, make_reply_content, make_thread_reply
- script tool: full run_script against live Tuwunel + OpenSearch — basic math, TypeScript transpilation, filesystem sandbox read/write, error capture, output truncation, invalid args
- evaluator: DM/mention/silence short-circuits, LLM evaluation path with Mistral API, reply-to-human suppression
- agent registry: list/get_id, prompt reuse, prompt-change recreation
- devtools: tool dispatch for list_repos, get_repo, list_issues, get_file, list_branches, list_comments, list_orgs
- conversations: token tracking, multi-turn context recall, room isolation
This commit is contained in:
@@ -5016,3 +5016,325 @@ mod devtools_extended_tests {
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
// Matrix utils — construct ruma events and test extraction functions
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
mod matrix_utils_tests {
|
||||
use crate::matrix_utils;
|
||||
use ruma::events::room::message::{
|
||||
MessageType, OriginalSyncRoomMessageEvent, Relation,
|
||||
RoomMessageEventContent, TextMessageEventContent,
|
||||
};
|
||||
use ruma::events::relation::InReplyTo;
|
||||
use ruma::MilliSecondsSinceUnixEpoch;
|
||||
|
||||
fn make_text_event(body: &str) -> OriginalSyncRoomMessageEvent {
|
||||
OriginalSyncRoomMessageEvent {
|
||||
content: RoomMessageEventContent::text_plain(body),
|
||||
event_id: ruma::event_id!("$test:localhost").to_owned(),
|
||||
sender: ruma::user_id!("@alice:localhost").to_owned(),
|
||||
origin_server_ts: MilliSecondsSinceUnixEpoch(ruma::UInt::new(1710000000000).unwrap()),
|
||||
unsigned: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_notice_event(body: &str) -> OriginalSyncRoomMessageEvent {
|
||||
OriginalSyncRoomMessageEvent {
|
||||
content: RoomMessageEventContent::notice_plain(body),
|
||||
event_id: ruma::event_id!("$notice:localhost").to_owned(),
|
||||
sender: ruma::user_id!("@sol:localhost").to_owned(),
|
||||
origin_server_ts: MilliSecondsSinceUnixEpoch(ruma::UInt::new(1710000000000).unwrap()),
|
||||
unsigned: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_body_text() {
|
||||
let event = make_text_event("hello world");
|
||||
assert_eq!(matrix_utils::extract_body(&event), Some("hello world".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_body_notice() {
|
||||
let event = make_notice_event("system message");
|
||||
assert_eq!(matrix_utils::extract_body(&event), Some("system message".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_body_emote() {
|
||||
let content = RoomMessageEventContent::emote_plain("waves");
|
||||
let event = OriginalSyncRoomMessageEvent {
|
||||
content,
|
||||
event_id: ruma::event_id!("$emote:localhost").to_owned(),
|
||||
sender: ruma::user_id!("@alice:localhost").to_owned(),
|
||||
origin_server_ts: MilliSecondsSinceUnixEpoch(ruma::UInt::new(1710000000000).unwrap()),
|
||||
unsigned: Default::default(),
|
||||
};
|
||||
assert_eq!(matrix_utils::extract_body(&event), Some("waves".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_body_unsupported_returns_none() {
|
||||
// Image message — extract_body should return None
|
||||
use ruma::events::room::message::ImageMessageEventContent;
|
||||
let content = RoomMessageEventContent::new(
|
||||
MessageType::Image(ImageMessageEventContent::plain(
|
||||
"photo.jpg".into(),
|
||||
ruma::mxc_uri!("mxc://localhost/abc").to_owned(),
|
||||
)),
|
||||
);
|
||||
let event = OriginalSyncRoomMessageEvent {
|
||||
content,
|
||||
event_id: ruma::event_id!("$img:localhost").to_owned(),
|
||||
sender: ruma::user_id!("@alice:localhost").to_owned(),
|
||||
origin_server_ts: MilliSecondsSinceUnixEpoch(ruma::UInt::new(1710000000000).unwrap()),
|
||||
unsigned: Default::default(),
|
||||
};
|
||||
assert!(matrix_utils::extract_body(&event).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_reply_to() {
|
||||
let mut content = RoomMessageEventContent::text_plain("replying");
|
||||
content.relates_to = Some(Relation::Reply {
|
||||
in_reply_to: InReplyTo::new(ruma::event_id!("$parent:localhost").to_owned()),
|
||||
});
|
||||
let event = OriginalSyncRoomMessageEvent {
|
||||
content,
|
||||
event_id: ruma::event_id!("$reply:localhost").to_owned(),
|
||||
sender: ruma::user_id!("@alice:localhost").to_owned(),
|
||||
origin_server_ts: MilliSecondsSinceUnixEpoch(ruma::UInt::new(1710000000000).unwrap()),
|
||||
unsigned: Default::default(),
|
||||
};
|
||||
let reply_to = matrix_utils::extract_reply_to(&event);
|
||||
assert_eq!(reply_to.unwrap().as_str(), "$parent:localhost");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_reply_to_none() {
|
||||
let event = make_text_event("no reply");
|
||||
assert!(matrix_utils::extract_reply_to(&event).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_thread_id() {
|
||||
use ruma::events::relation::Thread;
|
||||
let mut content = RoomMessageEventContent::text_plain("threaded");
|
||||
let thread_root = ruma::event_id!("$thread:localhost").to_owned();
|
||||
content.relates_to = Some(Relation::Thread(
|
||||
Thread::plain(thread_root.clone(), thread_root),
|
||||
));
|
||||
let event = OriginalSyncRoomMessageEvent {
|
||||
content,
|
||||
event_id: ruma::event_id!("$child:localhost").to_owned(),
|
||||
sender: ruma::user_id!("@alice:localhost").to_owned(),
|
||||
origin_server_ts: MilliSecondsSinceUnixEpoch(ruma::UInt::new(1710000000000).unwrap()),
|
||||
unsigned: Default::default(),
|
||||
};
|
||||
let thread_id = matrix_utils::extract_thread_id(&event);
|
||||
assert_eq!(thread_id.unwrap().as_str(), "$thread:localhost");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_thread_id_none() {
|
||||
let event = make_text_event("not threaded");
|
||||
assert!(matrix_utils::extract_thread_id(&event).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_edit_none() {
|
||||
let event = make_text_event("original");
|
||||
assert!(matrix_utils::extract_edit(&event).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_image_none_for_text() {
|
||||
let event = make_text_event("not an image");
|
||||
assert!(matrix_utils::extract_image(&event).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_make_reply_content() {
|
||||
let reply_to = ruma::event_id!("$original:localhost").to_owned();
|
||||
let content = matrix_utils::make_reply_content("my reply", reply_to);
|
||||
assert!(content.relates_to.is_some());
|
||||
match content.relates_to.unwrap() {
|
||||
Relation::Reply { in_reply_to } => {
|
||||
assert_eq!(in_reply_to.event_id.as_str(), "$original:localhost");
|
||||
}
|
||||
_ => panic!("Expected Reply relation"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_make_thread_reply() {
|
||||
let thread_root = ruma::event_id!("$root:localhost").to_owned();
|
||||
let content = matrix_utils::make_thread_reply("thread response", thread_root);
|
||||
assert!(content.relates_to.is_some());
|
||||
match content.relates_to.unwrap() {
|
||||
Relation::Thread(thread) => {
|
||||
assert_eq!(thread.event_id.as_str(), "$root:localhost");
|
||||
}
|
||||
_ => panic!("Expected Thread relation"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
// Script tool — full integration with Matrix + OpenSearch
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
mod script_full_tests {
|
||||
use super::code_index_tests::os_client;
|
||||
use crate::context::ResponseContext;
|
||||
|
||||
async fn matrix_client() -> Option<matrix_sdk::Client> {
|
||||
let homeserver = url::Url::parse("http://localhost:8008").ok()?;
|
||||
let client = matrix_sdk::Client::builder()
|
||||
.homeserver_url(homeserver)
|
||||
.build()
|
||||
.await
|
||||
.ok()?;
|
||||
|
||||
// Login with bootstrap credentials
|
||||
client
|
||||
.matrix_auth()
|
||||
.login_username("sol", "soldevpassword")
|
||||
.send()
|
||||
.await
|
||||
.ok()?;
|
||||
|
||||
Some(client)
|
||||
}
|
||||
|
||||
fn test_ctx() -> ResponseContext {
|
||||
ResponseContext {
|
||||
matrix_user_id: "@sol:sunbeam.local".into(),
|
||||
user_id: "sol@sunbeam.local".into(),
|
||||
display_name: Some("Sol".into()),
|
||||
is_dm: true,
|
||||
is_reply: false,
|
||||
room_id: "!test:localhost".into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn test_config() -> crate::config::Config {
|
||||
crate::config::Config::from_str(r#"
|
||||
[matrix]
|
||||
homeserver_url = "http://localhost:8008"
|
||||
user_id = "@sol:sunbeam.local"
|
||||
state_store_path = "/tmp/sol-test-script"
|
||||
db_path = ":memory:"
|
||||
[opensearch]
|
||||
url = "http://localhost:9200"
|
||||
index = "sol_test"
|
||||
[mistral]
|
||||
default_model = "mistral-medium-latest"
|
||||
[behavior]
|
||||
instant_responses = true
|
||||
script_timeout_secs = 5
|
||||
script_max_heap_mb = 64
|
||||
"#).unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_basic_math() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "console.log(2 + 2); console.log(Math.PI.toFixed(4));"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("4"), "Should compute 2+2=4");
|
||||
assert!(result.contains("3.1416"), "Should compute pi");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_typescript() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "const add = (a: number, b: number): number => a + b; console.log(add(10, 32));"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("42"), "Should execute TypeScript");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_filesystem_sandbox() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "sol.fs.write('test.txt', 'hello from script'); const content = sol.fs.read('test.txt'); console.log(content);"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("hello from script"), "Should read back written file");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_error_handling() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "throw new Error('intentional test error');"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("Error") && result.contains("intentional"),
|
||||
"Should capture and return error message");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_output_truncation() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "for (let i = 0; i < 10000; i++) console.log('line ' + i);"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.len() <= 4200, "Output should be truncated: got {}", result.len());
|
||||
assert!(result.contains("truncated") || result.len() <= 4096);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_invalid_json() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
"not json",
|
||||
&ctx, vec![],
|
||||
).await;
|
||||
|
||||
assert!(result.is_err(), "Invalid JSON args should error");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user