feat: deeper script sandbox and research type tests
- script tool: async operations, sol.fs.list, console.log/error/warn/info,
  return value capture, additional sandbox coverage
- research tool: tool_definition schema validation, depth boundary exhaustive
  testing, ResearchTask/ResearchResult roundtrips, output format verification
- matrix_utils: extract_image returns None for text messages
This commit is contained in:
@@ -5337,4 +5337,157 @@ mod script_full_tests {
|
||||
|
||||
assert!(result.is_err(), "Invalid JSON args should error");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_async_operations() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
// Test async IIFE with await
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "const result = await Promise.resolve(42); console.log(result);"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("42"), "Async operations should work");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_sol_fs_list() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "sol.fs.write('a.txt', 'aaa'); sol.fs.write('b.txt', 'bbb'); const files = sol.fs.list('.'); console.log(JSON.stringify(files));"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("a.txt"), "Should list written files");
|
||||
assert!(result.contains("b.txt"), "Should list both files");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_console_methods() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "console.log('LOG'); console.error('ERR'); console.warn('WARN'); console.info('INFO');"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("LOG"));
|
||||
assert!(result.contains("ERR"));
|
||||
assert!(result.contains("WARN"));
|
||||
assert!(result.contains("INFO"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_run_script_return_value_captured() {
|
||||
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||
let config = test_config();
|
||||
let ctx = test_ctx();
|
||||
|
||||
// Use return to produce a value (async IIFE wrapping captures this)
|
||||
let result = crate::tools::script::run_script(
|
||||
&os, &mx, &config,
|
||||
r#"{"code": "return 'computed-result-42';"}"#,
|
||||
&ctx, vec![],
|
||||
).await.unwrap();
|
||||
|
||||
assert!(result.contains("computed-result-42"),
|
||||
"Return value should be captured: got '{result}'");
|
||||
}
|
||||
}
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
// Research tool — types and tool_definition tests
|
||||
// ══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
mod research_extended_tests {
|
||||
use crate::tools::research;
|
||||
|
||||
#[test]
|
||||
fn test_tool_definition_schema() {
|
||||
let def = research::tool_definition(4, 0).unwrap();
|
||||
assert_eq!(def.function.name, "research");
|
||||
let params = &def.function.parameters;
|
||||
// Should have tasks array
|
||||
assert!(params["properties"]["tasks"].is_object());
|
||||
assert_eq!(params["required"][0], "tasks");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tool_definition_depth_boundary() {
|
||||
// At depth 3 with max 4 — still available
|
||||
assert!(research::tool_definition(4, 3).is_some());
|
||||
// At depth 4 with max 4 — unavailable
|
||||
assert!(research::tool_definition(4, 4).is_none());
|
||||
// Beyond max — unavailable
|
||||
assert!(research::tool_definition(4, 10).is_none());
|
||||
// Max 0 — never available
|
||||
assert!(research::tool_definition(0, 0).is_none());
|
||||
// Max 1, depth 0 — available
|
||||
assert!(research::tool_definition(1, 0).is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_research_task_roundtrip() {
|
||||
let task = research::ResearchTask {
|
||||
focus: "API design".into(),
|
||||
instructions: "review the REST endpoints in proxy/".into(),
|
||||
};
|
||||
let json = serde_json::to_string(&task).unwrap();
|
||||
let back: research::ResearchTask = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(back.focus, "API design");
|
||||
assert_eq!(back.instructions, "review the REST endpoints in proxy/");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_research_result_json() {
|
||||
let result = research::ResearchResult {
|
||||
focus: "license check".into(),
|
||||
findings: "all repos use AGPL-3.0".into(),
|
||||
tool_calls_made: 3,
|
||||
status: "complete".into(),
|
||||
};
|
||||
let json = serde_json::to_value(&result).unwrap();
|
||||
assert_eq!(json["status"], "complete");
|
||||
assert_eq!(json["tool_calls_made"], 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_research_result_output_format() {
|
||||
let results = vec![
|
||||
research::ResearchResult {
|
||||
focus: "auth".into(),
|
||||
findings: "uses OAuth2".into(),
|
||||
tool_calls_made: 2,
|
||||
status: "complete".into(),
|
||||
},
|
||||
research::ResearchResult {
|
||||
focus: "db".into(),
|
||||
findings: "PostgreSQL via CNPG".into(),
|
||||
tool_calls_made: 1,
|
||||
status: "complete".into(),
|
||||
},
|
||||
];
|
||||
let total_calls: usize = results.iter().map(|r| r.tool_calls_made).sum();
|
||||
let output = results.iter()
|
||||
.map(|r| format!("### {} [{}]\n{}\n", r.focus, r.status, r.findings))
|
||||
.collect::<Vec<_>>().join("\n---\n\n");
|
||||
assert!(output.contains("### auth [complete]"));
|
||||
assert!(output.contains("### db [complete]"));
|
||||
assert_eq!(total_calls, 3);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user