feat: deeper script sandbox and research type tests
- script tool: async operations, sol.fs.list, console.log/error/warn/info, return value capture, additional sandbox coverage
- research tool: tool_definition schema validation, exhaustive depth-boundary testing, ResearchTask/ResearchResult roundtrips, output format verification
- matrix_utils: extract_image returns None for text messages
This commit is contained in:
@@ -5337,4 +5337,157 @@ mod script_full_tests {
|
|||||||
|
|
||||||
assert!(result.is_err(), "Invalid JSON args should error");
|
assert!(result.is_err(), "Invalid JSON args should error");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_run_script_async_operations() {
|
||||||
|
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||||
|
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||||
|
let config = test_config();
|
||||||
|
let ctx = test_ctx();
|
||||||
|
|
||||||
|
// Test async IIFE with await
|
||||||
|
let result = crate::tools::script::run_script(
|
||||||
|
&os, &mx, &config,
|
||||||
|
r#"{"code": "const result = await Promise.resolve(42); console.log(result);"}"#,
|
||||||
|
&ctx, vec![],
|
||||||
|
).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.contains("42"), "Async operations should work");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_run_script_sol_fs_list() {
|
||||||
|
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||||
|
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||||
|
let config = test_config();
|
||||||
|
let ctx = test_ctx();
|
||||||
|
|
||||||
|
let result = crate::tools::script::run_script(
|
||||||
|
&os, &mx, &config,
|
||||||
|
r#"{"code": "sol.fs.write('a.txt', 'aaa'); sol.fs.write('b.txt', 'bbb'); const files = sol.fs.list('.'); console.log(JSON.stringify(files));"}"#,
|
||||||
|
&ctx, vec![],
|
||||||
|
).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.contains("a.txt"), "Should list written files");
|
||||||
|
assert!(result.contains("b.txt"), "Should list both files");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_run_script_console_methods() {
|
||||||
|
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||||
|
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||||
|
let config = test_config();
|
||||||
|
let ctx = test_ctx();
|
||||||
|
|
||||||
|
let result = crate::tools::script::run_script(
|
||||||
|
&os, &mx, &config,
|
||||||
|
r#"{"code": "console.log('LOG'); console.error('ERR'); console.warn('WARN'); console.info('INFO');"}"#,
|
||||||
|
&ctx, vec![],
|
||||||
|
).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.contains("LOG"));
|
||||||
|
assert!(result.contains("ERR"));
|
||||||
|
assert!(result.contains("WARN"));
|
||||||
|
assert!(result.contains("INFO"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_run_script_return_value_captured() {
|
||||||
|
let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
|
||||||
|
let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
|
||||||
|
let config = test_config();
|
||||||
|
let ctx = test_ctx();
|
||||||
|
|
||||||
|
// Use return to produce a value (async IIFE wrapping captures this)
|
||||||
|
let result = crate::tools::script::run_script(
|
||||||
|
&os, &mx, &config,
|
||||||
|
r#"{"code": "return 'computed-result-42';"}"#,
|
||||||
|
&ctx, vec![],
|
||||||
|
).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.contains("computed-result-42"),
|
||||||
|
"Return value should be captured: got '{result}'");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ══════════════════════════════════════════════════════════════════════════
|
||||||
|
// Research tool — types and tool_definition tests
|
||||||
|
// ══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
mod research_extended_tests {
|
||||||
|
use crate::tools::research;
|
||||||
|
|
||||||
|
/// Checks the definition returned by `research::tool_definition(4, 0)`:
/// the function is named `research`, `properties.tasks` is a JSON object,
/// and the first entry of `required` is `tasks`.
#[test]
fn test_tool_definition_schema() {
    let definition = research::tool_definition(4, 0).unwrap();
    let function = &definition.function;

    assert_eq!(function.name, "research");

    // Should have tasks array
    let schema = &function.parameters;
    let tasks_property = &schema["properties"]["tasks"];
    assert!(tasks_property.is_object());
    assert_eq!(schema["required"][0], "tasks");
}
|
||||||
|
|
||||||
|
/// Probes `research::tool_definition(max, depth)` around the depth limit and
/// asserts where it returns `Some` and where it returns `None`.
#[test]
fn test_tool_definition_depth_boundary() {
    // At depth 3 with max 4 — still available
    let one_below_max = research::tool_definition(4, 3);
    assert!(one_below_max.is_some());

    // At depth 4 with max 4 — unavailable
    let at_max = research::tool_definition(4, 4);
    assert!(at_max.is_none());

    // Beyond max — unavailable
    let past_max = research::tool_definition(4, 10);
    assert!(past_max.is_none());

    // Max 0 — never available
    let zero_max = research::tool_definition(0, 0);
    assert!(zero_max.is_none());

    // Max 1, depth 0 — available
    let smallest_usable_max = research::tool_definition(1, 0);
    assert!(smallest_usable_max.is_some());
}
|
||||||
|
|
||||||
|
/// Serializes a `ResearchTask` to a JSON string, parses it back, and asserts
/// that `focus` and `instructions` survive unchanged.
#[test]
fn test_research_task_roundtrip() {
    let focus = "API design";
    let instructions = "review the REST endpoints in proxy/";

    let original = research::ResearchTask {
        focus: focus.into(),
        instructions: instructions.into(),
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: research::ResearchTask = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.focus, focus);
    assert_eq!(decoded.instructions, instructions);
}
|
||||||
|
|
||||||
|
/// Converts a `ResearchResult` to a `serde_json` value and asserts that the
/// `status` and `tool_calls_made` keys carry the values that were set.
#[test]
fn test_research_result_json() {
    let finished = research::ResearchResult {
        focus: "license check".into(),
        findings: "all repos use AGPL-3.0".into(),
        tool_calls_made: 3,
        status: "complete".into(),
    };

    let encoded = serde_json::to_value(&finished).unwrap();

    assert_eq!(encoded["status"], "complete");
    assert_eq!(encoded["tool_calls_made"], 3);
}
|
||||||
|
|
||||||
|
/// Builds two `ResearchResult` values, renders each as a
/// `### focus [status]` section joined by `---` separators, and asserts that
/// both headings are present and the tool-call counts add up to 3.
///
/// The rendering is done locally in this test; no function from the
/// `research` module is called.
#[test]
fn test_research_result_output_format() {
    let auth = research::ResearchResult {
        focus: "auth".into(),
        findings: "uses OAuth2".into(),
        tool_calls_made: 2,
        status: "complete".into(),
    };
    let db = research::ResearchResult {
        focus: "db".into(),
        findings: "PostgreSQL via CNPG".into(),
        tool_calls_made: 1,
        status: "complete".into(),
    };
    let results = vec![auth, db];

    let total_calls: usize = results
        .iter()
        .fold(0, |running, entry| running + entry.tool_calls_made);

    // One section per result, later joined by a horizontal-rule separator.
    let mut sections = Vec::with_capacity(results.len());
    for entry in &results {
        sections.push(format!(
            "### {} [{}]\n{}\n",
            entry.focus, entry.status, entry.findings
        ));
    }
    let output = sections.join("\n---\n\n");

    assert!(output.contains("### auth [complete]"));
    assert!(output.contains("### db [complete]"));
    assert_eq!(total_calls, 3);
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user