feat: deeper script sandbox and research type tests

- script tool: async operations, sol.fs.list, console.log/error/warn/info, return value capture, additional sandbox coverage
- research tool: tool_definition schema validation, depth boundary exhaustive testing, ResearchTask/ResearchResult roundtrips, output format verification
- matrix_utils: extract_image returns None for text messages
2026-03-24 15:04:39 +00:00
parent e59b55e6a9
commit f1009ddda4
1 changed files with 153 additions and 0 deletions
--- a/src/integration_test.rs
+++ b/src/integration_test.rs
@@ -5337,4 +5337,157 @@ mod script_full_tests {
        assert!(result.is_err(), "Invalid JSON args should error");
    }
    /// Runs a script that awaits a resolved promise and logs the value, then
    /// checks that the logged value appears in the tool output.
    ///
    /// Returns early (after printing a skip notice) when `os_client()` or
    /// `matrix_client()` yields `None`.
    #[tokio::test]
    async fn test_run_script_async_operations() {
        let os = match os_client() {
            Some(client) => client,
            None => {
                eprintln!("Skipping: no OpenSearch");
                return;
            }
        };
        let mx = match matrix_client().await {
            Some(client) => client,
            None => {
                eprintln!("Skipping: no Tuwunel");
                return;
            }
        };
        let config = test_config();
        let ctx = test_ctx();

        // The script uses `await` at its top level; presumably the sandbox wraps
        // the code in an async function so this is legal — confirm in run_script.
        let args = r#"{"code": "const result = await Promise.resolve(42); console.log(result);"}"#;
        let output = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        assert!(output.contains("42"), "Async operations should work");
    }
    /// Writes two files through `sol.fs.write`, lists the directory with
    /// `sol.fs.list('.')`, and checks both file names appear in the output.
    ///
    /// Returns early (after printing a skip notice) when `os_client()` or
    /// `matrix_client()` yields `None`.
    #[tokio::test]
    async fn test_run_script_sol_fs_list() {
        let Some(os) = os_client() else {
            eprintln!("Skipping: no OpenSearch");
            return;
        };
        let Some(mx) = matrix_client().await else {
            eprintln!("Skipping: no Tuwunel");
            return;
        };
        let config = test_config();
        let ctx = test_ctx();

        // The listing is logged as JSON so the file names can be found by
        // plain substring search on the tool output.
        let args = r#"{"code": "sol.fs.write('a.txt', 'aaa'); sol.fs.write('b.txt', 'bbb'); const files = sol.fs.list('.'); console.log(JSON.stringify(files));"}"#;
        let listing = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        let mentions = |file_name: &str| listing.contains(file_name);
        assert!(mentions("a.txt"), "Should list written files");
        assert!(mentions("b.txt"), "Should list both files");
    }
    /// Calls `console.log`, `console.error`, `console.warn` and `console.info`
    /// from a script and checks that the text passed to each one appears in
    /// the tool output.
    ///
    /// Returns early (after printing a skip notice) when `os_client()` or
    /// `matrix_client()` yields `None`.
    #[tokio::test]
    async fn test_run_script_console_methods() {
        let Some(os) = os_client() else { eprintln!("Skipping: no OpenSearch"); return; };
        let Some(mx) = matrix_client().await else { eprintln!("Skipping: no Tuwunel"); return; };
        let config = test_config();
        let ctx = test_ctx();
        let result = crate::tools::script::run_script(
            &os, &mx, &config,
            r#"{"code": "console.log('LOG'); console.error('ERR'); console.warn('WARN'); console.info('INFO');"}"#,
            &ctx, vec![],
        ).await.unwrap();
        // One assertion per console method, each naming the method and showing
        // the captured output so a failure is diagnosable from the test log
        // alone (same style as the return-value test's `got '{result}'`).
        for (method, marker) in [
            ("console.log", "LOG"),
            ("console.error", "ERR"),
            ("console.warn", "WARN"),
            ("console.info", "INFO"),
        ] {
            assert!(
                result.contains(marker),
                "{method} output '{marker}' should be captured: got '{result}'"
            );
        }
    }
    /// Runs a script consisting of a bare `return` statement and checks that
    /// the returned string appears in the tool output.
    ///
    /// Returns early (after printing a skip notice) when `os_client()` or
    /// `matrix_client()` yields `None`.
    #[tokio::test]
    async fn test_run_script_return_value_captured() {
        let os = match os_client() {
            Some(client) => client,
            None => {
                eprintln!("Skipping: no OpenSearch");
                return;
            }
        };
        let mx = match matrix_client().await {
            Some(client) => client,
            None => {
                eprintln!("Skipping: no Tuwunel");
                return;
            }
        };
        let config = test_config();
        let ctx = test_ctx();

        // A top-level `return` is only valid inside a function body, so this
        // relies on the sandbox wrapping the script (async IIFE, per the
        // original author's note) and surfacing the returned value.
        let args = r#"{"code": "return 'computed-result-42';"}"#;
        let result = crate::tools::script::run_script(&os, &mx, &config, args, &ctx, vec![])
            .await
            .unwrap();

        let captured = result.contains("computed-result-42");
        assert!(captured,
            "Return value should be captured: got '{result}'");
    }
 }
 // ══════════════════════════════════════════════════════════════════════════
 // Research tool — types and tool_definition tests
 // ══════════════════════════════════════════════════════════════════════════
 mod research_extended_tests {
    use crate::tools::research;

    /// The tool definition is named `research` and its parameter schema
    /// lists `tasks` as a property and as the first required field.
    #[test]
    fn test_tool_definition_schema() {
        let definition = research::tool_definition(4, 0).unwrap();
        assert_eq!(definition.function.name, "research");

        let schema = &definition.function.parameters;
        // The `tasks` entry under `properties` must itself be a JSON object.
        let tasks = &schema["properties"]["tasks"];
        assert!(tasks.is_object());
        assert_eq!(schema["required"][0], "tasks");
    }

    /// `tool_definition(max, depth)` yields a definition only while `depth`
    /// is below `max`, for the boundary cases listed here.
    #[test]
    fn test_tool_definition_depth_boundary() {
        let available = |max_depth, depth| research::tool_definition(max_depth, depth).is_some();

        // One level below the limit: still offered.
        assert!(available(4, 3));
        // Exactly at the limit: no longer offered.
        assert!(!available(4, 4));
        // Far past the limit: not offered.
        assert!(!available(4, 10));
        // A limit of zero never offers the tool.
        assert!(!available(0, 0));
        // Smallest non-zero limit at the root depth: offered.
        assert!(available(1, 0));
    }

    /// A `ResearchTask` survives a JSON serialize/deserialize cycle with both
    /// fields intact.
    #[test]
    fn test_research_task_roundtrip() {
        let original = research::ResearchTask {
            focus: "API design".into(),
            instructions: "review the REST endpoints in proxy/".into(),
        };

        let wire = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<research::ResearchTask>(&wire).unwrap();

        assert_eq!(decoded.focus, "API design");
        assert_eq!(decoded.instructions, "review the REST endpoints in proxy/");
    }

    /// A `ResearchResult` serializes `status` and `tool_calls_made` under
    /// those exact JSON keys.
    #[test]
    fn test_research_result_json() {
        let outcome = research::ResearchResult {
            focus: "license check".into(),
            findings: "all repos use AGPL-3.0".into(),
            tool_calls_made: 3,
            status: "complete".into(),
        };

        let value = serde_json::to_value(&outcome).unwrap();

        assert_eq!(value["status"], "complete");
        assert_eq!(value["tool_calls_made"], 3);
    }

    /// Renders two results as markdown-style sections and checks the section
    /// headers and the summed tool-call count.
    ///
    /// NOTE(review): the rendering is built inside this test, not by a
    /// function from `research` — confirm it mirrors the production format.
    #[test]
    fn test_research_result_output_format() {
        let completed = |focus: &'static str, findings: &'static str, calls| {
            research::ResearchResult {
                focus: focus.into(),
                findings: findings.into(),
                tool_calls_made: calls,
                status: "complete".into(),
            }
        };
        let results = vec![
            completed("auth", "uses OAuth2", 2),
            completed("db", "PostgreSQL via CNPG", 1),
        ];

        let mut total_calls: usize = 0;
        let mut sections = Vec::with_capacity(results.len());
        for entry in &results {
            total_calls += entry.tool_calls_made;
            sections.push(format!("### {} [{}]\n{}\n", entry.focus, entry.status, entry.findings));
        }
        let output = sections.join("\n---\n\n");

        assert!(output.contains("### auth [complete]"));
        assert!(output.contains("### db [complete]"));
        assert_eq!(total_calls, 3);
    }
 }