diff --git a/.agent_rules/README.md b/.agent_rules/README.md index 469408c03..b72df9707 100644 --- a/.agent_rules/README.md +++ b/.agent_rules/README.md @@ -90,4 +90,4 @@ uv pip install -e . # Install deps --- -**Pipeline Version**: 1.6.0 | **Steps**: 25 | **Tests**: 2,250 passed, 7 skipped, 1 xpassed with Ollama integration excludes on 2026-05-20 | **MCP Tools**: 133 +**Pipeline Version**: 1.8.0 | **Steps**: 25 | **Tests**: latest recorded full suite with Ollama integration excludes: 2,379 passed, 17 skipped, 1 xfailed; collect-only inventory is 2,397 tests | **MCP Tools**: verify with `src/tests/mcp/test_mcp_audit.py` diff --git a/AGENTS.md b/AGENTS.md index bcd30eaf2..221bda99f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -224,7 +224,7 @@ graph TD - **Docs audit**: `uv run --extra dev python doc/development/docs_audit.py --strict --check-anchors --no-write` reports no broken links, anchor gaps, or AGENTS/README coverage gaps. - **GNN doc patterns**: `uv run --extra dev python scripts/check_gnn_doc_patterns.py --strict` reports no banned GNN documentation patterns. -- **Tests**: command of record is `uv run --extra dev python -m pytest src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py`; current collect-only inventory (2026-06-09) is 171 test files and 2,296 collected tests with the same Ollama ignores. Latest recorded full suite with the same excludes passed on 2026-06-09: 2,281 passed, 14 skipped, 1 xfailed in 744.50s. Re-enable `src/tests/llm/test_llm_ollama*.py` when `ollama` is available. +- **Tests**: command of record is `uv run --extra dev python -m pytest src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py`; current collect-only inventory (2026-06-12) is 184 test files and 2,397 collected tests with the same Ollama ignores. Latest recorded full suite evidence with the same excludes is 2,379 passed, 17 skipped, 1 xfailed. 
Re-enable `src/tests/llm/test_llm_ollama*.py` when `ollama` is available. - **LLM Default Model**: `smollm2:135m-instruct-q4_K_S` via Ollama (`llm.defaults.DEFAULT_OLLAMA_MODEL`; override with `OLLAMA_MODEL` / `input/config.yaml`). - **Renderer inventory**: PyMDP, RxInfer, JAX, NumPyro, Stan, PyTorch, ActiveInference.jl, and DisCoPy have maintained render paths; run focused backend tests before publishing operational pass counts. - **Visual Accessibility**: All pipeline steps now include enhanced visual indicators and progress tracking. @@ -514,7 +514,7 @@ Each module provides specialized agent capabilities for different aspects of Act --- -**Last Updated**: 2026-06-09 -**Pipeline Version**: 1.6.0 +**Last Updated**: 2026-06-12 +**Pipeline Version**: 1.8.0 **Total Steps**: 25 (0-24) **Status**: Maintained diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 0251a9abb..daa74d3ae 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -2,8 +2,8 @@ This guide details the architecture of the Generalized Notation Notation (GNN) system. It complements `DOCS.md` and `doc/pipeline/README.md` with an implementation-oriented perspective for developers. 
-**Last Updated**: 2026-05-08 -**Version**: 1.6.0 +**Last Updated**: 2026-06-11 +**Version**: 1.8.0 **Status**: Maintained **Pipeline Steps**: 25 (0-24) @@ -20,10 +20,10 @@ This guide details the architecture of the Generalized Notation Notation (GNN) s ### Quality Assurance Principles - **Real Implementation Testing**: All tests use real code paths and actual data dependencies -- **Comprehensive Coverage**: >95% test coverage with performance and integration validation +- **Comprehensive Coverage**: Use current CI/local test output for exact coverage and pass counts - **Real Data Processing**: Tests and examples use explicit, runnable data -- **Performance Standards**: Sub-30-minute execution time, <2GB memory usage for standard workloads -- **Error Rate Targets**: <1% critical failure rate, >99% step completion success rate +- **Performance Standards**: Publish performance and memory figures only from current measured runs +- **Error Rate Targets**: Publish failure-rate claims only from current measured runs ### Agent Architecture Principles @@ -323,8 +323,8 @@ Each agent implements comprehensive performance monitoring: --- -**Architecture Version**: 1.6.0 -**Last Updated**: 2026-05-08 +**Architecture Version**: 1.8.0 +**Last Updated**: 2026-06-11 **Status**: ✅ Production Ready **Compliance**: Thin orchestrator pattern **Latest Validation**: See current test and pipeline runs diff --git a/CHANGELOG.md b/CHANGELOG.md index 550a26304..6490ffe8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,21 +8,27 @@ Format follows [Keep a Changelog](https://keepachangelog.com/) and [Semantic Ver ## [Unreleased] +No unreleased changes yet. 
+ +--- + +## [1.8.0] — 2026-06-12 + ### Added -- **Developer tooling**: `justfile` (21 recipes), `.pre-commit-config.yaml` (Ruff, file hygiene hooks), `.devcontainer/` (Python 3.11 + uv + just for GitHub Codespaces) -- **NumPyro/Stan E2E tests**: 15-test suite (`test_render_numpyro_stan.py`) validating render success, Python compilation, AST parsing, import correctness, type mapping, and empty-input handling -- **Renderer health verification**: All 8/8 backends confirmed operational (PyMDP, RxInfer, JAX, NumPyro, Stan, PyTorch, ActiveInference.jl, DisCoPy) +- **Template developer kit**: packaged template index, package-data template assets, `gnn templates list`, `gnn templates show NAME`, and `gnn pull NAME --output-dir ... --dry-run --overwrite`. +- **Template safety contracts**: checksum verification, collision handling, symlink/path traversal rejection, unknown-template failures, and installed-wheel smoke coverage outside the repo checkout. +- **MCP local HTTP orchestration**: bearer-token auth through `GNN_MCP_TOKEN`, localhost default binding, explicit insecure local opt-in with `GNN_MCP_ALLOW_INSECURE_LOCAL=1`, safe-tool filtering, optional rate limiting, and default-denied resource reads unless explicitly allowlisted. +- **Capability-contract verifier**: release-facing checks for template package data, MCP auth/resource safety, acceptance-command isolation, roadmap ordering, count drift, and autonomy non-mutation claims. +- **Roadmap foundations**: contract fixtures for v1.7 multi-agent/rendering/UI/audio/Three.js surfaces and v1.9 model-family acceptance/interpretability ledgers without marking those future release items complete. 
### Changed -- Test suite expanded to 2,200 passed, 70 skipped (from 1,906/30) -- Documentation version synchronized to 1.6.0 across 35 files (resolved v1.3.0/v1.5.0 drift) -- TO-DO.md rewritten with verified completed items and restructured roadmap -- Pre-commit detect-secrets hook removed (not in project deps; CI uses GitGuardian) +- Current test evidence updated to 2,397 collected tests and latest full local suite evidence of 2,379 passed, 17 skipped, 1 xfailed with the documented Ollama integration excludes. +- `TO-DO.md` now treats v1.8.0 as the developer-kit release and v1.9.0 as the next model-family reliability target. +- Developer documentation now advertises verified template and MCP commands only, with `/tmp` output directories in acceptance smokes to avoid tracked `output/` churn. +- Pre-commit/dev tooling remains scoped to Ruff, file hygiene, and `just`/devcontainer ergonomics; dedicated secret scanning is not claimed. ### Fixed -- 4 broken Mermaid diagram blocks in `doc/gnn/modules/` (00_template, 01_setup, 11_render, 21_mcp) -- Stale line-count claims for orchestrator scripts (11_render.py, 12_execute.py, main.py) -- Root `AGENTS.md` version (1.5.0 → 1.6.0) and `README.md` feature attribution (v1.5.0 → v1.6.0) +- Removed release-facing false-certification paths around optional framework fallback, stale counts, unauthenticated MCP HTTP, unsafe MCP resource exposure, and template assets that only work from a source checkout. 
--- @@ -123,7 +129,8 @@ Format follows [Keep a Changelog](https://keepachangelog.com/) and [Semantic Ver - pytest test suite with comprehensive coverage - MCP tool registration framework -[Unreleased]: https://github.com/ActiveInferenceInstitute/GeneralizedNotationNotation/compare/v1.6.0...HEAD +[Unreleased]: https://github.com/ActiveInferenceInstitute/GeneralizedNotationNotation/compare/v1.8.0...HEAD +[1.8.0]: https://github.com/ActiveInferenceInstitute/GeneralizedNotationNotation/compare/v1.6.0...v1.8.0 [1.6.0]: https://github.com/ActiveInferenceInstitute/GeneralizedNotationNotation/compare/v1.3.0...v1.6.0 [1.3.0]: https://github.com/ActiveInferenceInstitute/GeneralizedNotationNotation/compare/v1.2.0...v1.3.0 [1.2.0]: https://github.com/ActiveInferenceInstitute/GeneralizedNotationNotation/compare/v1.1.0...v1.2.0 diff --git a/CITATION.cff b/CITATION.cff index c1bcfac00..d0b7992a5 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,8 +9,8 @@ authors: # This entry acknowledges all contributors. Individual contributors can be listed above if desired. title: "GeneralizedNotationNotation (GNN)" -version: 1.6.0 # Current stable release -date-released: 2026-04-15 +version: 1.8.0 # Current stable release +date-released: 2026-06-12 abstract: | Generalized Notation Notation (GNN) is a text-based language designed to standardize the representation and communication of Active Inference generative models. It aims to enhance clarity, reproducibility, and interoperability in the field of Active Inference and cognitive modeling. GNN provides a structured way to describe complex models, making them human-readable and machine-parsable. It supports a "Triple Play" approach: text-based models, graphical model visualizations, and a blueprint for executable cognitive models. 
diff --git a/DOCS.md b/DOCS.md index 8bca77294..377d315d9 100644 --- a/DOCS.md +++ b/DOCS.md @@ -1,6 +1,6 @@ # Generalized Notation Notation (GNN) — Comprehensive Documentation -**Last Updated**: 2026-05-08 +**Last Updated**: 2026-06-11 This document provides a complete, machine-parsable and human-accessible overview of GNN: the what, why, and how. It consolidates architecture, pipeline, data flows, artifacts, and integration points with multiple Mermaid diagrams. @@ -15,6 +15,9 @@ This document provides a complete, machine-parsable and human-accessible overvie - Consistent, reproducible model specification and sharing - Interoperability across ecosystems (PyMDP, RxInfer.jl, ActiveInference.jl, JAX, PyTorch, NumPyro) - Traceable artifact lineage and rigorous validation +- Capability-contract checks for roadmap-visible CLI, GUI, MCP, renderer, visualization, and measured-count claims +- Developer-kit surfaces for maintained templates (`gnn templates list`, `gnn templates show`, `gnn pull`) and safe local MCP HTTP orchestration +- Model-family acceptance ledgers for broader fixture reliability and interpretability evidence ## High-Level Concept Map diff --git a/README.md b/README.md index 27c3e5298..a41598bd9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # GeneralizedNotationNotation (GNN) -**Last Updated**: 2026-06-09 +**Last Updated**: 2026-06-12
@@ -48,11 +48,12 @@ ### 📚 Initial Publication **Smékal, J., & Friedman, D. A. (2023)**. *Generalized Notation Notation for Active Inference Models*. Active Inference Journal. -**Last Updated**: 2026-06-09 -**Version**: 1.6.0 +**Last Updated**: 2026-06-12 +**Version**: 1.8.0 **Status**: ✅ Production Ready (Active Inference Institute) -**Test Suite Inventory (measured 2026-06-09)**: 171 `test_*.py` files under `src/tests/`; `uv run --extra dev python -m pytest --collect-only src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py` collected 2,296 tests. Latest recorded full suite with the same Ollama integration excludes passed on 2026-06-09: 2,281 passed, 14 skipped, 1 xfailed in 744.50s. -**Features (v1.6.0)**: Neurosymbolic LLM Context Injection, D3 Execution Dashboards, NumPyro/Stan Renderers, structured PyMDP 1.0 POMDP execution, PyMDP Scaling Study, MCP Full Module Exposure. +**Test Suite Inventory (measured 2026-06-12)**: 184 `test_*.py` files under `src/tests/`; `uv run --extra dev python -m pytest --collect-only src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py` collected 2,397 tests. Latest recorded full suite evidence with the same Ollama integration excludes is 2,379 passed, 17 skipped, 1 xfailed. +**Features (v1.8.0)**: maintained template CLI (`gnn templates list`, `gnn templates show`, `gnn pull`), packaged template assets with checksum/collision handling, authenticated local MCP HTTP orchestration, pre-commit/devcontainer tooling, structured PyMDP 1.0 POMDP execution, PyMDP Scaling Study, and MCP Full Module Exposure. +**Roadmap foundations (unreleased)**: model-family acceptance/interpretability ledgers for v1.9.0 reliability work. 
📖 **DOI:** [10.5281/zenodo.7803328](https://doi.org/10.5281/zenodo.7803328) 📁 **Archive:** [zenodo.org/records/7803328](https://zenodo.org/records/7803328) diff --git a/TO-DO.md b/TO-DO.md index ffb9b8d1c..f5b410414 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,8 +1,17 @@ # TO-DO — GNN Pipeline Roadmap -**Last Updated**: 2026-05-08 -**Current Version**: 1.6.0 -**Next Target**: v1.7.0 +**Last Updated**: 2026-06-12 +**Current Version**: 1.8.0 +**Next Target**: v1.9.0 (model-family reliability and interpretability) + +**Current Evidence (2026-06-12)**: Maintained verifier gates pass on the +roadmap-hardening branch. Current inventory is `2397` collected tests with the +documented Ollama ignores. Latest full local suite evidence with the same +Ollama ignores is `2379 passed, 17 skipped, 1 xfailed`. v1.8.0 focused release +smokes passed for `gnn templates list`, `gnn templates show +pomdp-gridworld-3x3`, dry-run `gnn pull` to `/tmp/gnn-pull`, and authenticated +MCP HTTP tests (`12 passed`; combined CLI/MCP/capability suite `32 passed`); +`just lint` passes. --- @@ -15,7 +24,7 @@ - [x] **MCP Full Module Exposure** — All 25 pipeline modules + infrastructure modules expose tools via MCP files. Current audit coverage is tracked by `src/tests/mcp/test_mcp_audit.py`; the 2026-05-14 focused audit registered 133 tools and 1 resource. - [x] **PyMDP Scaling Study** — Automated scaling analysis pipeline (`scripts/run_pymdp_gnn_scaling_analysis.py`) with configurable N=[2,256] grids, exponential state-space sweeps, and 19-artifact visualization suite. - [x] **Test Suite Hardening** — Real-implementation coverage across all modules. Current collect-only inventory is tracked in `src/tests/TEST_SUITE_SUMMARY.md`; Hypothesis tests were refactored to deterministic parametric matrices. -- [x] **Documentation Integrity** — 105 `doc/gnn/` files, 34 `AGENTS.md` across `src/`, all version strings synchronized to `1.6.0`. Zero phantom file references. 
+- [x] **Documentation Integrity** — Maintained documentation coverage is enforced by `doc/development/docs_audit.py --strict --check-anchors --no-write`, GNN doc-pattern checks, maintained-doc terminology checks, and repo terminology checks. Zero phantom file references at the latest recorded verifier pass. - [x] **Enhanced Visual Logging** — Progress bars, color-coded output, structured summaries, correlation ID tracking, screen reader support across all 25 pipeline steps. - [x] **LLM & ML Fixes** — LLM recursive glob fix, ML cross-validation fold logic hardened (`min(5, len(X), min_class_count)`). @@ -24,6 +33,7 @@ ## 🎯 v1.7.0 — Multi-Agent Topologies & Interactive Frontends > **Scope**: Push the pipeline from single-agent generation to interactive, multi-agent architectures with real-time editing and streaming capabilities. +> **RC status**: Foundation contracts are implemented, but this release remains deferred until runtime-depth evidence catches up to the public claims. - [ ] **Multi-Agent Message Passing (RxInfer)** — Expand the `execute/` layer to handle clustered topologies (100+ agents) passing states asynchronously utilizing graph factorization in Julia via RxInfer.jl. - [ ] **Categorical Symmetries (DisCoPy)** — Sync matrix permutations natively to string diagrams, allowing visual topology validation before simulation generation. @@ -31,42 +41,103 @@ - [ ] **Audio Parameter Streaming** — Bridge Step 15 (Audio/Pedalboard/SAPF) to accept dynamic telemetry updates from long-running PyMDP agent simulations in real time. Extend the existing `process_realtime_chunk` pattern. - [ ] **3D Matrix Visualization** — Upgrade the Matrix Visualization module into interactive Three.js canvas structures for explorable generative model inspection. 
+Contract foundations now exist for these items: compact RxInfer agent +population keys, DisCoPy permutation metadata, GUI WebSocket message schemas, +audio telemetry chunk artifacts, and Three.js tensor explorer HTML artifacts. +The items stay unchecked until backend execution, UI runtime behavior, and +optional-framework integration are verified end to end. + +Review grouping for the current branch: +docs/verifiers; RxInfer/DisCoPy; GUI/audio/visualization; CLI/MCP/autonomy; +setup/scaling/test stabilization. + ### Acceptance ```bash -uv run python src/main.py --only-steps "11,12" --frameworks "rxinfer,discopy" --target-dir input/multi_agent_models --verbose -uv run pytest src/tests/test_audio*.py src/tests/test_gui*.py +uv run --extra dev python scripts/check_capability_contracts.py --strict +uv run --extra dev python src/main.py --only-steps "11,12" --frameworks "rxinfer,discopy" --target-dir input/multi_agent_models --output-dir /tmp/gnn-v17-multi-agent-accept --verbose +uv run --extra dev python -m pytest src/tests/audio src/tests/gui src/tests/render/test_rxinfer_multiagent_contract.py src/tests/render/test_discopy_symmetry_contract.py -q ``` --- -## 🌐 v1.8.0 — Developer Kit & Template Ecosystem +## ✅ v1.8.0 — Developer Kit & Template Ecosystem (Released) > **Scope**: Standardizing GNN as the definitive orchestration language with developer-grade tooling and reusable template packages. +> **Released**: 2026-06-12 (tag: `v1.8.0`) + +- [x] **GNN Template Library Engine** — Enable package-manager style downloads for specialized active-inference setups directly using `gnn pull [template_name]` via CLI (Step `src/cli/`). Maintained packaged template index, `gnn templates list`, `gnn templates show`, `gnn pull`, dry-run, checksum, collision, overwrite, wheel/install smoke, and index path-safety contracts are covered by focused release tests. 
+- [x] **Pre-commit Ecosystem** — Ship `.pre-commit-config.yaml`, `justfile`, `.devcontainer/` (Dockerfile + devcontainer.json), Ruff lint/format hooks, and general file-hygiene checks to make repository contributions more consistent. Dedicated secret-scanning is not currently claimed by this item. +- [x] **MCP Local HTTP Orchestration** — Extend MCP server from local tool discovery to authenticated local JSON-RPC HTTP orchestration with bearer-token auth, rate limiting, localhost default binding, safe-tool exposure, and default-denied resource reads. Missing/invalid tokens return `401`, configured rate limits return `429`, and resource reads are denied unless explicitly allowlisted. + +### Acceptance +```bash +uv run gnn templates list +uv run gnn templates show pomdp-gridworld-3x3 +uv run gnn pull pomdp-gridworld-3x3 --output-dir /tmp/gnn-pull --dry-run +GNN_MCP_TOKEN=local-dev-token uv run --extra dev python -m pytest src/tests/mcp/test_mcp_http_auth.py -q +just lint +``` + +--- + +## 🧭 v1.9.0 — Model-Family Reliability & Interpretability + +> **Scope**: Make broader families of generative models reliably traverse the maintained pipeline with validation, execution-status, telemetry, interpretability, and report evidence. -- [ ] **GNN Template Library Engine** — Enable package-manager style downloads for specialized active-inference setups directly using `gnn pull [template_name]` via CLI (Step `src/cli/`). -- [x] **Pre-commit Ecosystem** — Ship `.pre-commit-config.yaml`, `justfile` (21 recipes), `.devcontainer/` (Dockerfile + devcontainer.json), Ruff lint/format hooks, and secret detection to make repository contributions frictionless. -- [ ] **MCP Remote Orchestration** — Extend MCP server from local tool discovery to remote CI/CD agent manipulation with authenticated HTTP transport and rate limiting. 
+- [ ] **Model-Family Acceptance Harness** — Maintain `input/model_family_manifest.json` and run representative basics, discrete, continuous, hierarchical, multi-agent, precision, structured, gridworld, and scaling-study fixtures through pipeline evidence steps with explicit passed/skipped/failed statuses. +- [ ] **Cross-Step Evidence Ledger** — Link Step 3/5/6/11/12/15/16/23 evidence for each accepted family: parsed model identity, matrix dimensions, renderer status, execution status, telemetry, analysis, visualization, and report artifacts. +- [ ] **Interpretability Summaries** — Emit per-family variable/edge inventories, matrix-shape tables, optional observation/action/free-energy trace previews, renderer/execution status, and artifact links. + +Current RC foundation: the all-family strict harness parses real pipeline +summaries, fails closed when summaries or required per-step records are missing, +rejects incomplete `--only-steps` acceptance profiles, clears stale per-family +outputs before each run, and requires concrete artifacts for selected evidence +steps. `continuous` and `hierarchical` still expose raw failed Step 11/12 +outcomes; the ledger may record them as explicitly allowed unsupported +renderer/executor skips with incompatibility and `no_executable_scripts` +reasons. This is honest traversal evidence, not full backend reliability. 
### Acceptance ```bash -gnn pull actinf-pomdp-2state # Template library works -just lint # Developer tooling works -mcp-test-client ping gnn-server # Remote MCP endpoint responds +uv run --extra dev python -m pytest src/tests/pipeline/test_model_family_acceptance.py src/tests/analysis/test_interpretability_summary.py src/tests/report/test_model_family_report.py -q +uv run --extra dev python scripts/run_model_family_acceptance.py --manifest input/model_family_manifest.json --families basics,discrete,multiagent,structured --output-dir /tmp/gnn-family-acceptance --strict +uv run --extra dev python scripts/run_model_family_acceptance.py --manifest input/model_family_manifest.json --output-dir /tmp/gnn-family-acceptance-all --strict +uv run --extra dev python src/main.py --target-dir input/gnn_files/discrete --output-dir /tmp/gnn-v19-discrete-smoke --skip-steps "2" --skip-llm --verbose ``` --- -## 🚀 v2.0.0 — Multimodal Autonomy & Self-Modifying Workflows +## 🧪 v2.0.0 — Semantic Fidelity & Cross-Framework Reliability + +> **Scope**: Upgrade GNN from broad fixture acceptance to stronger semantic preservation, cross-format round trips, and cross-framework equivalence checks. + +- [ ] **Semantic Round-Trip Gates** — Require representative model families to preserve variables, edges, dimensions, and key matrix contracts across maintained formats. +- [ ] **Cross-Framework Result Comparisons** — Compare compatible PyMDP, RxInfer, JAX, NumPyro, PyTorch, ActiveInference.jl, and DisCoPy outputs with explicit skipped/failed states for unavailable frameworks. +- [ ] **Release Readiness Ledger** — Produce one release ledger tying docs, verifier gates, collect-only inventory, focused tests, and acceptance smokes to checked roadmap items. + +--- + +## 🌱 v3.0.0 — Long-Running Orchestration & Distributed Ecology Plans + +> **Scope**: Prepare safe long-running orchestration, durable observation streams, and auditable container plans before any live infrastructure mutation. 
+ +- [ ] **Durable Observation Streams** — Standardize file/array stream manifests and replayable execution traces before adding live sensors or device-backed streams. +- [ ] **Long-Running Pipeline Sessions** — Add resumable run manifests, status inspection, and cancellation-safe cleanup for extended model-family acceptance runs. +- [ ] **Auditable Container Plans** — Generate validated container plans with security review and rollback semantics; do not mutate real clusters. + +--- + +## 🚀 v4.0.0 — Bounded Autonomy & Self-Modifying Workflows -> **Scope**: Evolving the pipeline from a linear generator into a continuously-running autonomous ecology. Agents define, write, evaluate, and rewrite their own generative models. +> **Scope**: Only after v1.9, v2.0, and v3.0 reliability gates are release-grade, promote bounded proposal artifacts toward reviewed self-editing workflows. -- [ ] **Self-Modifying Active Inference** — Implement the capacity for the pipeline to self-recompile agent matrices based on failed execution evaluations, entering a recursive design loop. -- [ ] **Multimodal Agent Interfaces** — Integrate real-time vision processing into the `execute/` modules, allowing simulated agents to optimize policies based on dynamic streams natively defined in their notation. -- [ ] **Distributed Ecology Scaling** — Implement container orchestration allowing massive-scale distributed agent computing clusters triggered by GNN architecture definitions. +- [ ] **Autonomous Candidate Scoring** — Expand proposal-only candidate patch scoring using existing validators, model-family ledgers, execution summaries, and interpretability reports. +- [ ] **Reviewed Self-Editing GNN Files** — Add guarded workflows for proposing and applying GNN edits only after explicit user approval; no automatic source mutation. +- [ ] **Autonomous Ecology Controls** — Add policy, rollback, audit, and security controls before any self-editing or distributed mutation is permitted. 
### Acceptance ```bash -uv run python src/main.py --autonomous --target-dir input/recursive_models/ +uv run python src/main.py --autonomous --target-dir input/recursive_models/ --output-dir /tmp/gnn-autonomous-smoke ``` --- @@ -74,5 +145,5 @@ uv run python src/main.py --autonomous --target-dir input/recursive_models/ ## Conventions - Versions follow [SemVer](https://semver.org/) — `MAJOR.MINOR.PATCH` -- All releases require 100% pipeline stability, real-implementation test coverage, documentation integrity, and verifiable console acceptance metrics. +- All releases require current verifier gates, real-implementation tests for changed surfaces, documentation integrity, and verifiable console acceptance metrics. - Items marked `[x]` are verified complete against the codebase, not estimated. diff --git a/doc/axiom/axiom_implementation/axiom_core_architecture.md b/doc/axiom/axiom_implementation/axiom_core_architecture.md index deacab486..192342302 100644 --- a/doc/axiom/axiom_implementation/axiom_core_architecture.md +++ b/doc/axiom/axiom_implementation/axiom_core_architecture.md @@ -144,4 +144,4 @@ Signature: Author: "AXIOM Research Team" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-gnn@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/axiom/axiom_implementation/axiom_identity_mixture_model.md b/doc/axiom/axiom_implementation/axiom_identity_mixture_model.md index 82b79bc36..112c43526 100644 --- a/doc/axiom/axiom_implementation/axiom_identity_mixture_model.md +++ b/doc/axiom/axiom_implementation/axiom_identity_mixture_model.md @@ -121,4 +121,4 @@ Signature: Author: "AXIOM Research Team - iMM Module" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-imm@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/axiom/axiom_implementation/axiom_planning.md 
b/doc/axiom/axiom_implementation/axiom_planning.md index e382df395..38f975df1 100644 --- a/doc/axiom/axiom_implementation/axiom_planning.md +++ b/doc/axiom/axiom_implementation/axiom_planning.md @@ -188,4 +188,4 @@ Signature: Author: "AXIOM Research Team - Planning Module" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-planning@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/axiom/axiom_implementation/axiom_recurrent_mixture_model.md b/doc/axiom/axiom_implementation/axiom_recurrent_mixture_model.md index 0437b9313..ac2e56414 100644 --- a/doc/axiom/axiom_implementation/axiom_recurrent_mixture_model.md +++ b/doc/axiom/axiom_implementation/axiom_recurrent_mixture_model.md @@ -181,4 +181,4 @@ Signature: Author: "AXIOM Research Team - rMM Module" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-rmm@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/axiom/axiom_implementation/axiom_slot_mixture_model.md b/doc/axiom/axiom_implementation/axiom_slot_mixture_model.md index a03a9f33e..e2f78e757 100644 --- a/doc/axiom/axiom_implementation/axiom_slot_mixture_model.md +++ b/doc/axiom/axiom_implementation/axiom_slot_mixture_model.md @@ -117,4 +117,4 @@ Signature: Author: "AXIOM Research Team - sMM Module" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-smm@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/axiom/axiom_implementation/axiom_structure_learning.md b/doc/axiom/axiom_implementation/axiom_structure_learning.md index 6d68d6d64..d925c1953 100644 --- a/doc/axiom/axiom_implementation/axiom_structure_learning.md +++ b/doc/axiom/axiom_implementation/axiom_structure_learning.md @@ -266,4 +266,4 @@ Signature: Author: "AXIOM Research Team - Structure 
Learning Module" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-structure@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/axiom/axiom_implementation/axiom_transition_mixture_model.md b/doc/axiom/axiom_implementation/axiom_transition_mixture_model.md index 1563e05d1..4481d00d9 100644 --- a/doc/axiom/axiom_implementation/axiom_transition_mixture_model.md +++ b/doc/axiom/axiom_implementation/axiom_transition_mixture_model.md @@ -128,4 +128,4 @@ Signature: Author: "AXIOM Research Team - tMM Module" Institution: "VERSES AI / Active Inference Institute" ContactEmail: "axiom-tmm@verses.ai" - DOI: "TBD - Not yet published" \ No newline at end of file + DOI: "Not assigned; draft specification" diff --git a/doc/gnn/README.md b/doc/gnn/README.md index c370c90c1..08e801271 100644 --- a/doc/gnn/README.md +++ b/doc/gnn/README.md @@ -1,7 +1,7 @@ # GNN Documentation Index **Version**: v1.6.0 Engine (Bundle v2.0.0) -**Last Updated**: 2026-05-08 +**Last Updated**: 2026-06-10 **Status**: Maintained **Pipeline Steps**: 25 @@ -252,6 +252,6 @@ All GNN documentation follows these principles: **GNN syntax standard**: v1.1 (see [gnn_syntax.md](gnn_syntax.md)) **Python package** (`generalized-notation-notation`): **1.6.0** ([pyproject.toml](../../pyproject.toml)) **Total Pipeline Steps**: 25 (0–24) -**Last Updated**: 2026-05-08 +**Last Updated**: 2026-06-10 **Status**: Maintained (re-run `uv run --extra dev python doc/development/docs_audit.py` after link or tree changes) diff --git a/doc/gnn/modules/02_tests.md b/doc/gnn/modules/02_tests.md index 9385d090b..912f2ce5d 100644 --- a/doc/gnn/modules/02_tests.md +++ b/doc/gnn/modules/02_tests.md @@ -494,7 +494,7 @@ def test_new_module_complex(): ### Test Files - **120+** `test_*.py` modules under `src/tests/` (exact count drifts; use `find src/tests -maxdepth 1 -name 'test_*.py' | wc -l`) -- **2,296** collected tests with standard 
Ollama integration ignores as measured by collect-only on 2026-06-09 +- **2,397** collected tests with standard Ollama integration ignores as measured by collect-only on 2026-06-12 - **20+ test categories** for organized execution - **25+ test markers** for selective execution diff --git a/doc/gnn/operations/improvement_analysis.md b/doc/gnn/operations/improvement_analysis.md index b4fff9823..6852067ef 100644 --- a/doc/gnn/operations/improvement_analysis.md +++ b/doc/gnn/operations/improvement_analysis.md @@ -1,11 +1,11 @@ # GNN Pipeline Improvement Analysis **Version**: v1.6.0 Engine (Bundle v2.0.0) -**Last Updated**: 2026-04-15 -**Status**: ✅ Production Ready +**Last Reviewed**: 2026-06-12 +**Status**: Historical analysis; use `TO-DO.md` and current verifier output for live release status **Modules**: 38+ · **Pipeline steps**: 25 · **Renderers**: 9 backends (see [../implementations/README.md](../implementations/README.md)) · **Tests**: see [../../../README.md](../../../README.md) -Comprehensive analysis of identified areas for improvement, streamlining, and ensuring robust functionality within and across modules. +Historical analysis of identified areas for improvement, streamlining, and ensuring robust functionality within and across modules. This page is not a current pass-rate, runtime, or release-readiness certificate. ## Pipeline Architecture References @@ -16,10 +16,9 @@ For current pipeline implementation and standards: - **[src/main.py](../../../src/main.py)**: Pipeline orchestrator implementation - **[architecture_reference.md](../reference/architecture_reference.md)**: Implementation patterns and cross-module data flow -- 25 steps (0-24): All operational with 100% success rate -- Execution time: ~2 minutes for full pipeline -- Memory usage: < 25MB peak (excellent efficiency) -- All modules use thin orchestrator pattern +- 25 steps (0-24) are maintained through the thin-orchestrator architecture. 
+- Current pass counts, runtime, optional-backend availability, and release evidence must be taken from live command output and `TO-DO.md`. +- Optional frameworks must be reported as passed, skipped, or failed explicitly; fallback behavior is not release evidence. --- @@ -262,7 +261,8 @@ def process_gnn_content(content: str) -> dict: **Issues:** -- Pipeline step template contains TODOs and example comments (`src/pipeline_step_template.py:47-51`) +- Pipeline step template has been replaced by maintained thin-orchestrator + examples; audit current numbered scripts before filing new template issues. - Mixed step counting (template says 13 steps, actual pipeline has 24) - Inconsistent validation patterns @@ -292,9 +292,12 @@ def process_gnn_content(content: str) -> dict: ### Priority 1: High (Immediate Attention) -## Implementation Plan +## Historical Remediation Outline -### Phase 1: Critical Infrastructure (Weeks 1-2) +The outline below records improvement themes from the earlier analysis. Treat it +as background context, not as an active dated schedule. + +### Phase 1: Critical Infrastructure 1. **Standardize Dependency Management** - Expand `DependencyManager` to handle all modules @@ -306,7 +309,7 @@ def process_gnn_content(content: str) -> dict: - Standardize correlation ID system across modules - Implement consistent recovery strategies -### Phase 2: Cross-Module Standards (Weeks 3-4) +### Phase 2: Cross-Module Standards 1. **Data Exchange Standards** - Create `src/data_contracts.py` with schemas @@ -318,7 +321,7 @@ def process_gnn_content(content: str) -> dict: - Handle Python 3.13 compatibility centrally - Standardize alternative function patterns -### Phase 3: MCP Completion (Weeks 5-6) +### Phase 3: MCP Completion 1. 
**Complete MCP Integration** - Fix render and execute module import issues @@ -328,7 +331,7 @@ def process_gnn_content(content: str) -> dict: ## Success Metrics -### Quantitative Targets +### Historical Quantitative Targets - **Dependency Success Rate**: 95%+ modules load without errors - **Cross-Module Communication**: 100% standardized data formats diff --git a/doc/tutorials/interactive_tutorial_system.md b/doc/tutorials/interactive_tutorial_system.md index 29289ba86..92f62b8f8 100644 --- a/doc/tutorials/interactive_tutorial_system.md +++ b/doc/tutorials/interactive_tutorial_system.md @@ -448,4 +448,4 @@ python src/tutorials/quick_start_wizard.py --- **Status**: Design Phase - Proposed Interactive Platform -**Next Steps**: Launch Tutorials (TBD) | Tutorial Development (TBD) +**Next Steps**: Not launched; tutorial development remains proposed and is tracked through the maintained roadmap. diff --git a/input/model_family_manifest.json b/input/model_family_manifest.json new file mode 100644 index 000000000..6eca3c6ad --- /dev/null +++ b/input/model_family_manifest.json @@ -0,0 +1,94 @@ +{ + "schema": "gnn_model_family_manifest_v1", + "acceptance_profile_defaults": { + "required_steps": [3, 5, 6, 15, 16, 23], + "evidence_steps": [11, 12], + "allow_unsupported_steps": [], + "allow_unsupported_reason_patterns": [] + }, + "families": [ + { + "name": "basics", + "description": "Minimal perception fixtures used for parser and validator smoke coverage.", + "target_dir": "input/gnn_files/basics", + "frameworks": "pymdp", + "representative_files": ["dynamic_perception.md"] + }, + { + "name": "discrete", + "description": "Discrete POMDP and HMM-style active inference fixtures.", + "target_dir": "input/gnn_files/discrete", + "frameworks": "pymdp", + "representative_files": ["actinf_pomdp_agent.md", "simple_mdp.md"] + }, + { + "name": "continuous", + "description": "Continuous-state and stochastic-dynamics fixtures.", + "target_dir": "input/gnn_files/continuous", + 
"frameworks": "jax", + "representative_files": ["continuous_navigation.md", "stochastic_dynamics.md"], + "acceptance_profile": { + "evidence_steps": [], + "allow_unsupported_steps": [11, 12], + "allow_unsupported_reason_patterns": [ + "POMDP not compatible", + "Missing required matrices", + "no_executable_scripts", + "No executable scripts found" + ] + } + }, + { + "name": "hierarchical", + "description": "Hierarchical and temporal model fixtures.", + "target_dir": "input/gnn_files/hierarchical", + "frameworks": "pymdp", + "representative_files": ["hierarchical_pomdp.md", "temporal_hierarchy.md"], + "acceptance_profile": { + "evidence_steps": [], + "allow_unsupported_steps": [11, 12], + "allow_unsupported_reason_patterns": [ + "POMDP not compatible", + "Missing required matrices", + "no_executable_scripts", + "No executable scripts found" + ] + } + }, + { + "name": "multiagent", + "description": "Multi-agent coordination and swarm fixtures.", + "target_dir": "input/multi_agent_models", + "frameworks": "rxinfer", + "representative_files": ["multi_agent_coordination.md"] + }, + { + "name": "precision", + "description": "Precision weighting and curiosity-driven fixtures.", + "target_dir": "input/gnn_files/precision", + "frameworks": "pymdp", + "representative_files": ["curiosity_driven_agent.md", "precision_weighted.md"] + }, + { + "name": "structured", + "description": "Structured factor graph and posterior fixtures.", + "target_dir": "input/gnn_files/structured", + "frameworks": "pymdp", + "representative_files": ["factor_graph_model.md"] + }, + { + "name": "gridworld", + "description": "Gridworld POMDP fixture used for cross-framework acceptance checks.", + "target_dir": "input/gnn_files/pomdp_gridworld", + "frameworks": "pymdp", + "representative_files": ["pomdp_gridworld_3x3.md"] + }, + { + "name": "scaling-study", + "description": "PyMDP scaling-study fixtures, sampled conservatively for acceptance.", + "target_dir": "input/gnn_files/pymdp_scaling_study", + 
"frameworks": "pymdp", + "representative_files": ["pymdp_scaling_N2_T10.md", "pymdp_scaling_N4_T10.md"] + } + ] +} diff --git a/input/multi_agent_models/README.md b/input/multi_agent_models/README.md new file mode 100644 index 000000000..111247110 --- /dev/null +++ b/input/multi_agent_models/README.md @@ -0,0 +1,6 @@ +# Multi-Agent Model Fixtures + +This directory holds compact multi-agent GNN fixtures used by roadmap +acceptance commands. The maintained examples under `input/gnn_files/multiagent/` +remain the canonical authored models; this directory exists so Step 11/12 +acceptance commands target a real directory. diff --git a/input/multi_agent_models/multi_agent_coordination.md b/input/multi_agent_models/multi_agent_coordination.md new file mode 100644 index 000000000..eb6eacfc5 --- /dev/null +++ b/input/multi_agent_models/multi_agent_coordination.md @@ -0,0 +1,39 @@ +# Multi-Agent Coordination Acceptance Fixture + +## ModelName +Multi-Agent Coordination Acceptance Fixture + +## ModelAnnotation +Compact fixture for RxInfer and DisCoPy roadmap acceptance tests. 
+ +## StateSpaceBlock +s[2,1,type=categorical] +o[2,1,type=categorical] +u[2,1,type=categorical] + +## Connections +s > o +s > s +u > s + +## InitialParameterization +nr_agents=3 +agent_ids=[1,2,3] +agent_initial_positions=[[0.0,0.0],[1.0,0.0],[0.0,1.0]] +agent_target_positions=[[2.0,2.0],[3.0,2.0],[2.0,3.0]] +agent_radii=[1.0,1.0,1.0] +agent_edges=[[1,2],[2,3]] +agent_clusters=[{"name":"left","agent_ids":[1,2]},{"name":"right","agent_ids":[3]}] +message_passing=clustered_mean_field +A={(0.9,0.1),(0.1,0.9)} +B={((0.9,0.1),(0.1,0.9)),((0.1,0.9),(0.9,0.1))} +C={(1.0,0.0)} +D={(0.5,0.5)} + +## Time +Dynamic + +## ActInfOntologyAnnotation +s=HiddenState +o=Observation +u=Action diff --git a/input/recursive_models/README.md b/input/recursive_models/README.md new file mode 100644 index 000000000..8acad675b --- /dev/null +++ b/input/recursive_models/README.md @@ -0,0 +1,5 @@ +# Recursive Model Fixtures + +This directory is the default target for bounded `--autonomous` proposal-loop +acceptance commands. Autonomous mode writes candidate proposals under +`output/autonomous/` and does not edit files in this directory. 
diff --git a/pyproject.toml b/pyproject.toml index b8ed563dd..959b04ed6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "generalized-notation-notation" -version = "1.6.0" +version = "1.8.0" description = "A text-based language for standardizing Active Inference generative models" readme = "README.md" requires-python = ">=3.11,<3.14" diff --git a/scripts/check_capability_contracts.py b/scripts/check_capability_contracts.py new file mode 100644 index 000000000..d92abf810 --- /dev/null +++ b/scripts/check_capability_contracts.py @@ -0,0 +1,457 @@ +#!/usr/bin/env python3 +"""Audit roadmap-visible capability claims against source support.""" + +from __future__ import annotations + +import argparse +import re +import sys +from pathlib import Path +from typing import Iterable, List + +REPO_ROOT = Path(__file__).resolve().parents[1] + + +def _read(path: str) -> str: + return (REPO_ROOT / path).read_text(encoding="utf-8") + + +def _exists(path: str) -> bool: + return (REPO_ROOT / path).exists() + + +def _contains(path: str, patterns: Iterable[str]) -> bool: + text = _read(path) + return all(pattern in text for pattern in patterns) + + +def _maintained_test_directory_counts() -> tuple[int, int]: + tests_root = REPO_ROOT / "src" / "tests" + directories = [ + path + for path in tests_root.iterdir() + if path.is_dir() and not path.name.startswith("__") + ] + with_direct_tests = [path for path in directories if any(path.glob("test_*.py"))] + return len(directories), len(with_direct_tests) + + +def run_audit() -> List[str]: + """Return a list of contract violations.""" + failures: List[str] = [] + + stale_patterns = ( + "Last Updated**: 2026-05-08", + "2,250 passed", + "7 skipped, 1 xpassed", + ) + for doc in ( + "TO-DO.md", + "DOCS.md", + "ARCHITECTURE.md", + ".agent_rules/README.md", + "doc/gnn/README.md", + "doc/gnn/modules/02_tests.md", + "src/AGENTS.md", + "src/tests/TEST_SUITE_SUMMARY.md", + ): + text = 
_read(doc) + for pattern in stale_patterns: + if pattern in text: + failures.append(f"{doc}: stale measured-doc pattern remains: {pattern}") + + improvement_text = _read("doc/gnn/operations/improvement_analysis.md") + for pattern in ( + "100% success rate", + "Execution time: ~2 minutes", + "Week 1:", + "Week 2:", + "Implementation Plan", + "Implementation Timeline", + ): + if pattern in improvement_text: + failures.append( + "doc/gnn/operations/improvement_analysis.md: " + f"stale live-status wording remains: {pattern}" + ) + + cli_text = _read("src/cli/__init__.py") + for command in ('add_parser("templates"', 'add_parser("show"', 'add_parser("pull"'): + if command not in cli_text: + failures.append( + f"src/cli/__init__.py: missing CLI command contract {command}" + ) + if not _exists("src/cli/templates.py"): + failures.append("src/cli/templates.py: template library implementation missing") + else: + templates_text = _read("src/cli/templates.py") + for required in ( + "importlib import resources", + "_validate_template_record", + "template_assets", + "PurePosixPath", + "destination.is_symlink()", + ): + if required not in templates_text: + failures.append( + f"src/cli/templates.py: missing package-data/template safety guard {required}" + ) + if not _exists("src/cli/template_index.json"): + failures.append("src/cli/template_index.json: external template index missing") + elif "pomdp-gridworld-3x3" not in _read("src/cli/template_index.json"): + failures.append("src/cli/template_index.json: gridworld template missing") + + todo_text = _read("TO-DO.md") + if ( + "**Next Target**: v1.8.0" in todo_text + and "v1.7.0 remains foundation-only/deferred" not in todo_text + ): + failures.append( + "TO-DO.md: v1.8.0 is next while v1.7.0 remains unchecked without an explicit deferred/foundation-only status" + ) + + readme_tests = _read("src/tests/README.md") + maintained_dirs, direct_test_dirs = _maintained_test_directory_counts() + expected_count_text = ( + f"{maintained_dirs} 
maintained first-level subdirectories; " + f"{direct_test_dirs} contain direct test files" + ) + if expected_count_text not in readme_tests: + failures.append( + "src/tests/README.md: maintained test-directory count drift; " + f"expected '{expected_count_text}'" + ) + + main_acceptance_without_output_dir = _main_commands_missing_isolated_output_dir( + todo_text + ) + for command in main_acceptance_without_output_dir: + failures.append( + "TO-DO.md: acceptance pipeline command must use an isolated /tmp output dir: " + f"{command}" + ) + + guarded_pending_items = ( + "Multi-Agent Message Passing (RxInfer)", + "Categorical Symmetries (DisCoPy)", + "Reactive WebSocket GUI", + "Audio Parameter Streaming", + "3D Matrix Visualization", + "GNN Template Library Engine", + "MCP Local HTTP Orchestration", + "Model-Family Acceptance Harness", + "Cross-Step Evidence Ledger", + "Interpretability Summaries", + "Semantic Round-Trip Gates", + "Cross-Framework Result Comparisons", + "Release Readiness Ledger", + "Durable Observation Streams", + "Long-Running Pipeline Sessions", + "Auditable Container Plans", + "Autonomous Candidate Scoring", + "Reviewed Self-Editing GNN Files", + "Autonomous Ecology Controls", + ) + release_evidence_by_item = { + "GNN Template Library Engine": ( + "gnn templates list", + "gnn templates show pomdp-gridworld-3x3", + "gnn pull` to `/tmp/gnn-pull", + "combined CLI/MCP/capability suite `32 passed`", + ), + "MCP Local HTTP Orchestration": ( + "authenticated\nMCP HTTP tests (`12 passed`", + "combined CLI/MCP/capability suite `32 passed`", + "`just lint` passes", + ), + } + for item in guarded_pending_items: + if f"- [x] **{item}**" in todo_text: + required_evidence = release_evidence_by_item.get(item) + if not required_evidence or not all( + evidence in todo_text for evidence in required_evidence + ): + failures.append( + f"TO-DO.md: {item} is marked complete before release-readiness evidence" + ) + for path in ("input/multi_agent_models", 
"src/tests/audio", "src/tests/gui"): + if path in todo_text and not _exists(path): + failures.append(f"TO-DO.md: acceptance path does not exist: {path}") + for path in ( + "input/model_family_manifest.json", + "scripts/run_model_family_acceptance.py", + "src/tests/pipeline/test_model_family_acceptance.py", + "src/tests/analysis/test_interpretability_summary.py", + "src/tests/report/test_model_family_report.py", + ): + if path in todo_text and not _exists(path): + failures.append(f"TO-DO.md: acceptance path does not exist: {path}") + + roadmap_sections = _split_todo_sections(todo_text) + early_versions = ("v1.8.0", "v1.9.0", "v2.0.0", "v3.0.0") + autonomy_patterns = ( + "Self-Modifying", + "self-editing", + "self editing", + "rewrite their own", + "autonomous ecology", + ) + for version in early_versions: + section = roadmap_sections.get(version, "") + for pattern in autonomy_patterns: + if pattern.lower() in section.lower(): + failures.append( + f"TO-DO.md: autonomy/self-editing claim appears before v4.0.0 in {version}" + ) + if ( + "v4.0.0" not in roadmap_sections + or "--autonomous" not in roadmap_sections["v4.0.0"] + ): + failures.append("TO-DO.md: bounded autonomous mode must be scoped under v4.0.0") + + for required in ( + "input/model_family_manifest.json", + "scripts/run_model_family_acceptance.py", + "src/pipeline/model_family_acceptance.py", + "src/analysis/interpretability.py", + "src/report/model_family.py", + ): + if not _exists(required): + failures.append(f"v1.9 model-family contract missing: {required}") + + if "WebSocket" in todo_text: + if not _contains( + "src/gui/websocket_bridge.py", + ( + "model.load", + "matrix.patch", + "validation.result", + "model.export", + "error", + ), + ): + failures.append( + "src/gui/websocket_bridge.py: missing required GUI message types" + ) + + docs_with_three = [ + path + for path in ("TO-DO.md", "src/advanced_visualization/README.md") + if re.search(r"Three\.js|three\.js", _read(path)) + ] + if docs_with_three 
and not _contains( + "src/visualization/matrix/visualizer.py", + ("generate_threejs_tensor_explorer", "three@"), + ): + failures.append( + "Three.js is documented but matrix Three.js renderer is missing" + ) + + if "GNN_MCP_TOKEN" in todo_text or "authenticated HTTP" in todo_text: + if not _contains( + "src/mcp/server_http.py", ("GNN_MCP_TOKEN", "Authorization", "Bearer") + ): + failures.append( + "MCP HTTP auth is documented but bearer-token gate is missing" + ) + server_http_text = _read("src/mcp/server_http.py") + if "GNN_MCP_ALLOW_INSECURE_LOCAL" not in server_http_text: + failures.append( + "src/mcp/server_http.py: insecure local HTTP opt-in variable missing" + ) + if "is_loopback_client" not in server_http_text: + failures.append( + "src/mcp/server_http.py: insecure local HTTP opt-in must be loopback-gated" + ) + if "get_environment_info" in _safe_allowlist_literal(server_http_text): + failures.append( + "src/mcp/server_http.py: get_environment_info must not be safe by default" + ) + if "get_system_info" in _safe_allowlist_literal(server_http_text): + failures.append( + "src/mcp/server_http.py: get_system_info must not be safe by default" + ) + if "GNN_MCP_SAFE_RESOURCES" not in server_http_text: + failures.append( + "src/mcp/server_http.py: missing explicit HTTP resource allowlist" + ) + if "is_safe_http_resource" not in server_http_text: + failures.append( + "src/mcp/server_http.py: mcp.resource.get must be default-denied" + ) + if "get_http_capabilities" not in server_http_text: + failures.append( + "src/mcp/server_http.py: HTTP capabilities must be allowlist-filtered" + ) + do_post_index = server_http_text.find("def do_POST") + auth_index = server_http_text.find("is_authorized(", do_post_index) + rate_index = server_http_text.find("is_rate_limited(", do_post_index) + if auth_index != -1 and rate_index != -1 and auth_index < rate_index: + failures.append( + "src/mcp/server_http.py: rate limiting must run before bearer auth" + ) + + acceptance_text = 
_read("src/pipeline/model_family_acceptance.py") + if "_load_pipeline_summary" not in acceptance_text: + failures.append( + "src/pipeline/model_family_acceptance.py: missing pipeline summary parsing" + ) + if "_selected_steps_passed" not in acceptance_text: + failures.append( + "src/pipeline/model_family_acceptance.py: missing selected-step status gate" + ) + if "acceptance_profile_defaults" not in _read("input/model_family_manifest.json"): + failures.append( + "input/model_family_manifest.json: missing manifest-level acceptance profiles" + ) + if "pipeline_passed = False" not in acceptance_text: + failures.append( + "src/pipeline/model_family_acceptance.py: missing pipeline-summary fail-closed path" + ) + if ( + "return_code in {0, 2}" in acceptance_text + and "pipeline_summary" not in acceptance_text + ): + failures.append( + "src/pipeline/model_family_acceptance.py: return code 2 is accepted without summary evidence" + ) + for required in ( + "_reset_family_dir", + "STEP_ARTIFACT_REQUIREMENTS", + "missing_artifact_evidence", + "_pipeline_run_outcome_acceptable", + "partial_render_failure", + ): + if required not in acceptance_text: + failures.append( + "src/pipeline/model_family_acceptance.py: " + f"missing model-family oracle hardening marker {required}" + ) + + rxinfer_toml_text = _read("src/render/rxinfer/toml_generator.py") + if not all( + required in rxinfer_toml_text + for required in ( + "agent_ids", + "agent_initial_positions", + "agent_target_positions", + ) + ): + failures.append("RxInfer compact multi-agent keys are missing") + for required in ( + "_validate_topology_references", + "Malformed topology edge", + "members must be a list", + ): + if required not in rxinfer_toml_text: + failures.append( + "src/render/rxinfer/toml_generator.py: " + f"missing topology fail-closed marker {required}" + ) + + rxinfer_renderer_text = _read("src/render/rxinfer/rxinfer_renderer.py") + execute_text = _read("src/execute/processor.py") + for required in 
("script_sha256", "metadata_provenance"): + if required not in rxinfer_renderer_text or required not in execute_text: + failures.append( + "RxInfer execution metadata missing sidecar provenance/hash guard: " + f"{required}" + ) + + audio_processor_text = _read("src/audio/processor.py") + audio_streaming_text = _read("src/audio/streaming.py") + for required in ( + "telemetry_provenance", + "relative_to(execution_output_dir.resolve())", + 'write_stream_summary([], output_dir / "audio_stream_chunks.json")', + ): + if required not in audio_processor_text: + failures.append( + f"src/audio/processor.py: missing audio streaming guard {required}" + ) + if '"streaming_safe": False' not in audio_streaming_text: + failures.append( + "src/audio/streaming.py: empty telemetry chunks must not be streaming-safe" + ) + + if "pip install" in _read("src/render/discopy/discopy_renderer.py"): + failures.append("DisCoPy renderer still emits runtime dependency installation") + + if not _exists("src/pipeline/autonomous.py"): + failures.append("Autonomous proposal loop implementation missing") + autonomous_text = ( + _read("src/pipeline/autonomous.py") + if _exists("src/pipeline/autonomous.py") + else "" + ) + for required in ("source_mutation_performed", "cluster_mutation_performed"): + if required not in autonomous_text: + failures.append( + f"Autonomous proposal loop missing non-mutation marker: {required}" + ) + if "--autonomous" not in _read("src/utils/argument_utils.py"): + failures.append("Pipeline argument parser missing --autonomous") + + return failures + + +def _main_commands_missing_isolated_output_dir(todo_text: str) -> list[str]: + """Return acceptance commands that run src/main.py without /tmp output.""" + failures: list[str] = [] + for line in todo_text.splitlines(): + command = line.strip() + if not command or command.startswith("#"): + continue + if "src/main.py" not in command: + continue + if "--output-dir /tmp/" not in command: + failures.append(command) + return 
failures + + +def _safe_allowlist_literal(source: str) -> str: + """Extract the default MCP safe-tool allowlist source text.""" + match = re.search( + r"DEFAULT_SAFE_HTTP_TOOL_NAMES\s*=\s*frozenset\(\s*\{(.*?)\}\s*\)", + source, + re.DOTALL, + ) + return match.group(1) if match else "" + + +def _split_todo_sections(todo_text: str) -> dict[str, str]: + """Return roadmap sections keyed by semantic version heading.""" + sections: dict[str, str] = {} + matches = list(re.finditer(r"^## .*?(v\d+\.\d+\.\d+).*?$", todo_text, re.MULTILINE)) + for index, match in enumerate(matches): + start = match.start() + end = matches[index + 1].start() if index + 1 < len(matches) else len(todo_text) + sections[match.group(1)] = todo_text[start:end] + return sections + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--strict", + action="store_true", + help="Compatibility flag; failures are strict by default", + ) + parser.add_argument( + "--warn-only", + action="store_true", + help="Report failures without a nonzero exit", + ) + args = parser.parse_args(argv) + + failures = run_audit() + if failures: + for failure in failures: + print(f"FAIL: {failure}") + return 0 if args.warn_only else 1 + print("Capability contracts verified") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/run_model_family_acceptance.py b/scripts/run_model_family_acceptance.py new file mode 100644 index 000000000..df64323e8 --- /dev/null +++ b/scripts/run_model_family_acceptance.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Run manifest-driven GNN model-family acceptance checks.""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SRC_DIR = REPO_ROOT / "src" +if str(SRC_DIR) not in sys.path: + sys.path.insert(0, str(SRC_DIR)) + +from pipeline.model_family_acceptance import run_model_family_acceptance + 
+ +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--manifest", + type=Path, + default=Path("input/model_family_manifest.json"), + help="Path to the model-family manifest", + ) + parser.add_argument( + "--families", + default="", + help="Comma-separated family names to run; defaults to all families", + ) + parser.add_argument( + "--output-dir", + type=Path, + required=True, + help="Directory for acceptance ledger artifacts", + ) + parser.add_argument( + "--only-steps", + default="3,5,6,11,12,15,16,23", + help="Pipeline steps to run for each family; use empty string for full pipeline", + ) + parser.add_argument( + "--frameworks", + default="", + help="Override manifest renderer/executor frameworks for evidence-step runs", + ) + parser.add_argument("--strict", action="store_true", help="Fail on family failure") + args = parser.parse_args(argv) + + families = [item.strip() for item in args.families.split(",") if item.strip()] + try: + ledger = run_model_family_acceptance( + args.manifest, + args.output_dir, + family_names=families, + only_steps=args.only_steps or None, + frameworks=args.frameworks or None, + strict=args.strict, + ) + except (FileNotFoundError, KeyError, RuntimeError, ValueError) as exc: + print(f"FAIL: {exc}", file=sys.stderr) + return 1 + print(f"Model-family acceptance {ledger['status']}: {args.output_dir}") + return 0 if ledger["status"] == "passed" else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/run_pymdp_gnn_scaling_analysis.py b/scripts/run_pymdp_gnn_scaling_analysis.py index 42f74e348..e614de9ba 100755 --- a/scripts/run_pymdp_gnn_scaling_analysis.py +++ b/scripts/run_pymdp_gnn_scaling_analysis.py @@ -39,7 +39,10 @@ PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.append(str(PROJECT_ROOT)) -from pymdp_spec_generator import estimate_gnn_file_bytes, generate_gnn_file +from pymdp_spec_generator import ( # type: 
ignore[import-not-found] + estimate_gnn_file_bytes, + generate_gnn_file, +) from utils.visual_logging import VisualConfig, create_visual_logger @@ -420,7 +423,7 @@ def _write_run_manifest(pipeline_output_dir: Path, manifest: dict[str, object]) return path -def _usage_snapshot(path: Path) -> dict[str, int | float]: +def _usage_snapshot(path: Path) -> dict[str, Any]: u = shutil.disk_usage(path) total = u.total used = u.total - u.free @@ -646,7 +649,7 @@ def _load_and_validate_config() -> dict: try: with open(CONFIG_FILE, "r") as f: config = yaml.safe_load(f) - if config is None: + if not isinstance(config, dict): config = {} return config except yaml.YAMLError as e: diff --git a/src/AGENTS.md b/src/AGENTS.md index 9f53a3dae..b6c7b507b 100644 --- a/src/AGENTS.md +++ b/src/AGENTS.md @@ -189,10 +189,10 @@ graph TD --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py`. Re-include the two Ollama files when `ollama` is installed and reachable. -- **Current test inventory (2026-06-09)**: 171 `test_*.py` files under `src/tests/`; - the command-of-record collect pass with Ollama integration tests ignored collected 2,296 tests. - Latest recorded full suite with the same Ollama integration excludes passed on 2026-06-09: - 2,281 passed, 14 skipped, 1 xfailed in 744.50s. +- **Current test inventory (2026-06-12)**: 184 `test_*.py` files under `src/tests/`; + the command-of-record collect pass with Ollama integration tests ignored collected 2,397 tests. + Latest recorded full suite evidence with the same Ollama integration excludes is + 2,379 passed, 17 skipped, 1 xfailed. - All 25 orchestrator scripts comply with the <150 line thin orchestrator pattern. - Maintained source/test documentation coverage is enforced by `doc/development/docs_audit.py --strict`. 
def build_model_interpretability_summary(
    model_path: Path, pipeline_output_dir: Path | None = None
) -> Dict[str, Any]:
    """Summarize one GNN model from parser, POMDP, and artifact evidence.

    Args:
        model_path: GNN model source file; read as UTF-8.
        pipeline_output_dir: Root of a pipeline run to mine for telemetry
            previews, artifact links, and step evidence. ``None`` means no
            run is available, and nothing on disk is scanned.

    Returns:
        A dictionary describing the model's variables, connections, matrix
        shapes, validation messages, pipeline evidence, telemetry preview,
        and artifact links.
    """
    content = model_path.read_text(encoding="utf-8")
    source_file = str(model_path)
    variables, variable_errors = parse_state_space(content, file_path=source_file)
    variable_names = {variable.name for variable in variables}
    connections, connection_errors = parse_connections(
        content,
        known_variables=variable_names,
        file_path=source_file,
    )
    matrix_errors = validate_matrix_dimensions(
        content, variables, file_path=source_file
    )
    pomdp_space = POMDPExtractor(strict_validation=False).extract_from_gnn_content(
        content
    )
    matrices = pomdp_space.matrices if pomdp_space and pomdp_space.matrices else {}

    telemetry_preview: Dict[str, Any]
    pipeline_evidence: Dict[str, Any]
    artifact_links: List[str]
    if pipeline_output_dir is None:
        # Substituting ``Path()`` here would point at the current working
        # directory, which always exists, so the collectors below would walk
        # the whole cwd and report unrelated files as evidence. Report
        # "nothing known" instead, in the same shape the collectors produce
        # for a missing directory.
        telemetry_preview = {}
        pipeline_evidence = {
            "pipeline_summary_available": False,
            "render_status": "unknown",
            "execution_status": "unknown",
            "render_summary_available": False,
            "execution_summary_available": False,
            "skip_or_failure_reason": None,
        }
        artifact_links = []
    else:
        telemetry_preview = _collect_trace_preview(
            pipeline_output_dir, model_path.stem
        )
        pipeline_evidence = _collect_pipeline_evidence(pipeline_output_dir)
        artifact_links = _collect_artifact_links(pipeline_output_dir, model_path.stem)

    return {
        "model_name": _extract_model_name(content) or model_path.stem,
        "source_file": source_file,
        "variables": [
            {
                "name": variable.name,
                "dimensions": variable.dimensions,
                "dtype": variable.dtype,
            }
            for variable in variables
        ],
        "variable_count": len(variables),
        "connections": [
            {
                "source": edge.source,
                "target": edge.target,
                "directed": edge.directed,
                "label": edge.label,
            }
            for edge in connections
        ],
        "connection_count": len(connections),
        "matrix_shapes": {
            name: _shape_of(value) for name, value in sorted(matrices.items())
        },
        "validation_messages": [
            str(error)
            for error in [*variable_errors, *connection_errors, *matrix_errors]
        ],
        "pipeline_evidence": pipeline_evidence,
        "telemetry_present": bool(telemetry_preview),
        "telemetry_preview": telemetry_preview,
        "artifact_links": artifact_links,
    }
interpretability Markdown report.""" + lines = [ + f"# Model Family Interpretability: {summary['family']}", + "", + f"- Target directory: {summary['target_dir']}", + f"- Models: {summary['model_count']}", + f"- Variables: {summary['totals']['variables']}", + f"- Connections: {summary['totals']['connections']}", + f"- Validation messages: {summary['totals']['validation_messages']}", + f"- Models with telemetry: {summary['totals']['models_with_telemetry']}", + "", + "| Model | Variables | Connections | Matrices | Render | Execute | Telemetry | Artifacts |", + "| --- | ---: | ---: | --- | --- | --- | --- | ---: |", + ] + for model in summary["models"]: + matrix_names = ", ".join(sorted(model["matrix_shapes"])) or "none" + evidence = model.get("pipeline_evidence", {}) + lines.append( + "| {name} | {variables} | {connections} | {matrices} | {render} | {execute} | {telemetry} | {artifacts} |".format( + name=model["model_name"], + variables=model["variable_count"], + connections=model["connection_count"], + matrices=matrix_names, + render=evidence.get("render_status", "unknown"), + execute=evidence.get("execution_status", "unknown"), + telemetry="present" if model.get("telemetry_present") else "missing", + artifacts=len(model["artifact_links"]), + ) + ) + return "\n".join(lines) + "\n" + + +def _extract_model_name(content: str) -> str | None: + match = re.search(r"^## ModelName\s*\n(?P.+?)\s*$", content, re.MULTILINE) + return match.group("name").strip() if match else None + + +def _shape_of(value: Any) -> List[int]: + if isinstance(value, (list, tuple)): + if not value: + return [0] + return [len(value), *_shape_of(value[0])] + return [] + + +def _collect_trace_preview(output_dir: Path, model_stem: str) -> Dict[str, Any]: + if not output_dir.exists(): + return {} + previews: Dict[str, Any] = {} + for json_path in _candidate_json_files(output_dir, model_stem): + payload = _load_json(json_path) + for key, value in _walk_trace_values(payload): + if key not in previews: + 
previews[key] = value[:5] if isinstance(value, list) else value + return previews + + +def _collect_artifact_links(output_dir: Path, model_stem: str) -> List[str]: + if not output_dir.exists(): + return [] + artifact_paths = [ + path + for path in output_dir.rglob("*") + if path.is_file() and model_stem.lower() in str(path).lower() + ] + return [str(path) for path in sorted(artifact_paths)[:25]] + + +def _collect_pipeline_evidence(output_dir: Path) -> Dict[str, Any]: + summary = _load_pipeline_summary(output_dir) + step_statuses = _extract_step_statuses(summary) + execution_summary = _load_first_json( + output_dir, + ("execution_summary.json", "execute_summary.json"), + ) + render_summary = _load_first_json( + output_dir, + ("render_processing_summary.json", "render_summary.json"), + ) + return { + "pipeline_summary_available": bool(summary), + "render_status": step_statuses.get("11", "unknown"), + "execution_status": step_statuses.get("12", "unknown"), + "render_summary_available": bool(render_summary), + "execution_summary_available": bool(execution_summary), + "skip_or_failure_reason": _extract_skip_or_failure_reason( + summary, render_summary, execution_summary + ), + } + + +def _candidate_json_files(output_dir: Path, model_stem: str) -> Iterable[Path]: + for path in sorted(output_dir.rglob("*.json")): + if model_stem.lower() in str(path).lower() or "summary" in path.name.lower(): + yield path + + +def _load_json(path: Path) -> Any: + try: + return json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return {} + + +def _load_pipeline_summary(output_dir: Path) -> Dict[str, Any]: + summary_path = ( + output_dir / "00_pipeline_summary" / "pipeline_execution_summary.json" + ) + payload = _load_json(summary_path) if summary_path.exists() else {} + return payload if isinstance(payload, dict) else {} + + +def _load_first_json(output_dir: Path, names: tuple[str, ...]) -> Dict[str, Any]: + if not output_dir.exists(): + return {} + for 
path in sorted(output_dir.rglob("*.json")): + if path.name in names: + payload = _load_json(path) + return payload if isinstance(payload, dict) else {} + return {} + + +def _extract_step_statuses(summary: Dict[str, Any]) -> Dict[str, str]: + statuses: Dict[str, str] = {} + for raw_step in summary.get("steps", []): + if not isinstance(raw_step, dict): + continue + script_name = str(raw_step.get("script_name", "")) + match = re.match(r"(?P\d+)_", script_name) + if not match: + continue + statuses[match.group("number")] = _normalize_status( + str(raw_step.get("status", "")) + ) + return statuses + + +def _normalize_status(status: str) -> str: + normalized = status.strip().upper().replace("-", "_") + if normalized in {"SUCCESS", "PASSED", "PASS", "OK"}: + return "passed" + if "SKIP" in normalized: + return "skipped" + if "SUCCESS" in normalized and "PARTIAL" not in normalized: + return "passed" + return "failed" if normalized else "unknown" + + +def _extract_skip_or_failure_reason( + pipeline_summary: Dict[str, Any], + render_summary: Dict[str, Any], + execution_summary: Dict[str, Any], +) -> str | None: + render_reasons = [ + str(item.get("message")) + for item in render_summary.get("failed_framework_renderings", []) + if isinstance(item, dict) and item.get("message") + ] + if render_reasons: + return "; ".join(render_reasons) + execution_reasons = [] + for key in ("skip_reason", "skipped_reason", "failure_reason", "error", "message"): + if execution_summary.get(key): + execution_reasons.append(str(execution_summary[key])) + for item in execution_summary.get("render_failures", []): + if isinstance(item, dict) and item.get("message"): + execution_reasons.append(str(item["message"])) + if execution_reasons: + return "; ".join(execution_reasons) + for raw_step in pipeline_summary.get("steps", []): + if not isinstance(raw_step, dict): + continue + status = _normalize_status(str(raw_step.get("status", ""))) + if status in {"failed", "skipped"}: + return str( + 
raw_step.get("error") + or raw_step.get("description") + or raw_step.get("script_name") + or status + ) + return None + + +def _walk_trace_values(payload: Any) -> Iterable[tuple[str, Any]]: + if isinstance(payload, dict): + for key, value in payload.items(): + if key in TRACE_KEYS and isinstance(value, list): + yield key, value + yield from _walk_trace_values(value) + elif isinstance(payload, list): + for item in payload: + yield from _walk_trace_values(item) diff --git a/src/audio/processor.py b/src/audio/processor.py index dbd28eca3..f1ecff932 100644 --- a/src/audio/processor.py +++ b/src/audio/processor.py @@ -41,6 +41,11 @@ generate_sonification_audio, generate_tonal_representation, ) +from .streaming import ( + chunks_from_frames, + frames_from_execution_trace, + write_stream_summary, +) def process_audio( @@ -74,6 +79,7 @@ def process_audio( "audio_files_generated": [], "sonification_results": [], "audio_analysis": [], + "audio_streaming": {}, } # Find GNN files @@ -111,6 +117,10 @@ def process_audio( results["errors"].append(error_info) logger.error(f"Error processing {gnn_file}: {e}") + stream_summary = _process_audio_streaming(kwargs, results_dir, logger) + if stream_summary: + results["audio_streaming"] = stream_summary + # Save detailed results results_file = results_dir / "audio_results.json" with open(results_file, "w") as f: @@ -134,6 +144,164 @@ def process_audio( return False +def _process_audio_streaming( + kwargs: Dict[str, Any], output_dir: Path, logger: logging.Logger +) -> Dict[str, Any]: + """Generate streaming-safe audio chunk metadata from optional telemetry.""" + telemetries: List[tuple[str, Dict[str, Any]]] = [] + telemetry = kwargs.get("telemetry") + telemetry_file = kwargs.get("telemetry_file") + if isinstance(telemetry, dict): + telemetries.append(("inline", telemetry)) + telemetry_files = list(kwargs.get("telemetry_files") or []) + if telemetry_file: + telemetry_files.append(telemetry_file) + for path_value in telemetry_files: + 
telemetry_path = Path(path_value) + loaded = _load_telemetry_json(telemetry_path, logger) + if loaded: + telemetries.append((str(telemetry_path), loaded)) + execution_output_dir = kwargs.get("execution_output_dir") or kwargs.get( + "execution_results_dir" + ) + if not execution_output_dir: + sibling_execution_dir = output_dir.parent / "12_execute_output" + if sibling_execution_dir.exists(): + execution_output_dir = sibling_execution_dir + if execution_output_dir: + telemetries.extend( + _load_execution_telemetry_dir(Path(execution_output_dir), logger) + ) + if not telemetries: + return {} + frames = [] + provenance = [] + for source, item in telemetries: + provenance.append(source) + frames.extend(frames_from_execution_trace(item)) + if not frames: + summary = write_stream_summary([], output_dir / "audio_stream_chunks.json") + summary["telemetry_source_count"] = len(telemetries) + summary["telemetry_provenance"] = provenance + (output_dir / "audio_stream_chunks.json").write_text( + json.dumps(summary, indent=2), encoding="utf-8" + ) + return summary + chunks = chunks_from_frames( + frames, chunk_size=int(kwargs.get("audio_chunk_size", 32)) + ) + summary = write_stream_summary(chunks, output_dir / "audio_stream_chunks.json") + summary["telemetry_source_count"] = len(telemetries) + summary["telemetry_provenance"] = provenance + (output_dir / "audio_stream_chunks.json").write_text( + json.dumps(summary, indent=2), encoding="utf-8" + ) + return summary + + +def _load_telemetry_json(path: Path, logger: logging.Logger) -> Dict[str, Any]: + """Load one telemetry JSON file if it contains an object.""" + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + logger.warning("Could not read audio telemetry file %s: %s", path, exc) + return {} + return payload if isinstance(payload, dict) else {} + + +def _load_execution_telemetry_dir( + execution_output_dir: Path, logger: logging.Logger +) -> List[tuple[str, Dict[str, 
Any]]]: + """Load Step 12 result JSON files that contain telemetry-like payloads.""" + if not execution_output_dir.exists(): + logger.warning("Execution output directory not found: %s", execution_output_dir) + return [] + telemetries: List[tuple[str, Dict[str, Any]]] = [] + for path in sorted(execution_output_dir.rglob("*.json")): + payload = _load_telemetry_json(path, logger) + if not payload: + continue + if path.name == "execution_summary.json": + telemetries.extend( + _load_execution_summary_telemetries( + payload, path.parent, execution_output_dir, logger + ) + ) + if any( + key in payload + for key in ( + "simulation_data", + "telemetry", + "free_energy", + "expected_free_energy", + "beliefs", + "actions", + ) + ): + telemetries.append((str(path), payload)) + return _dedupe_telemetries(telemetries) + + +def _load_execution_summary_telemetries( + summary: Dict[str, Any], + summary_dir: Path, + execution_output_dir: Path, + logger: logging.Logger, +) -> List[tuple[str, Dict[str, Any]]]: + """Follow Step 12 slim-summary pointers to structured result JSON payloads.""" + telemetries: List[tuple[str, Dict[str, Any]]] = [] + details = summary.get("execution_details") + if not isinstance(details, list): + return telemetries + for detail in details: + if not isinstance(detail, dict): + continue + simulation_data = detail.get("simulation_data") + if isinstance(simulation_data, dict): + telemetries.append( + ("execution_summary.inline", {"simulation_data": simulation_data}) + ) + continue + structured_result_file = detail.get("structured_result_file") + if not isinstance(structured_result_file, str) or not structured_result_file: + continue + structured_path = Path(structured_result_file) + if not structured_path.is_absolute(): + structured_path = (summary_dir / structured_path).resolve() + else: + structured_path = structured_path.resolve() + try: + structured_path.relative_to(execution_output_dir.resolve()) + except ValueError: + logger.warning( + "Ignoring Step 12 
telemetry outside execution output: %s", + structured_path, + ) + continue + payload = _load_telemetry_json(structured_path, logger) + simulation_data = payload.get("simulation_data") if payload else None + if isinstance(simulation_data, dict): + telemetries.append( + (str(structured_path), {"simulation_data": simulation_data}) + ) + return telemetries + + +def _dedupe_telemetries( + telemetries: List[tuple[str, Dict[str, Any]]], +) -> List[tuple[str, Dict[str, Any]]]: + """Remove duplicate telemetry payloads found through summary and file scans.""" + unique: List[tuple[str, Dict[str, Any]]] = [] + seen: set[str] = set() + for source, telemetry in telemetries: + key = json.dumps(telemetry, sort_keys=True, default=str) + if key in seen: + continue + seen.add(key) + unique.append((source, telemetry)) + return unique + + def generate_audio_from_gnn( file_path_or_content: Any, output_dir: Path | None = None, verbose: bool = False ) -> Dict[str, Any]: diff --git a/src/audio/streaming.py b/src/audio/streaming.py new file mode 100644 index 000000000..ea9388b13 --- /dev/null +++ b/src/audio/streaming.py @@ -0,0 +1,196 @@ +"""Device-free audio telemetry contracts for Step 12 stream artifacts.""" + +from __future__ import annotations + +import json +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any, Dict, Iterable, List + + +@dataclass(frozen=True) +class AudioTelemetryFrame: + """One normalized Step 12 telemetry sample available to audio generation.""" + + t: int + free_energy: float | None = None + belief: List[float] = field(default_factory=list) + action: int | None = None + + +@dataclass(frozen=True) +class AudioStreamChunk: + """A deterministic audio-control chunk derived from telemetry frames.""" + + index: int + frame_start: int + frame_count: int + amplitude: float + frequency_hz: float + metadata: Dict[str, Any] = field(default_factory=dict) + + +def frames_from_execution_trace(trace: Dict[str, Any]) -> 
List[AudioTelemetryFrame]: + """Normalize Step 12 simulation outputs into telemetry frames.""" + trace = _unwrap_trace_payload(trace) + free_energy = _first_sequence( + trace, + "free_energy", + "expected_free_energy", + "free_energy_trace", + "expected_free_energy_trace", + "free_energy_history", + ) + beliefs = _first_sequence(trace, "beliefs", "belief_trace", "belief_history") + actions = _first_sequence( + trace, "actions", "action_trace", "action_history", "selected_actions" + ) + n = max(len(free_energy), len(beliefs), len(actions)) + frames: List[AudioTelemetryFrame] = [] + for index in range(n): + belief = _coerce_belief(beliefs[index]) if index < len(beliefs) else [] + action = _coerce_action(actions[index]) if index < len(actions) else None + frames.append( + AudioTelemetryFrame( + t=index, + free_energy=_coerce_scalar(free_energy[index]) + if index < len(free_energy) + else None, + belief=[float(value) for value in belief], + action=int(action) if action is not None else None, + ) + ) + return frames + + +def _unwrap_trace_payload(trace: Dict[str, Any]) -> Dict[str, Any]: + """Return the telemetry-bearing object from common Step 12 result envelopes.""" + for key in ("simulation_data", "telemetry", "execution_trace", "trace"): + nested = trace.get(key) + if isinstance(nested, dict): + return nested + traces = trace.get("traces") + if isinstance(traces, list) and traces and isinstance(traces[0], dict): + return traces[0] + return trace + + +def _first_sequence(trace: Dict[str, Any], *keys: str) -> List[Any]: + """Return the first list value under ``keys``.""" + for key in keys: + value = trace.get(key) + if isinstance(value, list): + return value + return [] + + +def _coerce_belief(value: Any) -> List[float]: + """Coerce Step 12 belief shapes into a numeric vector.""" + if isinstance(value, dict): + value = value.get("belief") or value.get("state_beliefs") or value.get("values") + if isinstance(value, list): + return [float(item) for item in value if 
isinstance(item, (int, float))] + return [] + + +def _coerce_action(value: Any) -> int | None: + """Coerce Step 12 action shapes into an integer action id.""" + if isinstance(value, dict): + for key in ("action", "selected_action", "id"): + if key in value: + value = value[key] + break + else: + value = None + if isinstance(value, list): + value = next((item for item in value if isinstance(item, (int, float))), None) + if value is None: + return None + return int(value) + + +def _coerce_scalar(value: Any) -> float | None: + """Coerce scalar or vector telemetry values into one numeric sample.""" + if isinstance(value, dict): + for key in ("value", "free_energy", "expected_free_energy"): + if key in value: + value = value[key] + break + if isinstance(value, list): + numeric = [float(item) for item in value if isinstance(item, (int, float))] + return sum(numeric) / len(numeric) if numeric else None + if isinstance(value, (int, float)): + return float(value) + return None + + +def chunks_from_frames( + frames: Iterable[AudioTelemetryFrame], *, chunk_size: int = 32 +) -> List[AudioStreamChunk]: + """Create deterministic chunks without requiring live audio devices.""" + if chunk_size <= 0: + raise ValueError("chunk_size must be positive") + frame_list = list(frames) + chunks: List[AudioStreamChunk] = [] + for chunk_index, start in enumerate(range(0, len(frame_list), chunk_size)): + group = frame_list[start : start + chunk_size] + free_energy_values = [ + frame.free_energy for frame in group if frame.free_energy is not None + ] + mean_fe = ( + sum(free_energy_values) / len(free_energy_values) + if free_energy_values + else 0.0 + ) + last_action = next( + (frame.action for frame in reversed(group) if frame.action is not None), + None, + ) + belief_confidence = max( + (max(frame.belief) for frame in group if frame.belief), default=0.0 + ) + chunks.append( + AudioStreamChunk( + index=chunk_index, + frame_start=start, + frame_count=len(group), + amplitude=max(0.0, min(1.0, 
def write_stream_summary(
    chunks: Iterable[AudioStreamChunk], output_path: Path
) -> Dict[str, Any]:
    """Persist stream chunk metadata for downstream SAPF/Pedalboard consumers.

    Args:
        chunks: Chunks to summarize; consumed once.
        output_path: JSON file to write (UTF-8, 2-space indent). Missing
            parent directories are created.

    Returns:
        The payload that was written. With no chunks the status is
        ``"no_frames"`` and ``streaming_safe`` is ``False``; otherwise the
        status is ``"ready"`` and ``streaming_safe`` is ``True``.
    """
    chunk_list = list(chunks)
    has_chunks = bool(chunk_list)
    total_frames = sum(chunk.frame_count for chunk in chunk_list)
    # One payload shape for both the empty and populated case, so the two
    # cannot drift apart.
    payload: Dict[str, Any] = {
        "schema": "gnn_audio_stream_chunks_v1",
        "status": "ready" if has_chunks else "no_frames",
        "streaming_safe": has_chunks,
        "frame_count": total_frames,
        "duration_frames": total_frames,
        "chunk_count": len(chunk_list),
        "chunks": [asdict(chunk) for chunk in chunk_list],
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return payload
Provides 12 subcommands for | `gnn preflight` | Run environment & config checks | | `gnn health` | Show renderer & dependency status (8/8 renderers) | | `gnn serve` | Start Pipeline-as-a-Service API (FastAPI) | +| `gnn templates list` | List maintained local GNN templates with checksums | +| `gnn templates show ` | Show one maintained template record | +| `gnn pull ` | Copy a maintained template into an input directory | | `gnn lsp` | Launch GNN Language Server (stdio) | | `gnn watch ` | Monitor directory and live-reparse on file change | | `gnn graph ` | Generate dependency graph from multi-model files | @@ -38,6 +41,11 @@ gnn parse input/gnn_files/discrete/actinf_pomdp_agent.md # Check environment gnn preflight gnn health + +# Inspect and dry-run template installation +gnn templates list +gnn templates show pomdp-gridworld-3x3 +gnn pull pomdp-gridworld-3x3 --output-dir /tmp/gnn-pull --dry-run ``` ## Architecture @@ -53,6 +61,7 @@ The CLI module is a thin dispatcher — each subcommand delegates to the corresp - `preflight` → `pipeline.preflight.run_preflight()` - `health` → `render.health.check_renderers()` + `pipeline.preflight.check_environment()` - `serve` → `api.app.start_server()` +- `templates` / `pull` → `cli.templates` maintained template index, checksum, and copy helpers - `lsp` → `lsp.start_server()` - `watch` → `gnn.watcher.GNNWatcher()` - `graph` → `gnn.dep_graph.render_graph_from_file()` diff --git a/src/cli/__init__.py b/src/cli/__init__.py index dcf8fb5e7..fbf7b844c 100644 --- a/src/cli/__init__.py +++ b/src/cli/__init__.py @@ -12,6 +12,8 @@ gnn preflight — Run environment & config checks gnn health — Show renderer & dependency status gnn serve — Start Pipeline-as-a-Service API + gnn templates — Inspect maintained GNN templates + gnn pull — Copy a maintained template into an input directory gnn lsp — Launch Language Server """ @@ -145,6 +147,32 @@ def main(argv: Optional[List[str]] = None) -> int: serve_p.add_argument("--host", default="127.0.0.1", 
help="Bind host") serve_p.add_argument("--port", type=int, default=8000, help="Bind port") + # ── gnn templates ─────────────────────────────────────────────────────── + templates_p = subparsers.add_parser("templates", help="Inspect template library") + templates_sub = templates_p.add_subparsers( + dest="templates_command", help="Template commands" + ) + templates_sub.add_parser("list", help="List available templates") + templates_show_p = templates_sub.add_parser("show", help="Show one template") + templates_show_p.add_argument("name", help="Template name") + + # ── gnn pull ──────────────────────────────────────────────────────────── + pull_p = subparsers.add_parser("pull", help="Copy a maintained GNN template") + pull_p.add_argument("name", help="Template name") + pull_p.add_argument( + "--output-dir", + "-o", + type=Path, + default=Path("input/gnn_files"), + help="Directory to receive the template", + ) + pull_p.add_argument("--dry-run", action="store_true", help="Report without copying") + pull_p.add_argument( + "--overwrite", + action="store_true", + help="Replace destination on checksum mismatch", + ) + # ── gnn watch ──────────────────────────────────────────────────────────── watch_p = subparsers.add_parser( "watch", help="Monitor directory and live-reparse on change" @@ -191,6 +219,8 @@ def main(argv: Optional[List[str]] = None) -> int: "preflight": _cmd_preflight, "health": _cmd_health, "serve": _cmd_serve, + "templates": _cmd_templates, + "pull": _cmd_pull, "lsp": _cmd_lsp, "watch": _cmd_watch, "graph": _cmd_graph, @@ -568,6 +598,41 @@ def _cmd_serve(args: Any) -> Any: return 0 +def _cmd_templates(args: Any) -> Any: + """Inspect the maintained template library.""" + from .templates import list_templates, show_template + + if getattr(args, "templates_command", None) in {None, "list"}: + print(json.dumps({"templates": list_templates()}, indent=2)) + return 0 + if args.templates_command == "show": + try: + print(json.dumps({"template": 
def _cmd_pull(args: Any) -> Any:
    """Handle ``gnn pull``: copy a maintained template into a directory.

    Reads ``name``, ``output_dir``, ``dry_run`` and ``overwrite`` from
    ``args`` and delegates to ``cli.templates.pull_template``.

    Returns:
        ``0`` after printing the JSON result; ``1`` after logging the error
        when the pull raises ``KeyError`` or an ``OSError`` subclass.
    """
    from .templates import pull_template

    try:
        outcome = pull_template(
            args.name,
            Path(args.output_dir),
            dry_run=bool(args.dry_run),
            overwrite=bool(args.overwrite),
        )
    except (KeyError, FileExistsError, FileNotFoundError, OSError) as exc:
        logger.error(str(exc))
        return 1

    rendered = json.dumps(outcome, indent=2)
    print(rendered)
    return 0
+- The agent has an initial policy prior (habit) encoded as log-probabilities over actions. + +## StateSpaceBlock +# Likelihood matrix: A[observation_outcomes, hidden_states] +A[3,3,type=float] # Likelihood mapping hidden states to observations + +# Transition matrix: B[states_next, states_previous, actions] +B[3,3,3,type=float] # State transitions given previous state and action + +# Preference vector: C[observation_outcomes] +C[3,type=float] # Log-preferences over observations + +# Prior vector: D[states] +D[3,type=float] # Prior over initial hidden states + +# Habit vector: E[actions] +E[3,type=float] # Initial policy prior (habit) over actions + +# Hidden State +s[3,1,type=float] # Current hidden state distribution +s_prime[3,1,type=float] # Next hidden state distribution +F[π,type=float] # Variational Free Energy for belief updating from observations + +# Observation +o[3,1,type=int] # Current observation (integer index) + +# Policy and Control +π[3,type=float] # Policy (distribution over actions), no planning +u[1,type=int] # Action taken +G[π,type=float] # Expected Free Energy (per policy) + +# Time +t[1,type=int] # Discrete time step + +## Connections +D>s +s-A +s>s_prime +A-o +s-B +C>G +E>π +G>π +π>u +B>u +u>s_prime + +## InitialParameterization +# A: 3 observations x 3 hidden states. Identity mapping (each state deterministically produces a unique observation). Rows are observations, columns are hidden states. +A={ + (0.9, 0.05, 0.05), + (0.05, 0.9, 0.05), + (0.05, 0.05, 0.9) +} + +# B: 3 states x 3 previous states x 3 actions. Each action deterministically moves to a state. For each slice, rows are previous states, columns are next states. Each slice is a transition matrix corresponding to a different action selection. +B={ + ( (1.0,0.0,0.0), (0.0,1.0,0.0), (0.0,0.0,1.0) ), + ( (0.0,1.0,0.0), (1.0,0.0,0.0), (0.0,0.0,1.0) ), + ( (0.0,0.0,1.0), (0.0,1.0,0.0), (1.0,0.0,0.0) ) +} + +# C: 3 observations. 
Preference in terms of log-probabilities over observations. +C={(0.1, 0.1, 1.0)} + +# D: 3 states. Uniform prior over hidden states. Rows are hidden states, columns are prior probabilities. +D={(0.33333, 0.33333, 0.33333)} + +# E: 3 actions. Uniform habit used as initial policy prior. +E={(0.33333, 0.33333, 0.33333)} + +## Equations +# Standard Active Inference update equations for POMDPs: +# - State inference using Variational Free Energy with infer_states() +# - Policy inference using Expected Free Energy = with infer_policies() +# - Action selection from policy posterior: action = sample_action() +# - Belief updating using Variational Free Energy with update_beliefs() + +## Time +Time=t +Dynamic +Discrete +ModelTimeHorizon=Unbounded # The agent is defined for an unbounded time horizon; simulation runs may specify a finite horizon. + +## ActInfOntologyAnnotation +A=LikelihoodMatrix +B=TransitionMatrix +C=LogPreferenceVector +D=PriorOverHiddenStates +E=Habit +F=VariationalFreeEnergy +G=ExpectedFreeEnergy +s=HiddenState +s_prime=NextHiddenState +o=Observation +π=PolicyVector # Distribution over actions +u=Action # Chosen action +t=Time + +## ModelParameters +num_hidden_states: 3 # s[3] +num_obs: 3 # o[3] +num_actions: 3 # B actions_dim=3 (controlled by π) +num_timesteps: 30 # Number of simulation timesteps for all frameworks + +## Footer +Active Inference POMDP Agent v1 - GNN Representation. +Currently there is a planning horizon of 1 step (no deep planning), no precision modulation, no hierarchical nesting. 
+ +## Signature +Cryptographic signature goes here diff --git a/src/cli/template_assets/multi_agent_coordination.md b/src/cli/template_assets/multi_agent_coordination.md new file mode 100644 index 000000000..05ced9738 --- /dev/null +++ b/src/cli/template_assets/multi_agent_coordination.md @@ -0,0 +1,184 @@ +# GNN Example: Multi-Agent Cooperative Active Inference + +# GNN Version: 1.0 + +# Two agents cooperating via shared observation space + +## GNNSection + +ActInfPOMDP_MultiAgent + +## GNNVersionAndFlags + +GNN v1 + +## ModelName + +Multi-Agent Cooperative Active Inference + +## ModelAnnotation + +Two Active Inference agents cooperating on a joint task: + +- Agent 1 and Agent 2 each maintain independent beliefs +- Shared observation space: agents observe each other's actions +- Joint task state includes both agents' positions (4x4 = 16 joint states) +- Cooperative preferences: both agents prefer the same goal configuration +- Models social cognition and coordination without explicit communication + +## StateSpaceBlock + +# Agent 1 + +A1[4,4,type=float] # Agent 1 likelihood +B1[4,4,3,type=float] # Agent 1 transitions (3 actions) +C1[4,type=float] # Agent 1 preferences +D1[4,type=float] # Agent 1 prior +s1[4,1,type=float] # Agent 1 hidden state +s1_prime[4,1,type=float] # Agent 1 next hidden state +o1[4,1,type=int] # Agent 1 observations (includes Agent 2 obs) +π1[3,type=float] # Agent 1 policy +u1[1,type=int] # Agent 1 action +G1[π1,type=float] # Agent 1 EFE + +# Agent 2 + +A2[4,4,type=float] # Agent 2 likelihood +B2[4,4,3,type=float] # Agent 2 transitions (3 actions) +C2[4,type=float] # Agent 2 preferences +D2[4,type=float] # Agent 2 prior +s2[4,1,type=float] # Agent 2 hidden state +s2_prime[4,1,type=float] # Agent 2 next hidden state +o2[4,1,type=int] # Agent 2 observations (includes Agent 1 obs) +π2[3,type=float] # Agent 2 policy +u2[1,type=int] # Agent 2 action +G2[π2,type=float] # Agent 2 EFE + +# Shared environment state + +s_joint[16,1,type=float] # Joint 
state (Agent1_pos x Agent2_pos) +o_joint[4,1,type=int] # Joint observation (goal achievement) + +# Time + +t[1,type=int] + +## Connections + +D1>s1 +s1-A1 +A1-o1 +s1>s1_prime +C1>G1 +G1>π1 +π1>u1 +B1>u1 +D2>s2 +s2-A2 +A2-o2 +s2>s2_prime +C2>G2 +G2>π2 +π2>u2 +B2>u2 +u1>s_joint +u2>s_joint +s_joint-o_joint +o1-s_joint +o2-s_joint + +## InitialParameterization + +A1={ + (0.85, 0.05, 0.05, 0.05), + (0.05, 0.85, 0.05, 0.05), + (0.05, 0.05, 0.85, 0.05), + (0.05, 0.05, 0.05, 0.85) +} + +A2={ + (0.85, 0.05, 0.05, 0.05), + (0.05, 0.85, 0.05, 0.05), + (0.05, 0.05, 0.85, 0.05), + (0.05, 0.05, 0.05, 0.85) +} + +# Shared cooperative preference: goal = state 4 (index 3) + +C1={(-1.0, -1.0, -1.0, 2.0)} +C2={(-1.0, -1.0, -1.0, 2.0)} +D1={(0.25, 0.25, 0.25, 0.25)} +D2={(0.25, 0.25, 0.25, 0.25)} + +B1={ + ( (0.9,0.1,0.0,0.0), (0.0,0.9,0.1,0.0), (0.0,0.0,0.9,0.1), (0.1,0.0,0.0,0.9) ), + ( (0.9,0.0,0.0,0.1), (0.1,0.9,0.0,0.0), (0.0,0.1,0.9,0.0), (0.0,0.0,0.1,0.9) ), + ( (0.8,0.1,0.1,0.0), (0.1,0.8,0.0,0.1), (0.1,0.0,0.8,0.1), (0.0,0.1,0.1,0.8) ) +} + +B2={ + ( (0.9,0.1,0.0,0.0), (0.0,0.9,0.1,0.0), (0.0,0.0,0.9,0.1), (0.1,0.0,0.0,0.9) ), + ( (0.9,0.0,0.0,0.1), (0.1,0.9,0.0,0.0), (0.0,0.1,0.9,0.0), (0.0,0.0,0.1,0.9) ), + ( (0.8,0.1,0.1,0.0), (0.1,0.8,0.0,0.1), (0.1,0.0,0.8,0.1), (0.0,0.1,0.1,0.8) ) +} + +## Equations + +# Each agent independently minimizes their own VFE + +# Coordination emerges from shared observation space and aligned preferences + +# Agent 1 observes both own state and Agent 2's last action + +# No explicit communication channel — implicit coordination only + +## Time + +Time=t +Dynamic +Discrete +ModelTimeHorizon=20 + +## ActInfOntologyAnnotation + +A1=LikelihoodMatrix +B1=TransitionMatrix +C1=LogPreferenceVector +D1=PriorOverHiddenStates +s1=Agent1HiddenState +s1_prime=Agent1NextHiddenState +o1=Agent1Observation +π1=Agent1PolicyVector +u1=Agent1Action +G1=Agent1ExpectedFreeEnergy +A2=LikelihoodMatrix +B2=TransitionMatrix +C2=LogPreferenceVector 
+D2=PriorOverHiddenStates +s2=Agent2HiddenState +s2_prime=Agent2NextHiddenState +o2=Agent2Observation +π2=Agent2PolicyVector +u2=Agent2Action +G2=Agent2ExpectedFreeEnergy +s_joint=JointState +o_joint=JointObservation +t=Time + +## ModelParameters + +num_agents: 2 +num_hidden_states_per_agent: 4 +num_obs_per_agent: 4 +num_actions_per_agent: 3 +num_timesteps: 20 + +## Footer + +Multi-Agent Cooperative Active Inference v1 - GNN Representation. +Implicit coordination via shared observation space. +No explicit communication — emergent cooperation from aligned preferences. + +## Signature + +Cryptographic signature goes here diff --git a/src/cli/template_assets/pomdp_gridworld_3x3.md b/src/cli/template_assets/pomdp_gridworld_3x3.md new file mode 100644 index 000000000..edb2e2cf4 --- /dev/null +++ b/src/cli/template_assets/pomdp_gridworld_3x3.md @@ -0,0 +1,201 @@ +# GNN Example: POMDP GridWorld 3x3 +# GNN Version: 1.0 + +## GNNSection +ActInfPOMDP + +## GNNVersionAndFlags +GNN v1 + +## ModelName +POMDP GridWorld 3x3 + +## ModelAnnotation +Discrete 3x3 GridWorld POMDP for strict cross-framework validation. The model has one hidden state factor with 9 grid cells, one observation modality with noisy cell observations, and one control factor with 5 boundary-clamped actions: up, down, left, right, and stay. 
+ +## StateSpaceBlock +A[9,9,type=float] # Likelihood matrix: observations by hidden states +B[9,9,5,type=float] # Transition tensor: next_state, previous_state, action +C[9,type=float] # Log-preferences over observations +D[9,type=float] # Prior over initial hidden state +E[5,type=float] # Policy prior over actions + +s[9,1,type=float] # Current hidden state distribution +s_prime[9,1,type=float] # Next hidden state distribution +o[9,1,type=int] # Current observation +π[5,type=float] # Policy distribution over actions +u[1,type=int] # Action index +G[π,type=float] # Expected Free Energy per action +t[1,type=int] # Discrete time step + +## Connections +D>s +s-A +A-o +s-B +B>u +u>s_prime +C>G +E>π +G>π +π>u +s>s_prime + +## InitialParameterization +A={ + (0.85000, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875), + (0.01875, 0.85000, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875), + (0.01875, 0.01875, 0.85000, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875), + (0.01875, 0.01875, 0.01875, 0.85000, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875), + (0.01875, 0.01875, 0.01875, 0.01875, 0.85000, 0.01875, 0.01875, 0.01875, 0.01875), + (0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.85000, 0.01875, 0.01875, 0.01875), + (0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.85000, 0.01875, 0.01875), + (0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.85000, 0.01875), + (0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.01875, 0.85000) +} + +B={ + ( + (1.0, 0.0, 1.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 1.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 
0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 1.0, 0.0), + (1.0, 0.0, 0.0, 1.0, 1.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 1.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 1.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 1.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 1.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 1.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 1.0, 0.0), + (0.0, 0.0, 0.0, 1.0, 1.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (1.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 1.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 1.0, 1.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 1.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 1.0, 0.0), + (0.0, 1.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 0.0, 0.0) + ), + ( + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 1.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 0.0, 0.0), + (0.0, 0.0, 0.0, 1.0, 0.0), + (0.0, 1.0, 0.0, 1.0, 1.0) + ) +} + +C={(0.0, 0.1, 0.3, 0.1, 0.4, 0.8, 0.3, 
0.8, 3.0)} + +D={(1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)} + +E={(0.2, 0.2, 0.2, 0.2, 0.2)} + +## Equations +State inference uses the observation likelihood and previous predictive belief. Action selection minimizes expected free energy under the shared transition tensor. + +## Time +Time=t +Dynamic +Discrete +ModelTimeHorizon=15 + +## ActInfOntologyAnnotation +A=LikelihoodMatrix +B=TransitionMatrix +C=LogPreferenceVector +D=PriorOverHiddenStates +E=Habit +G=ExpectedFreeEnergy +s=HiddenState +s_prime=NextHiddenState +o=Observation +π=PolicyVector +u=Action +t=Time + +## ModelParameters +num_hidden_states: 9 +num_obs: 9 +num_actions: 5 +num_timesteps: 15 +random_seed: 42 +b_tensor_order: next_state_previous_state_action +grid_rows: 3 +grid_cols: 3 +goal_state: 8 +action_labels: up,down,left,right,stay + +## Footer +POMDP GridWorld 3x3 v1 - canonical cross-framework GNN fixture. diff --git a/src/cli/template_index.json b/src/cli/template_index.json new file mode 100644 index 000000000..61e9d7b52 --- /dev/null +++ b/src/cli/template_index.json @@ -0,0 +1,20 @@ +[ + { + "name": "actinf-pomdp-2state", + "description": "Two-state Active Inference POMDP starter model.", + "source": "template_assets/actinf_pomdp_2state.md", + "filename": "actinf_pomdp_2state.md" + }, + { + "name": "multi-agent-coordination", + "description": "Compact multi-agent coordination model for RxInfer experiments.", + "source": "template_assets/multi_agent_coordination.md", + "filename": "multi_agent_coordination.md" + }, + { + "name": "pomdp-gridworld-3x3", + "description": "3x3 gridworld POMDP fixture for cross-framework acceptance checks.", + "source": "template_assets/pomdp_gridworld_3x3.md", + "filename": "pomdp_gridworld_3x3.md" + } +] diff --git a/src/cli/templates.py b/src/cli/templates.py new file mode 100644 index 000000000..0a3f6cfc8 --- /dev/null +++ b/src/cli/templates.py @@ -0,0 +1,218 @@ +"""Maintained local template library for the ``gnn`` CLI.""" + +from __future__ import 
annotations + +import hashlib +import json +import shutil +from dataclasses import dataclass +from importlib import resources +from importlib.abc import Traversable +from pathlib import Path, PurePosixPath +from typing import Any, Dict, List + +REPO_ROOT = Path(__file__).resolve().parents[2] +PACKAGE_NAME = __package__ or "cli" +TEMPLATE_INDEX_RESOURCE = "template_index.json" + + +@dataclass(frozen=True) +class TemplateRecord: + """One maintained template entry exposed through ``gnn pull``.""" + + name: str + description: str + source: str + filename: str + + @property + def source_resource(self) -> Traversable: + """Return the packaged template source resource.""" + resource = resources.files(PACKAGE_NAME) + for part in self.source.split("/"): + resource = resource.joinpath(part) + return resource + + @property + def checksum(self) -> str: + """Return the SHA256 checksum for the template source.""" + return _sha256_resource(self.source_resource) + + def as_dict(self) -> Dict[str, str]: + """Return a JSON-serializable description.""" + return { + "name": self.name, + "description": self.description, + "source": f"package://{PACKAGE_NAME}/{self.source}", + "filename": self.filename, + "sha256": self.checksum, + } + + +def _load_template_index() -> Dict[str, TemplateRecord]: + """Load maintained template records from the JSON index.""" + index_text = ( + resources.files(PACKAGE_NAME) + .joinpath(TEMPLATE_INDEX_RESOURCE) + .read_text(encoding="utf-8") + ) + raw_records = json.loads(index_text) + if not isinstance(raw_records, list): + raise ValueError("Template index must be a list of records") + records: Dict[str, TemplateRecord] = {} + for raw in raw_records: + record = _template_record_from_raw(raw) + if record.name in records: + raise ValueError(f"Duplicate template name: {record.name}") + records[record.name] = record + return records + + +def _template_record_from_raw(raw: Any) -> TemplateRecord: + """Validate and construct one maintained template index 
def _validate_template_record(record: "TemplateRecord") -> None:
    """Reject template records that could escape package/output boundaries.

    Args:
        record: Index entry to check; only ``filename`` and ``source`` are read.

    Raises:
        ValueError: ``filename`` is not a plain, non-empty basename, or
            ``source`` is not a relative POSIX path of the form
            ``template_assets/.../*.md``.
    """
    filename = record.filename
    filename_path = Path(filename)
    if (
        # Path("").name == "" would slip through the basename comparison below
        # and make the destination resolve to the output directory itself.
        not filename
        # Same portability rule already applied to ``source``: a backslash is a
        # path separator on Windows even though POSIX treats it as a plain char.
        or "\\" in filename
        or filename_path.is_absolute()
        or filename_path.name != filename
        or ".." in filename_path.parts
    ):
        raise ValueError(f"Template filename must be a basename: {record.filename}")

    if "\\" in record.source:
        raise ValueError(
            f"Template source must use package-relative POSIX paths: {record.source}"
        )
    source_path = PurePosixPath(record.source)
    if (
        source_path.is_absolute()
        or ".." in source_path.parts
        # At least "template_assets/<file>"; the directory alone is not a file.
        or len(source_path.parts) < 2
        or source_path.parts[0] != "template_assets"
        or source_path.suffix != ".md"
    ):
        raise ValueError(
            "Template source must stay under package template_assets/*.md: "
            f"{record.source}"
        )
def pull_template(
    name: str,
    output_dir: Path,
    *,
    dry_run: bool = False,
    overwrite: bool = False,
) -> Dict[str, Any]:
    """Copy a maintained template into ``output_dir``.

    The copy is staged in a hidden temporary file next to the destination,
    verified against the packaged checksum, and only then renamed into place.

    Args:
        name: Key of the template in ``TEMPLATE_INDEX``.
        output_dir: Directory that receives the template; created on demand.
        dry_run: When true, report what would happen without touching disk.
        overwrite: When true, replace an existing destination whose checksum
            differs from the packaged template.

    Returns:
        A report with ``template``, ``source``, ``destination``, ``sha256``,
        ``dry_run``, ``overwritten``, ``copied`` and ``message`` keys, plus
        ``existing_sha256`` when the destination already existed.

    Raises:
        KeyError: ``name`` is not a known template.
        FileNotFoundError: The packaged template source is missing.
        FileExistsError: The destination or staging path is a symlink, or the
            destination differs from the template and ``overwrite`` is false.
        OSError: The staged copy does not match the packaged checksum.
    """
    if name not in TEMPLATE_INDEX:
        available = ", ".join(sorted(TEMPLATE_INDEX))
        raise KeyError(f"Unknown template '{name}'. Available templates: {available}")

    record = TEMPLATE_INDEX[name]
    source_resource = record.source_resource
    # Same URI that ``TemplateRecord.as_dict()`` reports. It is built directly
    # because ``as_dict()`` also hashes the resource, which raises for a missing
    # source before the intended error message below could be produced.
    source_uri = f"package://{PACKAGE_NAME}/{record.source}"
    if not source_resource.is_file():
        raise FileNotFoundError(f"Template source does not exist: {source_uri}")

    destination = output_dir / record.filename
    source_checksum = record.checksum
    result: Dict[str, Any] = {
        "template": name,
        "source": source_uri,
        "destination": str(destination),
        "sha256": source_checksum,
        "dry_run": dry_run,
        "overwritten": False,
        "copied": False,
    }

    if destination.is_symlink():
        raise FileExistsError(f"Refusing to write through symlink: {destination}")

    if destination.exists():
        existing_checksum = _sha256(destination)
        result["existing_sha256"] = existing_checksum
        if existing_checksum == source_checksum:
            result["message"] = "Template already present with matching checksum"
            return result
        if not overwrite:
            raise FileExistsError(
                f"Destination exists with different checksum: {destination}. "
                "Pass --overwrite to replace it."
            )
        result["overwritten"] = True

    if dry_run:
        result["message"] = "Dry run: no files copied"
        return result

    output_dir.mkdir(parents=True, exist_ok=True)
    # Re-check after mkdir: the first check ran before the directory was
    # guaranteed to exist.
    if destination.is_symlink():
        raise FileExistsError(f"Refusing to write through symlink: {destination}")
    temp_destination = output_dir / f".{record.filename}.tmp"
    if temp_destination.is_symlink():
        raise FileExistsError(f"Refusing to use symlink temp path: {temp_destination}")
    if temp_destination.exists():
        temp_destination.unlink()
    try:
        with resources.as_file(source_resource) as source_path:
            shutil.copy2(source_path, temp_destination)
        copied_checksum = _sha256(temp_destination)
        if copied_checksum != source_checksum:
            raise OSError(
                f"Checksum mismatch after copy: expected {source_checksum}, got {copied_checksum}"
            )
        temp_destination.replace(destination)
    finally:
        # After a successful rename the staging file no longer exists, so this
        # is a no-op; on any failure it removes the partial copy.
        temp_destination.unlink(missing_ok=True)
    result["copied"] = True
    result["message"] = "Template copied"
    return result
json.JSONDecodeError): + continue + if not isinstance(data, dict): + continue + if data.get("schema") != "gnn_rxinfer_execution_metadata_v1": + continue + if data.get("script_sha256") != _sha256_file(script_path): + continue + if "agent_count" not in data and "topology" not in data: + continue + topology = data.get("topology") + if not isinstance(topology, dict): + topology = {} + topology.setdefault("source", str(metadata_path)) + data["topology"] = topology + data["agent_count"] = int(data.get("agent_count") or 0) + data["metadata_provenance"] = data.get( + "metadata_provenance", "rendered_rxinfer_sidecar" + ) + data["metadata_verification"] = "script_sha256_match" + return data + return {} + + +def _load_rxinfer_execution_metadata_from_script(script_path: Path) -> Dict[str, Any]: + """Load agent population metadata from rendered RxInfer metadata artifacts.""" + if not script_path.exists(): + return {} + sidecar_metadata = _load_rxinfer_execution_metadata_sidecar(script_path) + if sidecar_metadata: + return sidecar_metadata + toml_candidates = [script_path.with_suffix(".toml")] + seen_toml: set[Path] = set() + for toml_path in toml_candidates: + if toml_path in seen_toml or not toml_path.exists(): + continue + seen_toml.add(toml_path) + try: + data = tomllib.loads(toml_path.read_text(encoding="utf-8")) + except (OSError, tomllib.TOMLDecodeError): + continue + agents = data.get("agents", []) + model = data.get("model", {}) + agent_count = ( + len(agents) if isinstance(agents, list) else model.get("nr_agents") + ) + agent_ids = [ + agent.get("id") + for agent in agents + if isinstance(agent, dict) and "id" in agent + ] + topology_data = data.get("topology", {}) + topology: Dict[str, Any] = { + "type": "agent_population", + "agent_ids": agent_ids, + "source": str(toml_path), + } + if isinstance(topology_data, dict): + topology["type"] = str(topology_data.get("type") or topology["type"]) + topology["agent_ids"] = topology_data.get("agent_ids") or agent_ids + if 
"edges" in topology_data: + topology["edges"] = topology_data["edges"] + if "clusters" in topology_data: + topology["clusters"] = topology_data["clusters"] + if "message_passing" in topology_data: + topology["message_passing"] = topology_data["message_passing"] + return { + "agent_count": int(agent_count or 0), + "topology": topology, + "metadata_provenance": "rxinfer_toml_sidecar", + "metadata_verification": "exact_stem_toml", + } + return {} + + +def _sha256_file(path: Path) -> str: + """Return the SHA256 digest for an executable script.""" + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + def _is_python_framework_dependency_available( framework: str, executor: str, logger: Any ) -> bool: @@ -192,6 +291,11 @@ def _make_skipped_result( "timestamp": datetime.now().isoformat(), "error": reason, "error_type": "DependencyNotInstalled", + "execution_metadata": _load_rxinfer_execution_metadata_from_script( + Path(script_info["path"]) + ) + if framework == "rxinfer" + else {}, } @@ -502,6 +606,7 @@ def _slim_execution_detail(detail: Dict[str, Any]) -> Dict[str, Any]: "structured_result_file", "output_file", "implementation_directory", + "execution_metadata", ) slim: Dict[str, Any] = {} for k in keys_keep: @@ -1088,6 +1193,10 @@ def execute_single_script( # Prepare execution result exec_result = _new_execution_result(context) + if framework == "rxinfer": + exec_result["execution_metadata"] = ( + _load_rxinfer_execution_metadata_from_script(script_path) + ) try: if verbose: @@ -1322,6 +1431,7 @@ def __init__(self, returncode: int, stdout: str, stderr: str) -> None: "stdout_length": len(result.stdout), "stderr_length": len(result.stderr), "output_directory": str(impl_specific_dir.parent), + **exec_result.get("execution_metadata", {}), }, } for bench_key in ( diff --git a/src/gnn/pomdp_extractor.py b/src/gnn/pomdp_extractor.py index 
90e28406c..5cc93f372 100644 --- a/src/gnn/pomdp_extractor.py +++ b/src/gnn/pomdp_extractor.py @@ -55,6 +55,7 @@ class POMDPStateSpace: matrix_provenance: Optional[Dict[str, Dict[str, Any]]] = None passive_model: bool = False adapter_notes: Optional[List[str]] = None + initial_parameterization: Optional[Dict[str, Any]] = None def to_dict(self) -> Dict[str, Any]: """Convert to dictionary representation.""" @@ -83,6 +84,7 @@ def to_dict(self) -> Dict[str, Any]: "matrix_provenance": self.matrix_provenance, "passive_model": self.passive_model, "adapter_notes": self.adapter_notes, + "initial_parameterization": self.initial_parameterization, } @@ -227,6 +229,7 @@ def extract_from_gnn_content(self, content: str) -> Optional[POMDPStateSpace]: matrix_provenance=matrix_provenance, passive_model=passive_model, adapter_notes=adapter_notes, + initial_parameterization=initial_params, ) # Validate if strict validation enabled @@ -614,12 +617,31 @@ def _parse_initial_parameterization(self, content: str) -> Dict[str, Any]: if line.startswith("#") or not line: continue - # Check if this line starts a parameter definition + # Check if this line starts a matrix/vector block parameter definition. 
if "={" in line and not in_param_block: # Start of parameter block param_name = line.split("={")[0].strip() + raw_value = line.split("=", 1)[1].strip() + raw_inner = ( + raw_value[1:-1].strip() + if raw_value.startswith("{") and raw_value.endswith("}") + else "" + ) + if ":" in raw_inner: + try: + params[param_name] = self._parse_assignment_value(raw_value) + except Exception as e: + self.logger.warning( + f"Failed to parse parameter {param_name}: {e}" + ) + current_param = None + current_value = "" + continue + current_param = param_name - current_value = line.split("={")[1] + current_value = ( + raw_value[1:] if raw_value.startswith("{") else raw_value + ) # Check if parameter ends on the same line if "}" in current_value: @@ -638,6 +660,17 @@ def _parse_initial_parameterization(self, content: str) -> Dict[str, Any]: # Multi-line parameter in_param_block = True + elif "=" in line and not in_param_block: + param_name, raw_value = line.split("=", 1) + param_name = param_name.strip() + raw_value = raw_value.strip() + if not param_name: + continue + try: + params[param_name] = self._parse_assignment_value(raw_value) + except Exception as e: + self.logger.warning(f"Failed to parse parameter {param_name}: {e}") + elif in_param_block and current_param: # Continue collecting parameter value if "}" in line: @@ -659,7 +692,16 @@ def _parse_initial_parameterization(self, content: str) -> Dict[str, Any]: return params - def _parse_parameter_value(self, value_str: str) -> Union[List, float, int]: + def _parse_assignment_value(self, value_str: str) -> Any: + """Parse a complete InitialParameterization assignment value.""" + value_str = value_str.strip() + if value_str.startswith("{") and value_str.endswith("}"): + inner = value_str[1:-1].strip() + if ":" not in inner: + value_str = inner + return self._parse_parameter_value(value_str) + + def _parse_parameter_value(self, value_str: str) -> Any: """Parse parameter value string into appropriate data structure.""" import ast diff 
--git a/src/gui/oxdraw/processor.py b/src/gui/oxdraw/processor.py index 78073f88a..88eb70018 100644 --- a/src/gui/oxdraw/processor.py +++ b/src/gui/oxdraw/processor.py @@ -16,6 +16,7 @@ from typing import Any, Dict, Optional from gnn.processor import discover_gnn_files +from gui.websocket_bridge import build_initial_messages from .mermaid_converter import convert_gnn_file_to_mermaid from .mermaid_parser import convert_mermaid_file_to_gnn @@ -81,6 +82,15 @@ def process_oxdraw( "files_processed": [], "gnn_to_mermaid_conversions": [], "mermaid_to_gnn_conversions": [], + "websocket_bridge": { + "enabled": False, + "message_contract_available": True, + "server_running": False, + "host": host, + "port": port, + "messages": [], + "status": "message_contract_only", + }, "errors": [], } @@ -199,6 +209,27 @@ def process_oxdraw( } ) + successful_payloads = [] + for conversion in results["gnn_to_mermaid_conversions"]: + if not conversion.get("success") or not conversion.get("mermaid_file"): + continue + mermaid_file = Path(conversion["mermaid_file"]) + payload = { + "model_id": mermaid_file.stem, + "format": "mermaid", + "gnn_file": conversion["gnn_file"], + "mermaid_file": str(mermaid_file), + } + try: + payload["mermaid"] = mermaid_file.read_text(encoding="utf-8") + except OSError as exc: + payload["load_warning"] = f"Unable to read Mermaid artifact: {exc}" + successful_payloads.append(payload) + results["websocket_bridge"]["messages"] = [ + json.loads(message.to_json()) + for message in build_initial_messages(successful_payloads) + ] + # Save processing results results_file = output_dir / "oxdraw_processing_results.json" with open(results_file, "w") as f: diff --git a/src/gui/websocket_bridge.py b/src/gui/websocket_bridge.py new file mode 100644 index 000000000..325d901ec --- /dev/null +++ b/src/gui/websocket_bridge.py @@ -0,0 +1,233 @@ +"""Local-only WebSocket message contracts for reactive GUI synchronization.""" + +from __future__ import annotations + +import asyncio 
+import copy +import json +from dataclasses import dataclass, field +from typing import Any, Dict, Iterable, List + +GUI_WEBSOCKET_MESSAGE_TYPES = frozenset( + {"model.load", "matrix.patch", "validation.result", "model.export", "error"} +) + + +@dataclass(frozen=True) +class GUIWebSocketMessage: + """Validated JSON message exchanged by the Step 22 GUI bridge.""" + + type: str + payload: Dict[str, Any] = field(default_factory=dict) + request_id: str | None = None + + def to_json(self) -> str: + """Serialize to a JSON message.""" + validate_gui_message_type(self.type) + data: Dict[str, Any] = {"type": self.type, "payload": self.payload} + if self.request_id is not None: + data["request_id"] = self.request_id + return json.dumps(data, sort_keys=True) + + @classmethod + def from_json(cls, raw: str) -> "GUIWebSocketMessage": + """Parse and validate a JSON message.""" + data = json.loads(raw) + if not isinstance(data, dict): + raise ValueError("GUI WebSocket message must be a JSON object") + msg_type = data.get("type") + if not isinstance(msg_type, str): + raise ValueError("GUI WebSocket message requires a string 'type'") + validate_gui_message_type(msg_type) + payload = data.get("payload", {}) + if not isinstance(payload, dict): + raise ValueError("GUI WebSocket message payload must be an object") + request_id = data.get("request_id") + if request_id is not None and not isinstance(request_id, str): + raise ValueError("GUI WebSocket request_id must be a string when present") + return cls(type=msg_type, payload=payload, request_id=request_id) + + +def validate_gui_message_type(message_type: str) -> None: + """Raise if ``message_type`` is not part of the public GUI contract.""" + if message_type not in GUI_WEBSOCKET_MESSAGE_TYPES: + allowed = ", ".join(sorted(GUI_WEBSOCKET_MESSAGE_TYPES)) + raise ValueError( + f"Unsupported GUI WebSocket message type '{message_type}'. 
@dataclass
class GUIBridgeState:
    """In-memory local bridge state for Step 22 reactive GUI sessions.

    Messages are applied one at a time through ``apply_message``. Malformed
    ``matrix.patch`` requests are answered with an ``error`` message and leave
    the stored models unchanged.
    """

    # Loaded models keyed by model id; each value is a private copy of its
    # ``model.load`` payload.
    models: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    # Successfully applied ``matrix.patch`` payloads, in arrival order.
    patches: List[Dict[str, Any]] = field(default_factory=list)
    # Validation results, both received and produced by patches.
    validation_results: List[Dict[str, Any]] = field(default_factory=list)
    # Payloads of ``error`` messages received from the peer.
    errors: List[Dict[str, Any]] = field(default_factory=list)

    def apply_message(self, message: GUIWebSocketMessage) -> GUIWebSocketMessage | None:
        """Apply a validated GUI message and return an optional response.

        Args:
            message: Message whose ``type`` selects the state transition.

        Returns:
            A ``validation.result`` for ``model.load`` and ``matrix.patch``
            (or an ``error`` for a rejected patch), a ``model.export`` for
            export requests, and ``None`` for message types that are only
            recorded.
        """
        if message.type == "model.load":
            model_id = _model_id_from_payload(message.payload, len(self.models) + 1)
            payload = copy.deepcopy(message.payload)
            payload.setdefault("model_id", model_id)
            self.models[model_id] = payload
            return GUIWebSocketMessage(
                type="validation.result",
                request_id=message.request_id,
                payload={"model_id": model_id, "valid": True, "errors": []},
            )
        if message.type == "matrix.patch":
            return self._apply_matrix_patch(message)
        if message.type == "validation.result":
            self.validation_results.append(copy.deepcopy(message.payload))
            return None
        if message.type == "model.export":
            requested_model = message.payload.get("model_id")
            # Ids assigned by ``model.load`` are strings. The isinstance guard
            # keeps an unhashable JSON value (list/object) from raising
            # TypeError in the membership test.
            if isinstance(requested_model, str) and requested_model in self.models:
                models = {requested_model: self.models[requested_model]}
            else:
                # No (known) model requested: export everything.
                models = self.models
            return GUIWebSocketMessage(
                type="model.export",
                request_id=message.request_id,
                payload={
                    "format": message.payload.get("format", "json"),
                    "models": copy.deepcopy(models),
                },
            )
        if message.type == "error":
            self.errors.append(copy.deepcopy(message.payload))
            return None
        return None

    def _apply_matrix_patch(self, message: GUIWebSocketMessage) -> GUIWebSocketMessage:
        """Apply one ``matrix.patch`` and answer with a result or an error.

        The payload supplies ``matrix`` (name), ``path`` (list of keys/indices
        below that matrix), ``value`` and optionally ``model_id``; the id may
        be omitted while exactly one model is loaded.
        """
        payload = message.payload
        model_id = payload.get("model_id")
        if model_id is None and len(self.models) == 1:
            model_id = next(iter(self.models))
        matrix_name = payload.get("matrix")
        path = payload.get("path")
        # The isinstance guard also rejects unhashable ids before the lookup.
        if not isinstance(model_id, str) or model_id not in self.models:
            return _bridge_error(
                "Unknown model_id for matrix.patch", message.request_id
            )
        if not isinstance(matrix_name, str) or not isinstance(path, list):
            return _bridge_error(
                "matrix.patch requires string matrix and list path",
                message.request_id,
            )
        model = self.models[model_id]
        had_matrices = "matrices" in model
        matrices = model.setdefault("matrices", {})
        if not isinstance(matrices, dict):
            return _bridge_error(
                "model matrices payload must be an object", message.request_id
            )
        created_placeholder = matrix_name not in matrices
        if created_placeholder:
            matrices[matrix_name] = []
        try:
            _patch_nested_value(matrices, [matrix_name, *path], payload.get("value"))
        except (LookupError, TypeError, ValueError) as exc:
            # Undo the scaffolding so a rejected patch leaves no stray empty
            # matrix behind in later exports.
            if created_placeholder:
                matrices.pop(matrix_name, None)
            if not had_matrices:
                model.pop("matrices", None)
            # str(KeyError) is only the quoted key, so give it some context.
            detail = (
                f"Unknown patch path segment {exc}"
                if isinstance(exc, KeyError)
                else str(exc)
            )
            return _bridge_error(detail, message.request_id)
        patch = copy.deepcopy(payload)
        patch["model_id"] = model_id
        self.patches.append(patch)
        result = {
            "model_id": model_id,
            "matrix": matrix_name,
            "valid": True,
            "patch_count": len(self.patches),
            "errors": [],
        }
        self.validation_results.append(result)
        return GUIWebSocketMessage(
            type="validation.result", request_id=message.request_id, payload=result
        )
+ """ + if host not in {"127.0.0.1", "localhost", "::1"}: + raise ValueError( + "GUI WebSocket bridge is local-only; bind to 127.0.0.1, localhost, or ::1" + ) + try: + import websockets # type: ignore[import-not-found] + except ImportError as exc: + raise RuntimeError( + "Install the optional websockets package to launch the GUI bridge" + ) from exc + + initial = list(initial_messages) + state = GUIBridgeState() + for message in initial: + state.apply_message(message) + messages = [message.to_json() for message in initial] + + async def handler(websocket: Any) -> None: + for message in messages: + await websocket.send(message) + async for raw in websocket: + response = state.apply_message(GUIWebSocketMessage.from_json(raw)) + if response is not None: + await websocket.send(response.to_json()) + + async with websockets.serve(handler, host, port): + await asyncio.Future() + + +def _model_id_from_payload(payload: Dict[str, Any], fallback_index: int) -> str: + """Derive a stable model id from a model.load payload.""" + for key in ("model_id", "model_name", "name", "path"): + value = payload.get(key) + if isinstance(value, str) and value: + return value + return f"model-{fallback_index}" + + +def _bridge_error(message: str, request_id: str | None = None) -> GUIWebSocketMessage: + """Build a GUI bridge error response.""" + return GUIWebSocketMessage( + type="error", + request_id=request_id, + payload={"message": message}, + ) + + +def _patch_nested_value(container: Dict[str, Any], path: List[Any], value: Any) -> None: + """Patch a nested dict/list matrix value in place.""" + if not path: + raise ValueError("Patch path cannot be empty") + current: Any = container + for key in path[:-1]: + if isinstance(current, dict): + current = current[key] + elif isinstance(current, list) and isinstance(key, int): + current = current[key] + else: + raise TypeError(f"Cannot traverse patch segment {key!r}") + final_key = path[-1] + if isinstance(current, dict): + current[final_key] = 
value + elif isinstance(current, list) and isinstance(final_key, int): + current[final_key] = value + else: + raise TypeError(f"Cannot set patch segment {final_key!r}") diff --git a/src/main.py b/src/main.py index 2a27b5c1d..f1dd18cc4 100644 --- a/src/main.py +++ b/src/main.py @@ -980,6 +980,17 @@ def main( logger, ) = _prepare_pipeline_context(override_args, override_config) + if getattr(args, "autonomous", False): + from pipeline.autonomous import run_autonomous_proposal_loop + + report = run_autonomous_proposal_loop(args.target_dir, args.output_dir) + logger.info( + "Autonomous proposal loop wrote %d candidate(s) under %s/autonomous", + report.get("candidate_count", 0), + args.output_dir, + ) + return 0 + progress_tracker: Optional[PipelineProgressTracker] = None try: progress_tracker = _start_pipeline_run( diff --git a/src/mcp/MCP_DOCUMENTATION.md b/src/mcp/MCP_DOCUMENTATION.md index 5d8567071..b1559ee67 100644 --- a/src/mcp/MCP_DOCUMENTATION.md +++ b/src/mcp/MCP_DOCUMENTATION.md @@ -224,11 +224,16 @@ python -m src.mcp.cli --help python -m src.mcp.cli server --transport stdio ``` -**HTTP Transport (For network access):** +**HTTP Transport (Authenticated local orchestration):** ```bash -python -m src.mcp.cli server --transport http --host 0.0.0.0 --port 8080 +GNN_MCP_TOKEN=local-dev-token python -m src.mcp.cli server --transport http --host 127.0.0.1 --port 8080 ``` +HTTP exposes only safe tools by default. Resource reads are denied unless an +exact URI is listed in `GNN_MCP_SAFE_RESOURCES`; keep stdio transport for broad +local resource access. +The HTTP capability response is filtered to this exposed safe surface. + #### Using the CLI **List all available tools:** @@ -467,7 +472,13 @@ python -m src.mcp.cli server --transport stdio ### HTTP Server -The HTTP server provides network-based access: +The HTTP server provides authenticated JSON-RPC access. Keep it bound to +`127.0.0.1` unless a trusted reverse proxy provides TLS and access control. 
+Tool execution and resource reads are guarded by separate allowlists: +`GNN_MCP_SAFE_RESOURCES` must contain each URI that `mcp.resource.get` may +return over HTTP. +Rate limiting is checked before bearer authentication, so repeated missing or +invalid tokens are throttled by `GNN_MCP_RATE_LIMIT_PER_MINUTE`. ```python from mcp.server_http import MCPHTTPHandler @@ -480,7 +491,7 @@ server.serve_forever() **Usage:** ```bash -python -m src.mcp.cli server --transport http --host 0.0.0.0 --port 8080 +GNN_MCP_TOKEN=local-dev-token python -m src.mcp.cli server --transport http --host 127.0.0.1 --port 8080 ``` ## JSON-RPC API @@ -514,6 +525,9 @@ python -m src.mcp.cli server --transport http --host 0.0.0.0 --port 8080 ``` #### Get Resource +HTTP transport rejects resource reads unless the URI is explicitly allowlisted +through `GNN_MCP_SAFE_RESOURCES`; stdio/direct process use is unaffected. + ```json { "jsonrpc": "2.0", diff --git a/src/mcp/README.md b/src/mcp/README.md index ecaf02304..075958a4c 100644 --- a/src/mcp/README.md +++ b/src/mcp/README.md @@ -60,10 +60,13 @@ The central MCP server implementation that: - Ideal for local process communication #### HTTP Server (`server_http.py`) -- HTTP-based JSON-RPC server -- Supports both GET and POST requests -- Configurable host and port -- Suitable for network-based access +- HTTP-based JSON-RPC server for local orchestration +- Defaults to `127.0.0.1` +- Supports bearer-token auth through `GNN_MCP_TOKEN` +- Supports per-client rate limiting through `GNN_MCP_RATE_LIMIT_PER_MINUTE` +- Exposes only a safe tool allowlist by default; unsafe tools require explicit opt-in +- Denies resource reads by default; expose individual URIs with `GNN_MCP_SAFE_RESOURCES` +- Filters HTTP capability responses to the same safe tool/resource surface ### 3. 
Command Line Interface (`cli.py`) @@ -80,7 +83,7 @@ python -m src.mcp.cli status # Start server python -m src.mcp.cli server --transport stdio -python -m src.mcp.cli server --transport http --host 0.0.0.0 --port 8080 +GNN_MCP_TOKEN=local-dev-token python -m src.mcp.cli server --transport http --host 127.0.0.1 --port 8080 ``` ### 4. Meta-Tools (`meta_mcp.py`) @@ -160,11 +163,17 @@ Server introspection and diagnostic tools: python -m src.mcp.cli server --transport stdio ``` -#### HTTP Transport (For network access) +#### HTTP Transport (Local JSON-RPC orchestration) ```bash -python -m src.mcp.cli server --transport http --host 0.0.0.0 --port 8080 +GNN_MCP_TOKEN=local-dev-token python -m src.mcp.cli server --transport http --host 127.0.0.1 --port 8080 ``` +HTTP tool execution and resource reads are guarded separately. Tool calls are +limited to the safe HTTP allowlist, while `mcp.resource.get` returns an error +unless the requested URI is listed in `GNN_MCP_SAFE_RESOURCES`. +Rate limiting is evaluated before bearer authentication so bad-token traffic is +throttled by the same per-client limit. + ### 2. Using the CLI #### List all available tools @@ -276,15 +285,22 @@ python -m src.mcp.cli execute get_mcp_performance_metrics - **HTTP**: Network accessible, consider HTTPS for production ### Authentication -- No built-in authentication (relies on transport security) -- Implement authentication for network deployments -- Use stdio transport for maximum security +- HTTP transport requires `Authorization: Bearer <token>` by default. +- Missing or invalid bearer tokens receive `401` before JSON-RPC execution. +- Unauthenticated HTTP is only available for explicit loopback development with + `GNN_MCP_ALLOW_INSECURE_LOCAL=1`; non-loopback clients still receive `401`. +- Resource reads over HTTP are denied unless the exact URI is included in + `GNN_MCP_SAFE_RESOURCES`. +- HTTP capability discovery lists only tools and resources exposed by those + allowlists. 
+- Use stdio transport for maximum security. ### Recommendations 1. Use stdio transport for local-only access -2. Configure HTTPS for HTTP transport if needed -3. Implement authentication for untrusted networks -4. Monitor access logs and performance metrics +2. Keep HTTP bound to `127.0.0.1` unless a separate trusted reverse proxy is used +3. Set `GNN_MCP_TOKEN` for every HTTP run +4. Set `GNN_MCP_SAFE_RESOURCES` only for resource URIs that are safe to expose +5. Monitor access logs and performance metrics ## Development and Extension diff --git a/src/mcp/meta_mcp.py b/src/mcp/meta_mcp.py index 39dedb716..7ba5829e8 100644 --- a/src/mcp/meta_mcp.py +++ b/src/mcp/meta_mcp.py @@ -142,18 +142,20 @@ def get_mcp_auth_status(mcp_instance_ref: Any) -> Dict[str, Any]: """ return { "success": True, - "authentication_type": "none_implemented", - "access_level": "unrestricted_local_access", + "authentication_type": "bearer_token_for_http", + "access_level": "safe_http_tool_and_resource_allowlists", "transport_security": { "stdio": "local_process_only", - "http": "local_network_only", + "http": "bearer_token_required_by_default", "https": "not_configured", }, - "description": "Server does not implement explicit authentication. Relies on transport security.", + "description": "HTTP transport requires Authorization: Bearer by default. Unauthenticated development mode is restricted to explicit loopback opt-in. 
def get_required_bearer_token() -> Optional[str]:
    """Return the configured MCP HTTP bearer token, if auth is enabled.

    The token is read from ``GNN_MCP_TOKEN``. Surrounding whitespace is
    removed: the value is already treated as unset when it is blank after
    stripping, and a padded value (for example a trailing newline copied from
    a secrets file) cannot be matched by a header value.

    Returns:
        The stripped token, or ``None`` when the variable is unset or blank.
    """
    token = os.environ.get("GNN_MCP_TOKEN")
    if token is None:
        return None
    # ``or None`` maps a blank value to "auth not configured".
    return token.strip() or None
def is_authorized(headers: Any, *, client_host: str | None = None) -> bool:
    """Validate HTTP headers against ``GNN_MCP_TOKEN``.

    HTTP transport requires bearer authentication by default. Developers may
    opt into unauthenticated loopback-only experimentation with
    ``GNN_MCP_ALLOW_INSECURE_LOCAL=1``.

    Args:
        headers: Mapping-like request headers exposing ``get``.
        client_host: Peer address, used only when no token is configured.

    Returns:
        ``True`` when the request may proceed, ``False`` otherwise.
    """
    # Local import keeps this block self-contained; hmac is standard library.
    import hmac

    token = get_required_bearer_token()
    if token is None:
        return allow_insecure_local_http() and is_loopback_client(client_host)
    auth_header = headers.get("Authorization", "")
    if not isinstance(auth_header, str):
        return False
    # Constant-time comparison so response timing does not reveal how much of
    # a guessed token matched. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII str; surrogateescape keeps undecodable
    # header bytes from raising during the encode.
    supplied = auth_header.encode("utf-8", "surrogateescape")
    expected = f"Bearer {token}".encode("utf-8", "surrogateescape")
    return hmac.compare_digest(supplied, expected)
def get_http_capabilities() -> Dict[str, Any]:
    """Return capabilities filtered to the HTTP-exposed allowlists.

    Tools are filtered by name unless unsafe tools are allowed (the tool
    allowlist is ``None``). Resources are always filtered by their
    ``uri_template``. The ``server`` section is copied and extended with an
    ``http_access`` summary of the exposed surface.
    """
    full = mcp_instance.get_capabilities()
    allowed_tools = get_safe_http_tool_names()
    allowed_resources = get_safe_http_resource_uris()

    exposed_tools = full.get("tools", [])
    if allowed_tools is not None:
        exposed_tools = [
            entry
            for entry in exposed_tools
            if isinstance(entry, dict) and str(entry.get("name")) in allowed_tools
        ]

    exposed_resources = []
    for entry in full.get("resources", []):
        if not isinstance(entry, dict):
            continue
        if str(entry.get("uri_template")) in allowed_resources:
            exposed_resources.append(entry)

    # Copy so the summary is not written into the shared capabilities object.
    server_info = dict(full.get("server", {}))
    server_info["http_access"] = {
        "safe_tools_only": allowed_tools is not None,
        "safe_tool_count": len(exposed_tools),
        "safe_resource_count": len(exposed_resources),
        "resource_allowlist_env": "GNN_MCP_SAFE_RESOURCES",
    }
    return {
        "tools": exposed_tools,
        "resources": exposed_resources,
        "server": server_info,
    }
invocation (meta-tools, registered tools, etc.) @@ -102,6 +287,13 @@ def _handle_jsonrpc(self, request: Dict[str, Any]) -> Any: request_id, -32602, "Params must be an object (dictionary)" ) return + if not is_safe_http_tool(str(method)): + self._send_jsonrpc_error( + request_id, + -32001, + f"Tool not exposed over MCP HTTP by default: {method}", + ) + return result = mcp_instance.execute_tool(method, params) self._send_jsonrpc_result(request_id, result) else: @@ -158,6 +350,17 @@ def _send_error(self, status_code: int, message: str) -> Any: error_body = json.dumps({"error": message}).encode("utf-8") self.wfile.write(error_body) + def _discard_request_body(self, content_length: int) -> None: + """Drain a rejected request body so clients can read the error cleanly.""" + if content_length <= 0: + return + try: + self.rfile.read(content_length) + except OSError: + logger.debug( + "Could not drain rejected MCP HTTP request body", exc_info=True + ) + def log_message(self, format: Any, *args: Any) -> Any: """Override log_message to use our logger.""" logger.info( diff --git a/src/pipeline/autonomous.py b/src/pipeline/autonomous.py new file mode 100644 index 000000000..f1d32b967 --- /dev/null +++ b/src/pipeline/autonomous.py @@ -0,0 +1,229 @@ +"""Bounded autonomous proposal artifacts for the GNN pipeline. + +Autonomous mode writes reports and candidate patch files only; it does not edit +repository source files, commit changes, or mutate live infrastructure. 
def run_autonomous_proposal_loop(
    target_dir: Path, output_dir: Path, *, max_candidates: int = 3
) -> Dict[str, Any]:
    """Write bounded candidate-evaluation artifacts without modifying source files.

    Every artifact is written under ``output_dir / "autonomous"``: one
    ``candidate-N.gnn.patch`` per candidate, ``autonomous_proposals.json``,
    the evaluation report as JSON and Markdown, and a combined
    ``candidate_patch.diff``.

    Args:
        target_dir: Directory searched recursively for ``*.md`` candidates.
            A missing directory yields no candidates.
        output_dir: Root directory that receives the ``autonomous`` folder.
        max_candidates: Upper bound on generated candidates. Values below
            zero are treated as zero.

    Returns:
        The proposal report that is also written to
        ``autonomous_proposals.json``.
    """
    # Callers may hand over plain strings (e.g. CLI arguments).
    target_dir = Path(target_dir)
    output_dir = Path(output_dir)
    autonomous_dir = output_dir / "autonomous"
    autonomous_dir.mkdir(parents=True, exist_ok=True)

    # When the output tree is nested inside the target tree, the Markdown
    # report written by an earlier run must not be proposed as a model file.
    own_artifacts_root = autonomous_dir.resolve()
    discovered = sorted(target_dir.rglob("*.md")) if target_dir.exists() else []
    gnn_files = [
        path
        for path in discovered
        if path.is_file() and own_artifacts_root not in path.resolve().parents
    ]
    # A negative bound would otherwise slice as "all but the last N".
    limit = max(0, max_candidates)
    candidates = [
        {
            "candidate_id": f"candidate-{index + 1}",
            "source_file": str(path),
            "proposal": "Evaluate matrix dimensions, execution telemetry, and validation errors before applying any model patch.",
            "patch_artifact": str(autonomous_dir / f"candidate-{index + 1}.gnn.patch"),
            "source_mutation_performed": False,
        }
        for index, path in enumerate(gnn_files[:limit])
    ]
    patch_bodies: List[str] = []
    for candidate in candidates:
        patch_body = _candidate_patch_text(candidate)
        Path(str(candidate["patch_artifact"])).write_text(patch_body, encoding="utf-8")
        # Kept in memory so the combined diff does not re-read each file.
        patch_bodies.append(patch_body)
    evaluation_report = _build_evaluation_report(candidates, target_dir, output_dir)
    report: Dict[str, Any] = {
        "schema": "gnn_autonomous_proposal_loop_v1",
        "created_at": datetime.now().isoformat(),
        "target_dir": str(target_dir),
        "candidate_count": len(candidates),
        "candidates": candidates,
        "observation_streams": collect_observation_streams(target_dir),
        "container_plan": build_container_plan(target_dir),
        "evaluation_report": evaluation_report,
        "source_mutation_performed": False,
        "cluster_mutation_performed": False,
    }
    (autonomous_dir / "autonomous_proposals.json").write_text(
        json.dumps(report, indent=2), encoding="utf-8"
    )
    (autonomous_dir / "autonomous_evaluation_report.json").write_text(
        json.dumps(evaluation_report, indent=2), encoding="utf-8"
    )
    (autonomous_dir / "autonomous_evaluation_report.md").write_text(
        _evaluation_report_markdown(evaluation_report), encoding="utf-8"
    )
    (autonomous_dir / "candidate_patch.diff").write_text(
        "\n".join(patch_bodies)
        or "# No candidate patches generated. Autonomous mode only writes proposals.\n",
        encoding="utf-8",
    )
    return report
def score_candidate_proposal(
    candidate: Dict[str, Any], evidence: Dict[str, Any]
) -> Dict[str, Any]:
    """Score a candidate patch artifact without applying it.

    The score starts at 40 and gains 20 when the source file exists, 15 when
    the patch artifact exists, up to 15 from the mean execution success rate
    (``rate // 10``), and 10 when validation artifacts were found. The total
    is clamped to 0-100.

    Args:
        candidate: Candidate record; ``source_file`` and ``patch_artifact``
            are read when present.
        evidence: Evidence record; ``execution_success_rate_mean`` and
            ``validation_artifact_files`` are read when present.

    Returns:
        A dict with ``value``, ``scale``, ``recommendation`` and ``reasons``.
    """
    # Local import keeps this block self-contained; math is standard library.
    import math

    def _names_existing_path(raw: Any) -> bool:
        # Path("") normalizes to ".", which always exists. A missing or blank
        # entry must therefore be rejected before touching the filesystem.
        text = "" if raw is None else str(raw)
        return bool(text.strip()) and Path(text).exists()

    score = 40
    reasons: List[str] = ["proposal_only"]
    if _names_existing_path(candidate.get("source_file")):
        score += 20
        reasons.append("source_exists")
    if _names_existing_path(candidate.get("patch_artifact")):
        score += 15
        reasons.append("patch_artifact_written")
    success_rate = evidence.get("execution_success_rate_mean")
    # NaN or infinity (both accepted by json.loads) cannot be converted by
    # int(); such values are treated as "no usable execution evidence".
    if isinstance(success_rate, (int, float)) and math.isfinite(success_rate):
        score += min(15, int(success_rate // 10))
        reasons.append("execution_summary_available")
    if evidence.get("validation_artifact_files"):
        score += 10
        reasons.append("validation_artifacts_available")
    bounded_score = max(0, min(100, score))
    return {
        "value": bounded_score,
        "scale": "0-100",
        "recommendation": (
            "review_with_validators" if bounded_score >= 70 else "needs_more_evidence"
        ),
        "reasons": reasons,
    }
mutation performed: false", + "- Cluster mutation performed: false", + ] + return "\n".join(lines) + "\n" diff --git a/src/pipeline/model_family_acceptance.py b/src/pipeline/model_family_acceptance.py new file mode 100644 index 000000000..c48b90ee4 --- /dev/null +++ b/src/pipeline/model_family_acceptance.py @@ -0,0 +1,671 @@ +"""Manifest-driven model-family acceptance harness for v1.9 evidence.""" + +from __future__ import annotations + +import json +import os +import re +import shutil +import subprocess +import sys +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any, Callable, Dict, Iterable, List, Sequence + +from analysis.interpretability import ( + build_family_interpretability_summary, + render_family_interpretability_markdown, +) +from report.model_family import render_model_family_acceptance_markdown + +REPO_ROOT = Path(__file__).resolve().parents[2] +DEFAULT_EVIDENCE_STEPS = "3,5,6,11,12,15,16,23" +PIPELINE_STEPS = tuple(range(25)) +PASSING_STEP_STATUSES = {"SUCCESS", "PASSED", "PASS", "OK"} +SKIPPED_STEP_STATUSES = {"SKIPPED", "SKIP", "NOT_RUN", "NOT RUN"} +ACCEPTABLE_SUMMARY_STATUSES = { + "SUCCESS", + "SUCCESS_WITH_WARNINGS", + "PARTIAL_SUCCESS", + "COMPLETED_WITH_WARNINGS", + "WARNING", + "WARNINGS", +} +STEP_ARTIFACT_REQUIREMENTS = { + "3": ( + "3_gnn_output/gnn_processing_summary.json", + "3_gnn_output/gnn_processing_results.json", + ), + "5": ("5_type_checker_output/type_check_results.json",), + "6": ( + "6_validation_output/validation_summary.json", + "6_validation_output/validation_results.json", + ), + "11": ("11_render_output/render_processing_summary.json",), + "12": ("12_execute_output/summaries/execution_summary.json",), + "15": ("15_audio_output/audio_results.json",), + "16": ("16_analysis_output/analysis_results.json",), + "23": ("23_report_output/report_processing_summary.json",), +} +DEFAULT_ACCEPTANCE_PROFILE = { + "required_steps": [3, 5, 6, 15, 16, 23], + "evidence_steps": 
def load_model_family_manifest(manifest_path: Path) -> List[ModelFamily]:
    """Load and validate the maintained model-family manifest.

    Args:
        manifest_path: Path of the JSON manifest file.

    Returns:
        One ``ModelFamily`` per entry of the manifest's ``families`` list.

    Raises:
        ValueError: If the document is not a JSON object with schema
            ``gnn_model_family_manifest_v1``, if ``families`` is not a list,
            or if ``acceptance_profile_defaults`` is present but not an
            object. Malformed JSON raises ``json.JSONDecodeError``, itself a
            ``ValueError``.
    """
    payload = json.loads(manifest_path.read_text(encoding="utf-8"))
    # A top-level list/string/null has no ``get``; report it as the same
    # validation error instead of an AttributeError.
    if (
        not isinstance(payload, dict)
        or payload.get("schema") != "gnn_model_family_manifest_v1"
    ):
        raise ValueError("Unsupported model-family manifest schema")
    families = payload.get("families")
    if not isinstance(families, list):
        raise ValueError("Model-family manifest requires a families list")
    if "acceptance_profile_defaults" in payload:
        defaults = payload["acceptance_profile_defaults"]
    else:
        defaults = DEFAULT_ACCEPTANCE_PROFILE
    if not isinstance(defaults, dict):
        raise ValueError("Model-family manifest acceptance defaults must be an object")
    return [_parse_family(raw, defaults) for raw in families]
def _parse_family(raw: Any, acceptance_defaults: Dict[str, Any]) -> ModelFamily:
    """Build one ``ModelFamily`` from a raw manifest entry.

    Args:
        raw: Manifest entry; must be an object with ``name``, ``description``,
            ``target_dir`` and a non-empty ``representative_files`` list.
        acceptance_defaults: Manifest-level acceptance profile defaults that
            the entry's own ``acceptance_profile`` overrides.

    Raises:
        ValueError: If the entry is not an object, ``representative_files``
            is not a non-empty list, ``acceptance_profile`` is not an object,
            or ``name`` is not a plain relative path.
        KeyError: If ``target_dir``, ``name`` or ``description`` is missing.
    """
    if not isinstance(raw, dict):
        raise ValueError("Model-family entries must be objects")
    target_dir = REPO_ROOT / str(raw["target_dir"])
    raw_files = raw.get("representative_files", [])
    # A bare string would otherwise be split into one "file" per character.
    if not isinstance(raw_files, (list, tuple)):
        raise ValueError(
            f"Model family {raw.get('name')} representative_files must be a list"
        )
    representatives = tuple(str(item) for item in raw_files)
    if not representatives:
        raise ValueError(f"Model family {raw.get('name')} has no representative files")
    raw_profile = raw.get("acceptance_profile", {})
    if raw_profile and not isinstance(raw_profile, dict):
        raise ValueError(
            f"Model family {raw.get('name')} acceptance_profile must be an object"
        )
    name = str(raw["name"])
    name_parts = Path(name).parts
    # The name becomes a directory below the acceptance output root that is
    # deleted and recreated on every run. It must not be empty, absolute, or
    # climb out of that root.
    if (
        not name.strip()
        or not name_parts
        or Path(name).is_absolute()
        or ".." in name_parts
    ):
        raise ValueError(
            f"Model family name {name!r} must be a relative path without '..' segments"
        )
    return ModelFamily(
        name=name,
        description=str(raw["description"]),
        target_dir=target_dir,
        representative_files=representatives,
        frameworks=str(raw["frameworks"]) if raw.get("frameworks") else None,
        acceptance_profile=_normalize_acceptance_profile(
            acceptance_defaults, raw_profile if isinstance(raw_profile, dict) else {}
        ),
    )
missing_summary_steps + ) + step_status = _apply_acceptance_profile( + raw_step_status, step_evidence, acceptance_profile + ) + if pipeline_summary is not None: + pipeline_passed = _selected_steps_passed( + step_status, only_steps, acceptance_profile, step_evidence + ) and _pipeline_run_outcome_acceptable( + return_code, pipeline_summary, step_evidence + ) + else: + pipeline_passed = False + interpretability_summary = build_family_interpretability_summary( + family.name, staged_input, pipeline_output + ) + (family_dir / "interpretability_summary.json").write_text( + json.dumps(interpretability_summary, indent=2), + encoding="utf-8", + ) + (family_dir / "interpretability_summary.md").write_text( + render_family_interpretability_markdown(interpretability_summary), + encoding="utf-8", + ) + return { + "name": family.name, + "description": family.description, + "source_target_dir": str(family.target_dir), + "staged_target_dir": str(staged_input), + "representative_files": [str(path) for path in copied_files], + "command": command, + "return_code": return_code, + "status": "passed" if pipeline_passed else "failed", + "acceptance_profile": acceptance_profile, + "pipeline_summary": _summarize_pipeline(pipeline_summary), + "stdout_tail": _tail_text(completed.stdout), + "stderr_tail": _tail_text(completed.stderr), + "steps": step_status, + "raw_steps": raw_step_status, + "step_evidence": step_evidence, + "step_status_counts": _count_step_statuses(step_status), + "artifact_links": _collect_family_artifacts(pipeline_output), + "interpretability_summary": interpretability_summary, + } + + +def _reset_family_dir(family_dir: Path) -> None: + """Create a fresh per-family output boundary before each acceptance run.""" + if family_dir.is_symlink() or family_dir.is_file(): + family_dir.unlink() + return + if family_dir.exists(): + shutil.rmtree(family_dir) + + +def _stage_representative_files(family: ModelFamily, staged_input: Path) -> List[Path]: + copied: list[Path] = [] + for 
relative_name in family.representative_files: + source = family.target_dir / relative_name + if not source.exists(): + raise FileNotFoundError(f"Representative fixture not found: {source}") + destination = staged_input / source.name + shutil.copy2(source, destination) + copied.append(destination) + return copied + + +def _pipeline_command( + target_dir: Path, + output_dir: Path, + only_steps: str | None, + frameworks: str | None, +) -> List[str]: + command = [ + sys.executable, + "src/main.py", + "--target-dir", + str(target_dir), + "--output-dir", + str(output_dir), + ] + if only_steps: + command.extend(["--only-steps", only_steps]) + if frameworks: + command.extend(["--frameworks", frameworks]) + command.append("--skip-llm") + return command + + +def _subprocess_runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + try: + return subprocess.run( + list(command), + cwd=REPO_ROOT, + text=True, + capture_output=True, + check=False, + timeout=DEFAULT_FAMILY_TIMEOUT_SECONDS, + ) + except subprocess.TimeoutExpired as exc: + return subprocess.CompletedProcess( + args=list(command), + returncode=124, + stdout=_coerce_timeout_output(exc.stdout), + stderr=( + _coerce_timeout_output(exc.stderr) + + f"\nTimed out after {DEFAULT_FAMILY_TIMEOUT_SECONDS}s" + ).strip(), + ) + + +def _build_step_status( + only_steps: str | None, + return_code: int, + pipeline_summary: Dict[str, Any] | None = None, +) -> Dict[str, str]: + selected = set(PIPELINE_STEPS if not only_steps else _parse_step_list(only_steps)) + summary_statuses = _extract_summary_step_statuses(pipeline_summary) + status: Dict[str, str] = {} + for step in PIPELINE_STEPS: + if step not in selected: + status[str(step)] = "skipped" + elif str(step) in summary_statuses: + status[str(step)] = summary_statuses[str(step)] + elif pipeline_summary is not None: + status[str(step)] = "failed" + else: + status[str(step)] = "passed" if return_code == 0 else "failed" + return status + + +def 
_validate_requested_steps_cover_profile( + family: ModelFamily, only_steps: str | None +) -> None: + """Reject acceptance runs that omit profile-required evidence steps.""" + if only_steps is None: + return + selected = set(_parse_step_list(only_steps)) + profile = family.acceptance_profile or dict(DEFAULT_ACCEPTANCE_PROFILE) + required = _profile_required_steps(profile) + missing = sorted(required - selected) + if missing: + raise ValueError( + f"Model family {family.name} acceptance profile requires steps " + f"{','.join(str(step) for step in sorted(required))}; " + f"--only-steps omitted {','.join(str(step) for step in missing)}" + ) + + +def _profile_required_steps(acceptance_profile: Dict[str, Any]) -> set[int]: + """Return all steps that must be selected to produce release evidence.""" + required = set(_coerce_step_list(acceptance_profile["required_steps"])) + required.update(_coerce_step_list(acceptance_profile["evidence_steps"])) + required.update(_coerce_step_list(acceptance_profile["allow_unsupported_steps"])) + return required + + +def _missing_summary_steps( + only_steps: str | None, pipeline_summary: Dict[str, Any] | None +) -> set[str]: + """Return selected step ids absent from an available pipeline summary.""" + if pipeline_summary is None: + return set() + selected = set(PIPELINE_STEPS if not only_steps else _parse_step_list(only_steps)) + summary_statuses = _extract_summary_step_statuses(pipeline_summary) + return {str(step) for step in selected if str(step) not in summary_statuses} + + +def _load_pipeline_summary(pipeline_output: Path) -> Dict[str, Any] | None: + summary_path = ( + pipeline_output / "00_pipeline_summary" / "pipeline_execution_summary.json" + ) + if not summary_path.exists(): + return None + try: + payload = json.loads(summary_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return None + return payload if isinstance(payload, dict) else None + + +def _summarize_pipeline(pipeline_summary: Dict[str, Any] 
| None) -> Dict[str, Any]: + if not pipeline_summary: + return {"available": False} + return { + "available": True, + "overall_status": pipeline_summary.get("overall_status"), + "total_duration_seconds": pipeline_summary.get("total_duration_seconds"), + "performance_summary": pipeline_summary.get("performance_summary", {}), + } + + +def _extract_summary_step_statuses( + pipeline_summary: Dict[str, Any] | None, +) -> Dict[str, str]: + if not pipeline_summary: + return {} + statuses: Dict[str, str] = {} + for raw_step in pipeline_summary.get("steps", []): + if not isinstance(raw_step, dict): + continue + step_number = _step_number_from_summary(raw_step) + if step_number is None: + continue + statuses[str(step_number)] = _normalize_step_status( + str(raw_step.get("status", "")) + ) + return statuses + + +def _step_number_from_summary(step: Dict[str, Any]) -> int | None: + script_name = str(step.get("script_name", "")) + match = re.match(r"(?P\d+)_", script_name) + if match: + return int(match.group("number")) + raw_step = step.get("step_number") + if isinstance(raw_step, int) and raw_step in PIPELINE_STEPS: + return int(raw_step) + return None + + +def _normalize_step_status(status: str) -> str: + normalized = status.strip().upper().replace("-", "_") + if normalized in PASSING_STEP_STATUSES: + return "passed" + if normalized in SKIPPED_STEP_STATUSES: + return "skipped" + if "SKIP" in normalized: + return "skipped" + if "SUCCESS" in normalized and "PARTIAL" not in normalized: + return "passed" + return "failed" + + +def _build_step_evidence( + raw_step_status: Dict[str, str], + pipeline_output: Path, + missing_summary_steps: set[str] | None = None, +) -> Dict[str, Dict[str, Any]]: + evidence: Dict[str, Dict[str, Any]] = {} + missing_summary_steps = missing_summary_steps or set() + render_reason = _render_skip_or_failure_reason(pipeline_output) + execution_reason = _execution_skip_or_failure_reason(pipeline_output) + for step, status in raw_step_status.items(): + reason 
= None + evidence_status = status + if step in missing_summary_steps: + reason = "missing_summary_evidence" + evidence_status = "failed" + elif step == "11": + reason = render_reason + elif step == "12": + reason = execution_reason + artifact_links = _step_artifact_links(step, pipeline_output) + if ( + status == "passed" + and step in STEP_ARTIFACT_REQUIREMENTS + and not artifact_links + ): + reason = "missing_artifact_evidence" + evidence_status = "failed" + evidence[step] = { + "raw_status": status, + "status": evidence_status, + "acceptance": "required", + "reason": reason, + "artifact_links": artifact_links, + } + return evidence + + +def _apply_acceptance_profile( + raw_step_status: Dict[str, str], + step_evidence: Dict[str, Dict[str, Any]], + acceptance_profile: Dict[str, Any], +) -> Dict[str, str]: + effective = { + step: str(step_evidence.get(step, {}).get("status", raw_status)) + for step, raw_status in raw_step_status.items() + } + allowed_steps = set( + _coerce_step_list(acceptance_profile["allow_unsupported_steps"]) + ) + patterns = [ + str(pattern) + for pattern in acceptance_profile.get("allow_unsupported_reason_patterns", []) + ] + for step in allowed_steps: + key = str(step) + evidence = step_evidence.get(key, {}) + reason = str(evidence.get("reason") or "") + if ( + effective.get(key) in {"failed", "skipped"} + and reason + and _matches_any_pattern(reason, patterns) + ): + effective[key] = "skipped" + evidence["status"] = "skipped" + evidence["acceptance"] = "allowed_unsupported" + evidence["reason"] = reason + return effective + + +def _selected_steps_passed( + step_status: Dict[str, str], + only_steps: str | None, + acceptance_profile: Dict[str, Any], + step_evidence: Dict[str, Dict[str, Any]], +) -> bool: + selected = set(PIPELINE_STEPS if not only_steps else _parse_step_list(only_steps)) + allowed_steps = set( + _coerce_step_list(acceptance_profile["allow_unsupported_steps"]) + ) + for step in selected: + key = str(step) + status = 
step_status.get(key) + if status == "passed": + continue + if ( + status == "skipped" + and step in allowed_steps + and step_evidence.get(key, {}).get("acceptance") == "allowed_unsupported" + ): + continue + return False + return True + + +def _pipeline_run_outcome_acceptable( + return_code: int, + pipeline_summary: Dict[str, Any], + step_evidence: Dict[str, Dict[str, Any]], +) -> bool: + """Reject contradictory run outcomes unless all failures are profiled skips.""" + if return_code in {0, 2} and _summary_status_is_acceptable(pipeline_summary): + return True + return any( + evidence.get("acceptance") == "allowed_unsupported" + for evidence in step_evidence.values() + ) + + +def _summary_status_is_acceptable(pipeline_summary: Dict[str, Any]) -> bool: + status = str(pipeline_summary.get("overall_status", "")).strip().upper() + if not status: + return True + return status in ACCEPTABLE_SUMMARY_STATUSES + + +def _matches_any_pattern(reason: str, patterns: Sequence[str]) -> bool: + return any(pattern and pattern in reason for pattern in patterns) + + +def _render_skip_or_failure_reason(pipeline_output: Path) -> str | None: + summary = _load_first_json( + pipeline_output / "11_render_output", ("render_processing_summary.json",) + ) + successful = summary.get("successful_framework_renderings") + messages = [ + str(item.get("message", "")) + for item in summary.get("failed_framework_renderings", []) + if isinstance(item, dict) and item.get("message") + ] + if successful == 0 and messages: + return "; ".join(messages) + if successful == 0: + return str(summary.get("message") or "no compatible renderings") + if messages: + return "partial_render_failure" + return None + + +def _execution_skip_or_failure_reason(pipeline_output: Path) -> str | None: + summary = _load_first_json( + pipeline_output / "12_execute_output", ("execution_summary.json",) + ) + parts: list[str] = [] + for key in ("skipped_reason", "message", "failure_reason", "error"): + if summary.get(key): + 
parts.append(str(summary[key])) + for item in summary.get("render_failures", []): + if isinstance(item, dict) and item.get("message"): + parts.append(str(item["message"])) + return "; ".join(parts) if parts else None + + +def _load_first_json(root: Path, names: Sequence[str]) -> Dict[str, Any]: + if not root.exists(): + return {} + for path in sorted(root.rglob("*.json")): + if path.name in set(names): + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return {} + return payload if isinstance(payload, dict) else {} + return {} + + +def _step_artifact_links(step: str, pipeline_output: Path) -> List[str]: + requirements = STEP_ARTIFACT_REQUIREMENTS.get(step, ()) + links = [] + for relative in requirements: + path = pipeline_output / relative + if not path.is_file(): + return [] + links.append(str(path)) + return links + + +def _parse_step_list(raw: str) -> List[int]: + return [int(part.strip()) for part in raw.split(",") if part.strip()] + + +def _count_step_statuses(step_status: Dict[str, str]) -> Dict[str, int]: + counts = {"available": 0, "passed": 0, "skipped": 0, "failed": 0} + for status in step_status.values(): + counts[status] = counts.get(status, 0) + 1 + return counts + + +def _collect_family_artifacts(pipeline_output: Path) -> List[str]: + if not pipeline_output.exists(): + return [] + return [str(path) for path in sorted(pipeline_output.rglob("*")) if path.is_file()][ + :50 + ] + + +def _write_ledger_artifacts(ledger: Dict[str, Any], output_dir: Path) -> None: + (output_dir / "model_family_acceptance_ledger.json").write_text( + json.dumps(ledger, indent=2), + encoding="utf-8", + ) + (output_dir / "model_family_acceptance_ledger.md").write_text( + render_model_family_acceptance_markdown(ledger), + encoding="utf-8", + ) + + +def _tail_text(text: str | None, max_chars: int = 4000) -> str: + if not text: + return "" + return text[-max_chars:] + + +def _coerce_timeout_output(value: str | bytes | None) -> 
str: + if value is None: + return "" + if isinstance(value, bytes): + return value.decode(errors="replace") + return value diff --git a/src/render/discopy/AGENTS.md b/src/render/discopy/AGENTS.md index 7f8b1e3ce..00990940a 100644 --- a/src/render/discopy/AGENTS.md +++ b/src/render/discopy/AGENTS.md @@ -20,6 +20,7 @@ 2. Map GNN variables to DisCoPy `Box`es and connections to wires. 3. Emit a `main()` in the generated script that assembles and (when `discopy.drawing` is present) renders the diagram. 4. Surface warnings for missing `InitialParameterization`, `ModelParameters`, and `Connections` sections rather than hard-failing. +5. Validate and emit matrix permutation metadata when supplied by parsed GNN data; current permutation support is metadata-only and does not reorder generated diagram wires or boxes. ### What this module is **not** @@ -48,7 +49,7 @@ def render_gnn_to_discopy( **Parameters**: - `gnn_spec` — Parsed GNN spec dict (output of `gnn.parse_gnn_file`). - `output_path` — Destination `.py` path for the generated script. Parent directories are created. -- `options` — Optional dict forwarded to `DisCoPyRenderer`. Currently unused by the generator but reserved for future template knobs. +- `options` — Optional dict forwarded to `DisCoPyRenderer`. `matrix_permutations` is consumed for metadata validation; other keys are reserved for future template knobs. **Returns**: `(success, message, warnings)` where `warnings` is a list of human-readable strings describing missing optional sections in the spec. @@ -93,6 +94,7 @@ python -m render.render --target discopy --gnn-file input/gnn_files/actinf_pomdp A single Python file (``) containing: - Imports (`discopy`, `discopy.drawing`, `numpy`). - Box and type declarations derived from `gnn_spec.variables`. +- `MATRIX_PERMUTATION_METADATA` and `MATRIX_PERMUTATION_APPLIED_TO_DIAGRAM = False` when valid matrix permutation records are present. 
- A `main()` that composes the diagram and calls `diagram.draw(...)` when DisCoPy drawing backends are available. - A `__main__` guard so the script is runnable standalone. @@ -150,6 +152,7 @@ GNN spec → render_gnn_to_discopy → .py → execute.discopy runs scri - The generated script assumes DisCoPy's Python API; categorical features that require `pytket` or `lambeq` are not emitted. These were aspirational in prior drafts and have been removed from this document. - JAX-backed diagram evaluation is not supported. See `render/jax/` for a JAX-specific renderer with a different code-path. +- Matrix permutation support is currently a validated metadata contract. The emitted DisCoPy diagram is still built from the parsed variable and connection structure; non-identity permutation metadata is not yet reflected as reordered diagram construction. --- diff --git a/src/render/discopy/discopy_renderer.py b/src/render/discopy/discopy_renderer.py index 4d08e0edb..075d86d84 100644 --- a/src/render/discopy/discopy_renderer.py +++ b/src/render/discopy/discopy_renderer.py @@ -15,11 +15,14 @@ Date: 2024 """ +import json import logging from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +from .symmetry import build_matrix_permutation_metadata + class DisCoPyRenderer: """ @@ -162,6 +165,10 @@ def _generate_discopy_diagram_code( # Extract connections if available gnn_spec.get("connections", []) + symmetry_metadata = build_matrix_permutation_metadata( + gnn_spec, self.options.get("matrix_permutations") + ) + symmetry_metadata_json = json.dumps(symmetry_metadata, indent=2) # Generate the Python code code = f'''#!/usr/bin/env python3 @@ -175,43 +182,12 @@ def _generate_discopy_diagram_code( """ import sys -import subprocess -# Ensure DisCoPy is installed before importing try: import discopy - print("✅ DisCoPy is available") except ImportError: - print("📦 DisCoPy not found - installing...") - try: - # Try UV first (as per project rules) 
- result = subprocess.run( - [sys.executable, "-m", "uv", "pip", "install", "discopy"], - capture_output=True, - text=True, - timeout=180 - ) - if result.returncode != 0: - # Recovery to pip if UV fails - print("⚠️ UV install failed, trying pip...") - result = subprocess.run( - [sys.executable, "-m", "pip", "install", "discopy"], - capture_output=True, - text=True, - timeout=180 - ) - if result.returncode == 0: - print("✅ DisCoPy installed successfully") - import discopy - else: - print(f"❌ Failed to install DisCoPy: {{result.stderr}}") - sys.exit(1) - except subprocess.TimeoutExpired: - print("❌ DisCoPy installation timed out") - sys.exit(1) - except Exception as e: - print(f"❌ Error installing DisCoPy: {{e}}") - sys.exit(1) + print("DisCoPy is not installed. Install the project render extras before running this script.") + sys.exit(1) from discopy import * from discopy.monoidal import Ty, Box, Id @@ -226,6 +202,28 @@ def _generate_discopy_diagram_code( NUM_STATES = {num_states} NUM_OBSERVATIONS = {num_observations} NUM_ACTIONS = {num_actions} +MATRIX_PERMUTATION_METADATA = {symmetry_metadata_json} +MATRIX_PERMUTATION_APPLIED_TO_DIAGRAM = False + +def validate_matrix_permutation_metadata(metadata): + """Validate generated matrix permutation metadata before exporting diagrams.""" + if not isinstance(metadata, dict): + raise ValueError("Matrix permutation metadata must be a dictionary") + for matrix_name, record in metadata.items(): + if record.get("axis") != "rows": + raise ValueError(f"Unsupported permutation axis for {{matrix_name}}") + shape = record.get("shape") + permutation = record.get("permutation") + if not isinstance(shape, list) or not isinstance(permutation, list): + raise ValueError(f"Invalid permutation metadata for {{matrix_name}}") + if not shape: + raise ValueError(f"Missing matrix shape for {{matrix_name}}") + if len(permutation) != int(shape[0]): + raise ValueError(f"Permutation length mismatch for {{matrix_name}}") + if sorted(int(item) for item 
in permutation) != list(range(int(shape[0]))): + raise ValueError(f"Permutation indices mismatch for {{matrix_name}}") + +validate_matrix_permutation_metadata(MATRIX_PERMUTATION_METADATA) print("🔬 DisCoPy Categorical Diagram Generation") print(f"📊 State Space: {{NUM_STATES}} states, {{NUM_OBSERVATIONS}} observations, {{NUM_ACTIONS}} actions") @@ -378,6 +376,8 @@ def export_circuit_data(circuit_dict, analysis_results, output_dir="discopy_diag 'num_actions': NUM_ACTIONS }}, 'components': list(circuit_dict['components'].keys()), + 'matrix_permutation_metadata': MATRIX_PERMUTATION_METADATA, + 'matrix_permutation_applied_to_diagram': MATRIX_PERMUTATION_APPLIED_TO_DIAGRAM, 'analysis': analysis_results }} diff --git a/src/render/discopy/symmetry.py b/src/render/discopy/symmetry.py new file mode 100644 index 000000000..2674c3c4b --- /dev/null +++ b/src/render/discopy/symmetry.py @@ -0,0 +1,73 @@ +"""Matrix permutation metadata for DisCoPy symmetry rendering.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Sequence + + +def nested_shape(value: Any) -> List[int]: + """Return the rectangular nested-list shape for a matrix-like value.""" + shape: List[int] = [] + current = value + while isinstance(current, list): + shape.append(len(current)) + current = current[0] if current else [] + return shape + + +def validate_permutation(shape: Sequence[int], permutation: Sequence[int]) -> None: + """Validate a row-axis permutation against a matrix/tensor shape.""" + if not shape: + raise ValueError("Cannot permute scalar matrix data") + expected = int(shape[0]) + if len(permutation) != expected: + raise ValueError( + f"Permutation length {len(permutation)} does not match first matrix dimension {expected}" + ) + if sorted(int(item) for item in permutation) != list(range(expected)): + raise ValueError( + f"Permutation must contain each row index exactly once: {permutation}" + ) + + +def validate_matrix_permutation_metadata(metadata: Dict[str, Dict[str, 
Any]]) -> None: + """Validate generated matrix permutation metadata records.""" + if not isinstance(metadata, dict): + raise ValueError("Matrix permutation metadata must be a dictionary") + for matrix_name, record in metadata.items(): + if not isinstance(record, dict): + raise ValueError(f"Permutation metadata for {matrix_name} must be a dict") + if record.get("axis") != "rows": + raise ValueError(f"Unsupported permutation axis for {matrix_name}") + shape = record.get("shape") + permutation = record.get("permutation") + if not isinstance(shape, list) or not isinstance(permutation, list): + raise ValueError( + f"Permutation metadata for {matrix_name} requires shape and permutation lists" + ) + validate_permutation(shape, permutation) + + +def build_matrix_permutation_metadata( + gnn_spec: Dict[str, Any], + matrix_permutations: Dict[str, Sequence[int]] | None = None, +) -> Dict[str, Dict[str, Any]]: + """Build permutation metadata from canonical parsed GNN parameter matrices.""" + params = ( + gnn_spec.get("initial_parameterization") + or gnn_spec.get("initialparameterization") + or {} + ) + permutations = matrix_permutations or params.get("matrix_permutations") or {} + metadata: Dict[str, Dict[str, Any]] = {} + for matrix_name, permutation in permutations.items(): + if matrix_name not in params: + raise ValueError(f"Permutation references missing matrix '{matrix_name}'") + shape = nested_shape(params[matrix_name]) + validate_permutation(shape, permutation) + metadata[matrix_name] = { + "axis": "rows", + "shape": shape, + "permutation": [int(item) for item in permutation], + } + return metadata diff --git a/src/render/pomdp_contract.py b/src/render/pomdp_contract.py index 78ffd33ba..76afdb1cf 100644 --- a/src/render/pomdp_contract.py +++ b/src/render/pomdp_contract.py @@ -188,6 +188,13 @@ def _require_keys(mapping: Dict[str, Any], keys: Iterable[str]) -> None: raise ValueError(f"Missing required initialparameterization keys: {missing}") +def 
_is_active_inference_matrix_key(key: str) -> bool: + """Return whether an InitialParameterization key names a matrix/vector contract.""" + return key in {"A", "B", "C", "D", "E"} or any( + key.startswith(f"{prefix}_") for prefix in ("A", "B", "C", "D", "E") + ) + + def build_canonical_pomdp_spec(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: """Return a copied GNN spec with canonical POMDP matrices and provenance.""" spec = deepcopy(gnn_spec) @@ -238,6 +245,9 @@ def build_canonical_pomdp_spec(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: } if "E" in initial: canonical_initial["E"] = normalise_vector(initial["E"], name="E") + for key, value in initial.items(): + if key not in canonical_initial and not _is_active_inference_matrix_key(key): + canonical_initial[key] = deepcopy(value) model_parameters.update( { diff --git a/src/render/pomdp_processor.py b/src/render/pomdp_processor.py index c5b5ba754..191899dee 100644 --- a/src/render/pomdp_processor.py +++ b/src/render/pomdp_processor.py @@ -767,6 +767,19 @@ def _pomdp_to_gnn_spec( initial_parameterization, matrix_provenance, canonical_model_parameters = ( self._build_canonical_initialparameterization(pomdp_space) ) + raw_initial_parameterization = ( + getattr(pomdp_space, "initial_parameterization", None) or {} + ) + matrix_keys = set(getattr(pomdp_space, "matrices", None) or {}) + preserved_initial_metadata = { + key: value + for key, value in raw_initial_parameterization.items() + if key not in matrix_keys + } + initial_parameterization = { + **initial_parameterization, + **preserved_initial_metadata, + } raw_model_parameters = getattr(pomdp_space, "model_parameters", None) or {} state_factors = getattr(pomdp_space, "state_factors", None) or [] observation_modalities = ( @@ -906,10 +919,15 @@ def _call_pymdp_renderer( if not post_validation["valid"]: warnings.extend(post_validation.get("warnings", [])) + artifacts = [str(output_file)] if success else [] + metadata_file = output_file.with_suffix(".metadata.json") + if 
success and metadata_file.exists(): + artifacts.append(str(metadata_file)) + return { "success": success, "message": message, - "artifacts": [str(output_file)] if success else [], + "artifacts": artifacts, "warnings": warnings, } diff --git a/src/render/processor.py b/src/render/processor.py index 5e11fb79c..87fbc895d 100644 --- a/src/render/processor.py +++ b/src/render/processor.py @@ -733,7 +733,11 @@ def render_gnn_spec( success, msg, _warnings = render_gnn_to_rxinfer( canonical_spec, output_file, options ) - return (True, msg, [str(output_file)]) if success else (False, msg, []) + artifacts = [str(output_file)] + metadata_file = output_file.with_suffix(".metadata.json") + if metadata_file.exists(): + artifacts.append(str(metadata_file)) + return (True, msg, artifacts) if success else (False, msg, []) if target_lower == "activeinference_jl": from .activeinference_jl.activeinference_renderer import ( diff --git a/src/render/rxinfer/rxinfer_renderer.py b/src/render/rxinfer/rxinfer_renderer.py index 398026347..d581cde96 100644 --- a/src/render/rxinfer/rxinfer_renderer.py +++ b/src/render/rxinfer/rxinfer_renderer.py @@ -16,8 +16,10 @@ """ import base64 +import hashlib import json import logging +import re from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -69,6 +71,7 @@ def render_file(self, gnn_file_path: Path, output_path: Path) -> Tuple[bool, str output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w", encoding="utf-8") as f: f.write(rxinfer_code) + _write_rxinfer_execution_metadata(output_path, gnn_spec) self.logger.info(f"Generated RxInfer.jl simulation: {output_path}") return True, "Successfully generated RxInfer.jl simulation code" @@ -858,9 +861,12 @@ def render_gnn_to_rxinfer( # Write output file try: + metadata = build_rxinfer_execution_metadata(gnn_spec) output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w", encoding="utf-8") as f: 
f.write(rxinfer_code) + if metadata: + _write_rxinfer_execution_metadata(output_path, gnn_spec, metadata) except Exception as write_error: logger.error(f"Failed to write output file: {write_error}") return False, f"Error writing RxInfer.jl script: {write_error}", [] @@ -869,7 +875,10 @@ def render_gnn_to_rxinfer( warnings: list[Any] = [] # Check for potential issues - if not gnn_spec.get("initial_parameterization"): + if not ( + gnn_spec.get("initial_parameterization") + or gnn_spec.get("initialparameterization") + ): warnings.append("No initial parameterization found - using defaults") if not gnn_spec.get("model_parameters"): @@ -881,3 +890,107 @@ def render_gnn_to_rxinfer( except Exception as e: logger.error(f"Unexpected error in render_gnn_to_rxinfer: {e}", exc_info=True) return False, f"Error generating RxInfer.jl script: {e}", [] + + +def build_rxinfer_execution_metadata(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: + """Build Step 12 execution metadata for declared RxInfer agent populations.""" + initial = gnn_spec.get("initialparameterization") or gnn_spec.get( + "initial_parameterization" + ) + if not isinstance(initial, dict): + return {} + + agents = _extract_declared_rxinfer_agents(initial) + if not agents: + return {} + + from .toml_generator import _extract_agent_topology + + topology = _extract_agent_topology(initial, agents) + return { + "schema": "gnn_rxinfer_execution_metadata_v1", + "agent_count": len(agents), + "agents": agents, + "topology": topology, + } + + +def _extract_declared_rxinfer_agents(params: Dict[str, Any]) -> List[Dict[str, Any]]: + """Extract explicitly declared agents without inventing default agents.""" + from .toml_generator import ( + _coerce_positive_int, + _extract_compact_agents, + _extract_indexed_agents, + ) + + nr_agents = _coerce_positive_int(params.get("nr_agents")) + if nr_agents > 0: + compact_agents = _extract_compact_agents(params, nr_agents) + if compact_agents is not None: + return compact_agents + + 
indexed_agents = _extract_indexed_agents(params, nr_agents) + if len(indexed_agents) == nr_agents: + return indexed_agents + + raise ValueError( + "nr_agents was provided but agent configuration is incomplete. " + "Provide compact agent_ids/agent_initial_positions/agent_target_positions " + "or complete agent{i}_id/agent{i}_initial_position/agent{i}_target_position keys." + ) + + indexed_count = _infer_indexed_agent_count(params) + if indexed_count <= 0: + return [] + indexed_agents = _extract_indexed_agents(params, indexed_count) + if len(indexed_agents) != indexed_count: + raise ValueError( + "Indexed agent configuration is incomplete. Provide complete " + "agent{i}_id/agent{i}_initial_position/agent{i}_target_position keys." + ) + return indexed_agents + + +def _infer_indexed_agent_count(params: Dict[str, Any]) -> int: + """Infer agent count from agent{i}_... keys when nr_agents is omitted.""" + agent_indices: set[int] = set() + for key in params: + match = re.match(r"agent(\d+)_", str(key)) + if match: + agent_indices.add(int(match.group(1))) + return max(agent_indices) if agent_indices else 0 + + +def _write_rxinfer_execution_metadata( + output_path: Path, + gnn_spec: Dict[str, Any], + metadata: Optional[Dict[str, Any]] = None, +) -> Optional[Path]: + """Write a sibling execution metadata JSON artifact when metadata exists.""" + metadata = ( + metadata if metadata is not None else build_rxinfer_execution_metadata(gnn_spec) + ) + if not metadata: + return None + metadata_path = output_path.with_suffix(".metadata.json") + metadata = dict(metadata) + metadata["script_path"] = str(output_path) + metadata["script_sha256"] = _sha256_file(output_path) + metadata["metadata_provenance"] = "rendered_rxinfer_sidecar" + topology = dict(metadata.get("topology") or {}) + topology.setdefault("source", str(metadata_path)) + metadata["topology"] = topology + metadata_path.write_text( + json.dumps(metadata, indent=2, sort_keys=True), + encoding="utf-8", + ) + return 
metadata_path + + +def _sha256_file(path: Path) -> str: + """Return the SHA256 digest for a rendered script.""" + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() diff --git a/src/render/rxinfer/toml_generator.py b/src/render/rxinfer/toml_generator.py index bf53c8a8f..49706a819 100644 --- a/src/render/rxinfer/toml_generator.py +++ b/src/render/rxinfer/toml_generator.py @@ -653,7 +653,7 @@ def _write_toml_with_exact_formatting(f: Any, config: Any) -> Any: f.write("# Number of inference iterations\n") f.write(f"nr_iterations = {config['model']['nr_iterations']}\n\n") - f.write("# Number of agents in the simulation (currently fixed at 4)\n") + f.write("# Number of agents in the simulation\n") f.write(f"nr_agents = {config['model']['nr_agents']}\n\n") f.write("# Temperature parameter for the softmin function\n") @@ -797,11 +797,32 @@ def _write_toml_with_exact_formatting(f: Any, config: Any) -> Any: for agent in config["agents"]: f.write("[[agents]]\n") - f.write(f"id = {agent['id']}\n") + f.write(f"id = {json_dumps_scalar(agent['id'])}\n") f.write(f"radius = {agent['radius']}\n") f.write(f"initial_position = {agent['initial_position']}\n") f.write(f"target_position = {agent['target_position']}\n\n") + # Agent topology section + topology = config.get("topology") + if isinstance(topology, dict): + f.write("#\n# Agent topology\n#\n") + f.write("[topology]\n") + f.write(f'type = "{topology.get("type", "agent_population")}"\n') + f.write(f"agent_ids = {_toml_array(topology.get('agent_ids', []))}\n") + if topology.get("message_passing"): + f.write(f'message_passing = "{topology["message_passing"]}"\n') + f.write("\n") + for edge in topology.get("edges", []): + if isinstance(edge, dict): + f.write("[[topology.edges]]\n") + f.write(f"source = {json_dumps_scalar(edge['source'])}\n") + f.write(f"target = {json_dumps_scalar(edge['target'])}\n\n") + for 
cluster in topology.get("clusters", []): + if isinstance(cluster, dict): + f.write("[[topology.clusters]]\n") + f.write(f'name = "{cluster["name"]}"\n') + f.write(f"agent_ids = {_toml_array(cluster.get('agent_ids', []))}\n\n") + # Experiments section f.write("#\n# Experiment configurations\n#\n") f.write("[experiments]\n") @@ -842,7 +863,8 @@ def _create_toml_config_structure( Returns: Dictionary representing the TOML configuration """ - params = gnn_spec.get("initialparameterization", {}) + params = _initial_parameterization(gnn_spec) + agents = _extract_agents(gnn_spec) # Start with a standard structure based on the config.toml example toml_config: dict[str, Any] = { @@ -851,7 +873,7 @@ def _create_toml_config_structure( "gamma": params.get("gamma", 1.0), "nr_steps": params.get("nr_steps", 40), "nr_iterations": params.get("nr_iterations", 350), - "nr_agents": params.get("nr_agents", 4), + "nr_agents": len(agents), "softmin_temperature": params.get("softmin_temperature", 10.0), "intermediate_steps": params.get("intermediate_steps", 10), "save_intermediates": str(params.get("save_intermediates", False)) @@ -879,17 +901,49 @@ def _create_toml_config_structure( "color_palette": params.get("color_palette", "tab10"), }, "environments": _extract_environments(gnn_spec), - "agents": _extract_agents(gnn_spec), + "agents": agents, + "topology": _extract_agent_topology(params, agents), "experiments": _extract_experiments(gnn_spec), } return toml_config +def _toml_array(value: Any) -> str: + """Return a TOML-compatible array literal for simple scalar lists.""" + if not isinstance(value, list): + return "[]" + return "[" + ", ".join(json_dumps_scalar(item) for item in value) + "]" + + +def json_dumps_scalar(value: Any) -> str: + """Serialize scalar values using TOML-compatible JSON scalar syntax.""" + import json + + if isinstance(value, (str, int, float, bool)) or value is None: + return json.dumps(value) + if isinstance(value, list): + return _toml_array(value) + return 
json.dumps(str(value)) + + +def _initial_parameterization(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: + """Return normalized initial-parameterization keys from public GNN specs.""" + for key in ( + "initialparameterization", + "initial_parameterization", + "InitialParameterization", + ): + value = gnn_spec.get(key) + if isinstance(value, dict): + return value + return {} + + def _extract_matrices(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: """Extract state space matrices from the GNN specification.""" matrices: dict[Any, Any] = {} - params = gnn_spec.get("initialparameterization", {}) + params = _initial_parameterization(gnn_spec) # Use provided matrices if available, otherwise use defaults if "A" in params: @@ -920,7 +974,7 @@ def _extract_matrices(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: def _extract_environments(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: """Extract environment definitions from the GNN specification.""" - params = gnn_spec.get("initialparameterization", {}) + params = _initial_parameterization(gnn_spec) environments: dict[str, Any] = { "door": { @@ -987,28 +1041,23 @@ def _extract_environments(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: def _extract_agents(gnn_spec: Dict[str, Any]) -> List[Dict[str, Any]]: """Extract agent configurations from the GNN specification.""" - params = gnn_spec.get("initialparameterization", {}) - nr_agents = params.get("nr_agents", 0) - agents: list[Any] = [] + params = _initial_parameterization(gnn_spec) + nr_agents = _coerce_positive_int(params.get("nr_agents")) if nr_agents > 0: - for i in range(1, nr_agents + 1): - agent_id = params.get(f"agent{i}_id") - radius = params.get(f"agent{i}_radius") - initial_pos = params.get(f"agent{i}_initial_position") - target_pos = params.get(f"agent{i}_target_position") - - if all(v is not None for v in [agent_id, radius, initial_pos, target_pos]): - agents.append( - { - "id": agent_id, - "radius": radius, - "initial_position": initial_pos, - "target_position": target_pos, 
- } - ) - if len(agents) == nr_agents: - return agents + compact_agents = _extract_compact_agents(params, nr_agents) + if compact_agents is not None: + return compact_agents + + indexed_agents = _extract_indexed_agents(params, nr_agents) + if len(indexed_agents) == nr_agents: + return indexed_agents + + raise ValueError( + "nr_agents was provided but agent configuration is incomplete. " + "Provide compact agent_ids/agent_initial_positions/agent_target_positions " + "or complete agent{i}_id/agent{i}_initial_position/agent{i}_target_position keys." + ) # Recovery to default agents if extraction fails return [ @@ -1039,9 +1088,228 @@ def _extract_agents(gnn_spec: Dict[str, Any]) -> List[Dict[str, Any]]: ] +def _coerce_positive_int(value: Any) -> int: + """Coerce a value to a positive int, returning 0 for missing/invalid values.""" + try: + coerced = int(value) + except (TypeError, ValueError): + return 0 + return max(0, coerced) + + +def _as_list(value: Any) -> list[Any] | None: + """Return value as a list when it is list-like enough for TOML config.""" + return value if isinstance(value, list) else None + + +def _extract_compact_agents( + params: Dict[str, Any], nr_agents: int +) -> List[Dict[str, Any]] | None: + """Extract agents from compact vectorized InitialParameterization keys.""" + agent_ids = _as_list(params.get("agent_ids")) + initial_positions = _as_list(params.get("agent_initial_positions")) + target_positions = _as_list(params.get("agent_target_positions")) + if agent_ids is None and initial_positions is None and target_positions is None: + return None + radii = _as_list(params.get("agent_radii")) or _as_list(params.get("agent_radius")) + default_radius = params.get("agent_default_radius", 1.0) + required = { + "agent_ids": agent_ids, + "agent_initial_positions": initial_positions, + "agent_target_positions": target_positions, + } + missing = [name for name, value in required.items() if value is None] + if missing: + raise ValueError(f"Missing compact 
multi-agent keys: {', '.join(missing)}") + assert agent_ids is not None + assert initial_positions is not None + assert target_positions is not None + lengths = { + "agent_ids": len(agent_ids), + "agent_initial_positions": len(initial_positions), + "agent_target_positions": len(target_positions), + } + if any(length != nr_agents for length in lengths.values()): + raise ValueError( + f"Compact multi-agent lengths must match nr_agents={nr_agents}: {lengths}" + ) + if radii is not None and len(radii) != nr_agents: + raise ValueError( + f"agent_radii length {len(radii)} must match nr_agents={nr_agents}" + ) + return [ + { + "id": agent_ids[index], + "radius": radii[index] if radii is not None else default_radius, + "initial_position": initial_positions[index], + "target_position": target_positions[index], + } + for index in range(nr_agents) + ] + + +def _extract_indexed_agents( + params: Dict[str, Any], nr_agents: int +) -> List[Dict[str, Any]]: + """Extract indexed agent{i}_... agent definitions.""" + agents: List[Dict[str, Any]] = [] + for i in range(1, nr_agents + 1): + agent_id = params.get(f"agent{i}_id") + radius = params.get(f"agent{i}_radius", params.get("agent_default_radius", 1.0)) + initial_pos = params.get(f"agent{i}_initial_position") + target_pos = params.get(f"agent{i}_target_position") + + if all(v is not None for v in [agent_id, radius, initial_pos, target_pos]): + agents.append( + { + "id": agent_id, + "radius": radius, + "initial_position": initial_pos, + "target_position": target_pos, + } + ) + return agents + + +def _extract_agent_topology( + params: Dict[str, Any], agents: List[Dict[str, Any]] +) -> Dict[str, Any]: + """Extract explicit multi-agent topology metadata for TOML and execution.""" + agent_ids = [agent["id"] for agent in agents if "id" in agent] + raw_edges, edges_present = _first_present( + params, ("agent_edges", "topology_edges", "edges"), [] + ) + raw_clusters, clusters_present = _first_present( + params, ("agent_clusters", 
"topology_clusters", "clusters"), {} + ) + edges = _normalize_topology_edges(raw_edges, strict=edges_present) + clusters = _normalize_topology_clusters(raw_clusters, strict=clusters_present) + topology_type = params.get("agent_topology_type") or params.get("topology_type") + if topology_type is None: + if clusters: + topology_type = "clustered" + elif edges: + topology_type = "network" + else: + topology_type = "agent_population" + topology: Dict[str, Any] = { + "type": str(topology_type), + "agent_ids": agent_ids, + "edges": edges, + "clusters": clusters, + } + message_passing = params.get("message_passing") or params.get( + "agent_message_passing" + ) + if message_passing: + topology["message_passing"] = str(message_passing) + _validate_topology_references(topology, agent_ids) + return topology + + +def _validate_topology_references( + topology: Dict[str, Any], agent_ids: List[Any] +) -> None: + """Reject topology records that reference undeclared agents.""" + declared = set(agent_ids) + for edge in topology.get("edges", []): + if not isinstance(edge, dict): + continue + for endpoint_name in ("source", "target"): + endpoint = edge.get(endpoint_name) + if endpoint not in declared: + raise ValueError( + f"Topology edge {endpoint_name} references undeclared agent {endpoint!r}" + ) + for cluster in topology.get("clusters", []): + if not isinstance(cluster, dict): + continue + for agent_id in cluster.get("agent_ids", []): + if agent_id not in declared: + raise ValueError( + f"Topology cluster {cluster.get('name', '')} " + f"references undeclared agent {agent_id!r}" + ) + + +def _first_present( + params: Dict[str, Any], keys: tuple[str, ...], default: Any +) -> tuple[Any, bool]: + """Return the first present parameter value and whether any key was present.""" + for key in keys: + if key in params: + return params[key], True + return default, False + + +def _normalize_topology_edges( + raw_edges: Any, *, strict: bool = False +) -> List[Dict[str, Any]]: + """Normalize 
compact edge lists into explicit source/target records.""" + if not isinstance(raw_edges, list): + if strict: + raise ValueError("Topology edges must be a list") + return [] + edges: List[Dict[str, Any]] = [] + for raw_edge in raw_edges: + if isinstance(raw_edge, dict): + source = raw_edge.get("source", raw_edge.get("from")) + target = raw_edge.get("target", raw_edge.get("to")) + elif isinstance(raw_edge, (list, tuple)) and len(raw_edge) >= 2: + source, target = raw_edge[0], raw_edge[1] + else: + if strict: + raise ValueError(f"Malformed topology edge: {raw_edge!r}") + continue + if source is None or target is None: + if strict: + raise ValueError( + f"Topology edge requires source and target: {raw_edge!r}" + ) + continue + edges.append({"source": source, "target": target}) + return edges + + +def _normalize_topology_clusters( + raw_clusters: Any, *, strict: bool = False +) -> List[Dict[str, Any]]: + """Normalize dict/list cluster definitions into TOML table records.""" + clusters: List[Dict[str, Any]] = [] + if isinstance(raw_clusters, dict): + for name, agent_ids in raw_clusters.items(): + if isinstance(agent_ids, list): + clusters.append({"name": str(name), "agent_ids": agent_ids}) + elif strict: + raise ValueError(f"Topology cluster {name!r} members must be a list") + return clusters + if isinstance(raw_clusters, list): + for index, raw_cluster in enumerate(raw_clusters, start=1): + if isinstance(raw_cluster, dict): + agent_ids = raw_cluster.get("agent_ids") or raw_cluster.get("agents") + if isinstance(agent_ids, list): + clusters.append( + { + "name": str(raw_cluster.get("name", f"cluster_{index}")), + "agent_ids": agent_ids, + } + ) + elif strict: + raise ValueError( + f"Topology cluster {raw_cluster.get('name', index)!r} " + "members must be a list" + ) + elif strict: + raise ValueError(f"Malformed topology cluster: {raw_cluster!r}") + return clusters + if strict: + raise ValueError("Topology clusters must be a dict or list") + return clusters + + def 
_extract_experiments(gnn_spec: Dict[str, Any]) -> Dict[str, Any]: """Extract experiment configurations from the GNN specification.""" - params = gnn_spec.get("initialparameterization", {}) + params = _initial_parameterization(gnn_spec) # Use experiment settings from GNN spec if available, otherwise use defaults experiments: dict[str, Any] = { diff --git a/src/report/model_family.py b/src/report/model_family.py new file mode 100644 index 000000000..d4879c47f --- /dev/null +++ b/src/report/model_family.py @@ -0,0 +1,45 @@ +"""Report helpers for model-family acceptance ledgers.""" + +from __future__ import annotations + +from typing import Any, Dict + + +def render_model_family_acceptance_markdown(ledger: Dict[str, Any]) -> str: + """Render a compact Markdown report from a model-family acceptance ledger.""" + lines = [ + "# GNN Model Family Acceptance Ledger", + "", + f"- Schema: {ledger['schema']}", + f"- Families: {ledger['family_count']}", + f"- Strict: {str(ledger['strict']).lower()}", + f"- Only steps: {ledger['only_steps'] or 'full pipeline'}", + f"- Frameworks: {ledger.get('frameworks') or 'pipeline default'}", + "", + "| Family | Status | Models | Passed Steps | Failed Steps | Skipped Steps | Raw Failed Steps | Allowed Unsupported |", + "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", + ] + for family in ledger["families"]: + counts = family["step_status_counts"] + raw_failed = sum( + 1 for status in family.get("raw_steps", {}).values() if status == "failed" + ) + allowed_unsupported = sum( + 1 + for evidence in family.get("step_evidence", {}).values() + if isinstance(evidence, dict) + and evidence.get("acceptance") == "allowed_unsupported" + ) + lines.append( + "| {name} | {status} | {models} | {passed} | {failed} | {skipped} | {raw_failed} | {allowed} |".format( + name=family["name"], + status=family["status"], + models=family["interpretability_summary"]["model_count"], + passed=counts.get("passed", 0), + failed=counts.get("failed", 0), + 
skipped=counts.get("skipped", 0), + raw_failed=raw_failed, + allowed=allowed_unsupported, + ) + ) + return "\n".join(lines) + "\n" diff --git a/src/setup/package_names.py b/src/setup/package_names.py new file mode 100644 index 000000000..a73fc42f3 --- /dev/null +++ b/src/setup/package_names.py @@ -0,0 +1,15 @@ +"""Package/import-name contracts used by setup diagnostics.""" + +from __future__ import annotations + +from typing import Dict + +IMPORT_TO_PACKAGE: Dict[str, str] = { + "yaml": "PyYAML", + "sklearn": "scikit-learn", +} + + +def package_name_for_import(import_name: str) -> str: + """Return the installable package name for a Python import name.""" + return IMPORT_TO_PACKAGE.get(import_name, import_name) diff --git a/src/tests/README.md b/src/tests/README.md index 7010043e9..0802d05e4 100644 --- a/src/tests/README.md +++ b/src/tests/README.md @@ -33,15 +33,14 @@ pytest -m slow # slow/performance tests only ## Test Statistics -- **Total test files**: 171 (142 in subdirectories + 29 at root) -- **Test collection baseline**: 2,296 collected with the command-of-record +- **Total test files**: 184 (155 in subdirectories + 29 at root) +- **Test collection baseline**: 2,397 collected with the command-of-record collect pass and the two local Ollama integration files ignored - **Pass/skip baseline**: the latest recorded command-of-record full run with the - two local Ollama integration files ignored passed on 2026-06-09: - 2,281 passed, 14 skipped, 1 xfailed in 744.50s + two local Ollama integration files ignored is 2,379 passed, 17 skipped, 1 xfailed - **Fast-test duration**: 1-3 minutes - **Full-suite duration**: varies by optional backend availability; latest - command-of-record run completed in 12:24 + command-of-record run completed in 12:09 ## Directory Layout (Phase 7) @@ -57,7 +56,7 @@ src/tests/ ├── test_data/ # on-disk fixtures consumed by tests │ ├── /test_*.py # per-module tests mirroring src// -│ (34 first-level subdirectories; 31 contain direct test 
files) +│ (34 maintained first-level subdirectories; 32 contain direct test files) │ └── test_*.py # cross-cutting / meta-tests at root (coverage assessments, environment probes, runner self-tests, @@ -646,9 +645,9 @@ If issues persist: ### Test Coverage -- **171 test files** across root and module-specific directories -- **2,296 collected tests** in the current command-of-record collect pass with Ollama integration tests ignored -- **Latest recorded full suite** passed on 2026-06-09 with the same Ollama integration excludes: 2,281 passed, 14 skipped, 1 xfailed in 744.50s +- **184 test files** across root and module-specific directories +- **2,397 collected tests** in the current command-of-record collect pass with Ollama integration tests ignored +- **Latest recorded full suite evidence** with the same Ollama integration excludes: 2,379 passed, 17 skipped, 1 xfailed - **Comprehensive module coverage** for all major modules - **Specialized test areas** for specific functionality - **Integration tests** for cross-module functionality @@ -683,7 +682,7 @@ Module coverage mirrors the maintained source tree. Use `rg --files src/tests -g ## Test Execution Results -Latest measured collect-only inventory (2026-06-09): 171 `test_*.py` files and 2,296 collected tests with the Ollama integration files excluded. Latest recorded full command-of-record run with the same excludes passed on 2026-06-09: 2,281 passed, 14 skipped, 1 xfailed in 744.50s. +Latest measured collect-only inventory (2026-06-12): 184 `test_*.py` files and 2,397 collected tests with the Ollama integration files excluded. Latest recorded full command-of-record evidence with the same excludes: 2,379 passed, 17 skipped, 1 xfailed. 
```bash uv run --extra dev python -m pytest src/tests/ -q --tb=no \ diff --git a/src/tests/TEST_SUITE_SUMMARY.md b/src/tests/TEST_SUITE_SUMMARY.md index 01002918e..6749fc2d6 100644 --- a/src/tests/TEST_SUITE_SUMMARY.md +++ b/src/tests/TEST_SUITE_SUMMARY.md @@ -1,6 +1,6 @@ # GNN Pipeline Test Suite - Comprehensive Summary -**Last Updated**: 2026-06-09 +**Last Updated**: 2026-06-11 **Status**: ✅ Production Ready **Test Infrastructure Version**: 2.0.1 @@ -12,12 +12,12 @@ The GNN Processing Pipeline test suite provides comprehensive coverage across al ### Key Metrics -- **Total Test Files**: 171 `test_*.py` files -- **Directory Layout**: 34 first-level directories under `src/tests/`; 31 contain direct test files +- **Total Test Files**: 184 `test_*.py` files +- **Directory Layout**: 34 maintained first-level directories under `src/tests/`; 32 contain direct test files - **Root-Level Tests**: 29 `test_*.py` files at `src/tests/` -- **Subdirectory Tests**: 142 `test_*.py` files under module/helper directories -- **Collected Tests**: 2,296 with `uv run --extra dev python -m pytest --collect-only src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py` (2026-06-09) -- **Latest Full Run**: `uv run --extra dev python -m pytest src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py` passed on 2026-06-09: 2,281 passed, 14 skipped, 1 xfailed in 744.50s. 
+- **Subdirectory Tests**: 155 `test_*.py` files under module/helper directories +- **Collected Tests**: 2,397 with `uv run --extra dev python -m pytest --collect-only src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py` (2026-06-12) +- **Latest Full Run Evidence**: `uv run --extra dev python -m pytest src/tests/ -q --tb=no --ignore=src/tests/llm/test_llm_ollama.py --ignore=src/tests/llm/test_llm_ollama_integration.py`: 2,379 passed, 17 skipped, 1 xfailed. --- @@ -33,8 +33,8 @@ src/tests/ ├── __init__.py # Module exports and utilities ├── README.md # Comprehensive documentation ├── AGENTS.md # Technical API documentation -├── /test_*.py # 138 module/helper test files -└── test_*.py # 28 cross-cutting root test files +├── /test_*.py # 155 module/helper test files +└── test_*.py # 29 cross-cutting root test files ``` ### Execution Modes @@ -283,9 +283,9 @@ output/2_tests_output/ The GNN Processing Pipeline test suite provides comprehensive, production-ready testing infrastructure with: -- 171 test files across root and module-specific directories -- 2,296 collected tests in the current command-of-record collect pass with Ollama integration tests ignored -- Latest recorded full command-of-record run passed on 2026-06-09: 2,281 passed, 14 skipped, 1 xfailed in 744.50s +- 184 test files across root and module-specific directories +- 2,397 collected tests in the current command-of-record collect pass with Ollama integration tests ignored +- Latest recorded full command-of-record evidence: 2,379 passed, 17 skipped, 1 xfailed - Real data and real implementations throughout core paths - Comprehensive error handling and recovery testing - Module coverage for all 25 pipeline steps diff --git a/src/tests/analysis/test_interpretability_summary.py b/src/tests/analysis/test_interpretability_summary.py new file mode 100644 index 000000000..7d30b44da --- /dev/null +++ 
b/src/tests/analysis/test_interpretability_summary.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from analysis.interpretability import ( + build_family_interpretability_summary, + build_model_interpretability_summary, + render_family_interpretability_markdown, +) + + +def test_model_interpretability_summary_extracts_variables_edges_and_traces( + tmp_path: Path, +) -> None: + model = tmp_path / "demo_model.md" + model.write_text( + "\n".join( + [ + "## ModelName", + "Demo Model", + "", + "## StateSpaceBlock", + "s[2,type=float]", + "o[2,type=float]", + "", + "## Connections", + "s>o", + "", + "## InitialParameterization", + "A={(0.9,0.1),(0.1,0.9)}", + "D={(0.5,0.5)}", + ] + ), + encoding="utf-8", + ) + execution_dir = tmp_path / "12_execute_output" / "demo_model" + execution_dir.mkdir(parents=True) + summary_dir = tmp_path / "00_pipeline_summary" + summary_dir.mkdir() + (summary_dir / "pipeline_execution_summary.json").write_text( + json.dumps( + { + "steps": [ + {"script_name": "11_render.py", "status": "SUCCESS"}, + {"script_name": "12_execute.py", "status": "SUCCESS"}, + ] + } + ), + encoding="utf-8", + ) + (execution_dir / "simulation_results.json").write_text( + json.dumps({"free_energy_trace": [3.0, 2.0, 1.0], "actions": [0, 1]}), + encoding="utf-8", + ) + + summary = build_model_interpretability_summary(model, tmp_path) + + assert summary["model_name"] == "Demo Model" + assert summary["variable_count"] == 2 + assert summary["connection_count"] == 1 + assert summary["matrix_shapes"]["A"] == [2, 2] + assert summary["pipeline_evidence"]["render_status"] == "passed" + assert summary["pipeline_evidence"]["execution_status"] == "passed" + assert summary["telemetry_present"] is True + assert summary["telemetry_preview"]["free_energy_trace"] == [3.0, 2.0, 1.0] + assert summary["artifact_links"] + + +def test_family_interpretability_markdown_is_compact(tmp_path: Path) -> None: + model = tmp_path / "demo.md" + 
model.write_text( + "## ModelName\nDemo\n\n## StateSpaceBlock\ns[2]\n\n## Connections\n", + encoding="utf-8", + ) + + summary = build_family_interpretability_summary("demo-family", tmp_path, tmp_path) + markdown = render_family_interpretability_markdown(summary) + + assert summary["family"] == "demo-family" + assert summary["model_count"] == 1 + assert summary["totals"]["models_with_telemetry"] == 0 + assert "# Model Family Interpretability: demo-family" in markdown + assert "Models with telemetry" in markdown + assert "| Demo |" in markdown diff --git a/src/tests/audio/test_audio_streaming.py b/src/tests/audio/test_audio_streaming.py new file mode 100644 index 000000000..914fcd7f4 --- /dev/null +++ b/src/tests/audio/test_audio_streaming.py @@ -0,0 +1,237 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from audio.processor import _process_audio_streaming +from audio.streaming import ( + chunks_from_frames, + frames_from_execution_trace, + write_stream_summary, +) + + +def test_audio_telemetry_frames_from_execution_trace() -> None: + frames = frames_from_execution_trace( + { + "free_energy": [1.0, 0.5], + "beliefs": [[0.2, 0.8], [0.7, 0.3]], + "actions": [1, 0], + } + ) + assert len(frames) == 2 + assert frames[0].belief == [0.2, 0.8] + assert frames[1].action == 0 + + +def test_audio_stream_chunks_are_device_free(tmp_path: Path) -> None: + frames = frames_from_execution_trace( + { + "expected_free_energy": [1.0, 2.0, 3.0], + "beliefs": [[0.6, 0.4], [0.1, 0.9], [0.5, 0.5]], + "actions": [0, 1, 1], + } + ) + chunks = chunks_from_frames(frames, chunk_size=2) + assert [chunk.frame_count for chunk in chunks] == [2, 1] + payload = write_stream_summary(chunks, tmp_path / "chunks.json") + assert payload["schema"] == "gnn_audio_stream_chunks_v1" + assert payload["streaming_safe"] is True + assert payload["frame_count"] == 3 + assert (tmp_path / "chunks.json").exists() + + +def 
test_audio_stream_summary_empty_chunks_is_not_success_like(tmp_path: Path) -> None: + payload = write_stream_summary([], tmp_path / "empty_chunks.json") + + assert payload["status"] == "no_frames" + assert payload["streaming_safe"] is False + assert payload["chunk_count"] == 0 + assert (tmp_path / "empty_chunks.json").exists() + + +def test_audio_frames_accept_step12_simulation_data_aliases() -> None: + frames = frames_from_execution_trace( + { + "simulation_data": { + "free_energy_trace": [2.0, 1.0], + "belief_history": [ + {"state_beliefs": [0.4, 0.6]}, + {"state_beliefs": [0.8, 0.2]}, + ], + "action_history": [{"selected_action": 1}, {"selected_action": 0}], + } + } + ) + + assert [frame.free_energy for frame in frames] == [2.0, 1.0] + assert frames[0].belief == [0.4, 0.6] + assert frames[1].action == 0 + + +def test_audio_frames_accept_vector_free_energy_and_action_samples() -> None: + frames = frames_from_execution_trace( + { + "expected_free_energy": [[1.0, 3.0], [2.0, 4.0]], + "beliefs": [[0.2, 0.8], [0.6, 0.4]], + "actions": [[1, 0], [0, 1]], + } + ) + + assert [frame.free_energy for frame in frames] == [2.0, 3.0] + assert [frame.action for frame in frames] == [1, 0] + + +def test_audio_streaming_loads_execution_output_dir(tmp_path: Path) -> None: + execution_dir = tmp_path / "12_execute_output" + execution_dir.mkdir() + (execution_dir / "demo_results.json").write_text( + """ + { + "simulation_data": { + "free_energy_trace": [1.0], + "belief_history": [[0.7, 0.3]], + "action_history": [1] + } + } + """, + encoding="utf-8", + ) + summary = _process_audio_streaming( + {"execution_output_dir": execution_dir, "audio_chunk_size": 1}, + tmp_path / "audio", + __import__("logging").getLogger("test"), + ) + + assert summary["telemetry_source_count"] == 1 + assert summary["chunk_count"] == 1 + assert (tmp_path / "audio" / "audio_stream_chunks.json").exists() + + +def test_audio_streaming_follows_step12_summary_structured_result( + tmp_path: Path, +) -> None: + 
execution_dir = tmp_path / "12_execute_output" + summaries_dir = execution_dir / "summaries" + structured_dir = execution_dir / "demo" / "pymdp" / "execution_logs" + structured_dir.mkdir(parents=True) + summaries_dir.mkdir(parents=True) + structured_result = structured_dir / "demo_results.json" + structured_result.write_text( + """ + { + "simulation_data": { + "free_energy_trace": [3.0, 1.0], + "belief_history": [[0.2, 0.8], [0.9, 0.1]], + "action_history": [1, 0] + } + } + """, + encoding="utf-8", + ) + (summaries_dir / "execution_summary.json").write_text( + f""" + {{ + "execution_details": [ + {{"structured_result_file": "{structured_result}"}} + ] + }} + """, + encoding="utf-8", + ) + + summary = _process_audio_streaming( + {"execution_output_dir": execution_dir, "audio_chunk_size": 2}, + tmp_path / "audio", + __import__("logging").getLogger("test"), + ) + + assert summary["telemetry_source_count"] == 1 + assert summary["frame_count"] == 2 + assert summary["telemetry_provenance"] == [str(structured_result)] + assert summary["chunks"][0]["metadata"]["last_action"] == 0 + + +def test_audio_streaming_rejects_summary_pointer_outside_execution_dir( + tmp_path: Path, +) -> None: + execution_dir = tmp_path / "12_execute_output" + summaries_dir = execution_dir / "summaries" + summaries_dir.mkdir(parents=True) + outside_result = tmp_path / "outside_results.json" + outside_result.write_text( + """ + { + "simulation_data": { + "free_energy_trace": [3.0], + "belief_history": [[0.2, 0.8]], + "action_history": [1] + } + } + """, + encoding="utf-8", + ) + (summaries_dir / "execution_summary.json").write_text( + f""" + {{ + "execution_details": [ + {{"structured_result_file": "{outside_result}"}} + ] + }} + """, + encoding="utf-8", + ) + + summary = _process_audio_streaming( + {"execution_output_dir": execution_dir, "audio_chunk_size": 1}, + tmp_path / "audio", + __import__("logging").getLogger("test"), + ) + + assert summary == {} + assert not (tmp_path / "audio" / 
"audio_stream_chunks.json").exists() + + +def test_audio_streaming_discovers_sibling_step12_output(tmp_path: Path) -> None: + output_root = tmp_path / "output" + audio_dir = output_root / "15_audio_output" + execution_dir = output_root / "12_execute_output" + execution_dir.mkdir(parents=True) + (execution_dir / "demo_results.json").write_text( + """ + { + "simulation_data": { + "free_energy_trace": [1.0], + "belief_history": [[0.6, 0.4]], + "action_history": [1] + } + } + """, + encoding="utf-8", + ) + + summary = _process_audio_streaming( + {"audio_chunk_size": 1}, + audio_dir, + __import__("logging").getLogger("test"), + ) + + assert summary["telemetry_source_count"] == 1 + assert (audio_dir / "audio_stream_chunks.json").exists() + + +def test_audio_streaming_empty_telemetry_reports_no_frames(tmp_path: Path) -> None: + summary = _process_audio_streaming( + {"telemetry": {"simulation_data": {}}}, + tmp_path / "audio", + __import__("logging").getLogger("test"), + ) + + assert summary["status"] == "no_frames" + assert summary["frame_count"] == 0 + assert summary["streaming_safe"] is False + artifact = tmp_path / "audio" / "audio_stream_chunks.json" + assert artifact.exists() + persisted = json.loads(artifact.read_text(encoding="utf-8")) + assert persisted["status"] == "no_frames" + assert persisted["streaming_safe"] is False diff --git a/src/tests/cli/test_templates_cli.py b/src/tests/cli/test_templates_cli.py new file mode 100644 index 000000000..8fa365a13 --- /dev/null +++ b/src/tests/cli/test_templates_cli.py @@ -0,0 +1,234 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +import sys +from pathlib import Path + +import pytest + +from cli import main +from cli.templates import ( + _template_record_from_raw, + list_templates, + pull_template, + show_template, +) + + +def test_template_index_contains_documented_pull_target() -> None: + names = {template["name"] for template in list_templates()} + assert "actinf-pomdp-2state" in 
names + assert "pomdp-gridworld-3x3" in names + + +def test_template_index_is_externalized_and_has_three_entries() -> None: + templates = list_templates() + assert len(templates) >= 3 + for template in templates: + assert template["source"].startswith("package://") + assert Path(template["source"]).suffix == ".md" + assert len(template["sha256"]) == 64 + + +@pytest.mark.parametrize( + "record", + [ + { + "name": "bad", + "description": "bad", + "source": "template_assets/actinf_pomdp_2state.md", + "filename": "../escape.md", + }, + { + "name": "bad", + "description": "bad", + "source": "template_assets/actinf_pomdp_2state.md", + "filename": "/tmp/escape.md", + }, + { + "name": "bad", + "description": "bad", + "source": "../input/gnn_files/demo.md", + "filename": "demo.md", + }, + { + "name": "bad", + "description": "bad", + "source": "input/gnn_files/demo.md", + "filename": "demo.md", + }, + { + "name": "bad", + "description": "bad", + "source": "template_assets/demo.txt", + "filename": "demo.md", + }, + ], +) +def test_template_index_rejects_paths_outside_package_contract( + record: dict[str, str], +) -> None: + with pytest.raises(ValueError): + _template_record_from_raw(record) + + +def test_show_template_returns_gridworld_record() -> None: + template = show_template("pomdp-gridworld-3x3") + assert template["filename"] == "pomdp_gridworld_3x3.md" + assert template["source"].endswith("pomdp_gridworld_3x3.md") + + +def test_pull_template_dry_run_does_not_copy(tmp_path: Path) -> None: + result = pull_template("actinf-pomdp-2state", tmp_path, dry_run=True) + assert result["dry_run"] is True + assert result["copied"] is False + assert not Path(result["destination"]).exists() + + +def test_pull_template_copies_with_checksum(tmp_path: Path) -> None: + result = pull_template("actinf-pomdp-2state", tmp_path) + destination = Path(result["destination"]) + assert destination.exists() + assert result["copied"] is True + assert len(result["sha256"]) == 64 + + +def 
test_pull_template_rejects_checksum_collision_without_overwrite( + tmp_path: Path, +) -> None: + first = pull_template("actinf-pomdp-2state", tmp_path) + destination = Path(first["destination"]) + destination.write_text("different content\n", encoding="utf-8") + + with pytest.raises(FileExistsError, match="--overwrite"): + pull_template("actinf-pomdp-2state", tmp_path) + + +def test_pull_template_overwrite_replaces_checksum_collision(tmp_path: Path) -> None: + first = pull_template("actinf-pomdp-2state", tmp_path) + destination = Path(first["destination"]) + destination.write_text("different content\n", encoding="utf-8") + + result = pull_template("actinf-pomdp-2state", tmp_path, overwrite=True) + + assert result["overwritten"] is True + assert result["copied"] is True + assert len(result["sha256"]) == 64 + + +def test_pull_template_rejects_symlink_destination(tmp_path: Path) -> None: + outside = tmp_path / "outside.md" + outside.write_text("outside\n", encoding="utf-8") + destination = tmp_path / "actinf_pomdp_2state.md" + try: + destination.symlink_to(outside) + except OSError: + pytest.skip("symlink creation is not available on this platform") + + with pytest.raises(FileExistsError, match="symlink"): + pull_template("actinf-pomdp-2state", tmp_path, dry_run=True) + + assert outside.read_text(encoding="utf-8") == "outside\n" + + +def test_templates_list_cli_outputs_json(capsys: pytest.CaptureFixture[str]) -> None: + assert main(["templates", "list"]) == 0 + captured = capsys.readouterr() + payload = json.loads(captured.out) + assert payload["templates"] + + +def test_pull_cli_dry_run_outputs_json( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + assert ( + main( + ["pull", "actinf-pomdp-2state", "--output-dir", str(tmp_path), "--dry-run"] + ) + == 0 + ) + captured = capsys.readouterr() + payload = json.loads(captured.out) + assert payload["template"] == "actinf-pomdp-2state" + assert payload["dry_run"] is True + + +def 
test_templates_show_cli_outputs_one_template( + capsys: pytest.CaptureFixture[str], +) -> None: + assert main(["templates", "show", "pomdp-gridworld-3x3"]) == 0 + captured = capsys.readouterr() + payload = json.loads(captured.out) + assert payload["template"]["name"] == "pomdp-gridworld-3x3" + + +def test_pull_cli_unknown_template_fails() -> None: + assert main(["pull", "missing-template", "--dry-run"]) == 1 + + +@pytest.mark.slow +def test_template_cli_works_from_installed_wheel_outside_repo(tmp_path: Path) -> None: + uv = shutil.which("uv") + if uv is None: + pytest.skip("uv executable is required for wheel smoke") + + repo_root = Path(__file__).resolve().parents[3] + dist_dir = tmp_path / "dist" + subprocess.run( + [uv, "build", "--wheel", "--out-dir", str(dist_dir)], + cwd=repo_root, + check=True, + text=True, + capture_output=True, + ) + wheel = next(dist_dir.glob("*.whl")) + + venv_dir = tmp_path / "venv" + subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True) + bin_dir = venv_dir / ("Scripts" if sys.platform == "win32" else "bin") + pip = bin_dir / ("pip.exe" if sys.platform == "win32" else "pip") + gnn = bin_dir / ("gnn.exe" if sys.platform == "win32" else "gnn") + subprocess.run( + [str(pip), "install", "--no-deps", str(wheel)], + check=True, + text=True, + capture_output=True, + ) + + outside_repo = tmp_path / "outside" + outside_repo.mkdir() + list_result = subprocess.run( + [str(gnn), "templates", "list"], + cwd=outside_repo, + check=True, + text=True, + capture_output=True, + ) + assert "pomdp-gridworld-3x3" in list_result.stdout + + show_result = subprocess.run( + [str(gnn), "templates", "show", "pomdp-gridworld-3x3"], + cwd=outside_repo, + check=True, + text=True, + capture_output=True, + ) + assert "pomdp_gridworld_3x3.md" in show_result.stdout + + pull_result = subprocess.run( + [ + str(gnn), + "pull", + "pomdp-gridworld-3x3", + "--output-dir", + str(tmp_path / "pulled"), + "--dry-run", + ], + cwd=outside_repo, + check=True, + 
text=True, + capture_output=True, + ) + assert '"copied": false' in pull_result.stdout diff --git a/src/tests/docs/AGENTS.md b/src/tests/docs/AGENTS.md new file mode 100644 index 000000000..40fff52d2 --- /dev/null +++ b/src/tests/docs/AGENTS.md @@ -0,0 +1,11 @@ +# Docs Tests - Agent Scaffolding + +## Purpose + +Focused tests for repository documentation and capability-contract checks. + +## Verification + +```bash +uv run --extra dev python -m pytest src/tests/docs -q +``` diff --git a/src/tests/docs/README.md b/src/tests/docs/README.md new file mode 100644 index 000000000..8c4a883fe --- /dev/null +++ b/src/tests/docs/README.md @@ -0,0 +1,5 @@ +# Docs Tests + +This directory contains tests that exercise documentation and roadmap contract +audits. These tests are intentionally small and should not write generated +documentation artifacts. diff --git a/src/tests/docs/test_capability_contracts.py b/src/tests/docs/test_capability_contracts.py new file mode 100644 index 000000000..36b1bc092 --- /dev/null +++ b/src/tests/docs/test_capability_contracts.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import pytest + +from scripts import check_capability_contracts +from scripts.check_capability_contracts import run_audit + + +def test_capability_contracts_are_current() -> None: + assert run_audit() == [] + + +def test_capability_contracts_fail_strict_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + check_capability_contracts, "run_audit", lambda: ["synthetic failure"] + ) + + assert check_capability_contracts.main([]) == 1 + assert check_capability_contracts.main(["--warn-only"]) == 0 diff --git a/src/tests/execute/test_discrete_models_pymdp.py b/src/tests/execute/test_discrete_models_pymdp.py index eb2daea92..7f4b3b783 100644 --- a/src/tests/execute/test_discrete_models_pymdp.py +++ b/src/tests/execute/test_discrete_models_pymdp.py @@ -294,6 +294,7 @@ def test_pymdp_analysis_extractor(model_file: str, tmp_path: Path) -> None: 
@pytest.mark.integration @pytest.mark.slow +@pytest.mark.timeout(900) def test_all_discrete_models_e2e(tmp_path: Path) -> None: """ Run the full pipeline (process_render → process_execute → process_analysis) diff --git a/src/tests/gui/test_oxdraw_integration.py b/src/tests/gui/test_oxdraw_integration.py index 0dd7fb879..b65926d95 100644 --- a/src/tests/gui/test_oxdraw_integration.py +++ b/src/tests/gui/test_oxdraw_integration.py @@ -390,6 +390,18 @@ def test_process_oxdraw_headless( assert "gnn_to_mermaid_conversions" in results assert len(results["gnn_to_mermaid_conversions"]) > 0 + assert results["websocket_bridge"]["message_contract_available"] is True + assert results["websocket_bridge"]["server_running"] is False + assert results["websocket_bridge"]["status"] == "message_contract_only" + messages = results["websocket_bridge"]["messages"] + assert len(messages) == 1 + load_message = messages[0] + assert load_message["type"] == "model.load" + assert load_message["payload"]["model_id"] == "test_model" + assert load_message["payload"]["format"] == "mermaid" + assert load_message["payload"]["mermaid_file"].endswith("test_model.mmd") + assert "flowchart TD" in load_message["payload"]["mermaid"] + def test_process_oxdraw_no_files(self, temp_dir: Any, capsys: Any) -> Any: """Test processing with no GNN files.""" import logging diff --git a/src/tests/gui/test_websocket_bridge.py b/src/tests/gui/test_websocket_bridge.py new file mode 100644 index 000000000..2b933dbd4 --- /dev/null +++ b/src/tests/gui/test_websocket_bridge.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import asyncio +import json +import socket +from typing import Any, cast + +import pytest + +from gui.websocket_bridge import ( + GUI_WEBSOCKET_MESSAGE_TYPES, + GUIBridgeState, + GUIWebSocketMessage, + build_initial_messages, + run_local_gui_bridge, +) + + +def test_gui_websocket_message_types_are_explicit() -> None: + assert GUI_WEBSOCKET_MESSAGE_TYPES == { + "model.load", + "matrix.patch", + 
"validation.result", + "model.export", + "error", + } + + +def test_gui_websocket_message_round_trips() -> None: + message = GUIWebSocketMessage( + type="matrix.patch", payload={"matrix": "A", "path": [0, 1], "value": 0.5} + ) + parsed = GUIWebSocketMessage.from_json(message.to_json()) + assert parsed == message + + +def test_gui_websocket_rejects_unknown_type() -> None: + with pytest.raises(ValueError, match="Unsupported"): + GUIWebSocketMessage.from_json(json.dumps({"type": "unknown", "payload": {}})) + + +def test_build_initial_messages_uses_model_load() -> None: + messages = build_initial_messages([{"model_name": "demo"}]) + assert messages[0].type == "model.load" + assert messages[0].payload["model_name"] == "demo" + + +def test_gui_bridge_state_load_patch_and_export_flow() -> None: + state = GUIBridgeState() + load_response = state.apply_message( + GUIWebSocketMessage( + type="model.load", + payload={"model_id": "demo", "matrices": {"A": [[1.0, 0.0]]}}, + ) + ) + assert load_response is not None + assert load_response.type == "validation.result" + + patch_response = state.apply_message( + GUIWebSocketMessage( + type="matrix.patch", + payload={ + "model_id": "demo", + "matrix": "A", + "path": [0, 1], + "value": 0.25, + }, + ) + ) + assert patch_response is not None + assert patch_response.payload["valid"] is True + assert state.models["demo"]["matrices"]["A"][0][1] == 0.25 + + export_response = state.apply_message( + GUIWebSocketMessage(type="model.export", payload={"model_id": "demo"}) + ) + assert export_response is not None + assert export_response.type == "model.export" + assert export_response.payload["models"]["demo"]["matrices"]["A"][0][1] == 0.25 + + +@pytest.mark.asyncio +async def test_gui_bridge_rejects_nonlocal_host() -> None: + with pytest.raises(ValueError, match="local-only"): + await run_local_gui_bridge("0.0.0.0", 8765, []) + + +@pytest.mark.asyncio +async def test_gui_bridge_runs_local_websocket_exchange() -> None: + websockets = 
pytest.importorskip("websockets") + port = _free_local_port() + initial = [ + GUIWebSocketMessage( + type="model.load", + payload={"model_id": "demo", "matrices": {"A": [[1.0, 0.0]]}}, + ) + ] + task = asyncio.create_task(run_local_gui_bridge("127.0.0.1", port, initial)) + try: + websocket = cast( + Any, await _connect_with_retry(websockets, f"ws://127.0.0.1:{port}") + ) + async with websocket: + loaded = GUIWebSocketMessage.from_json(await websocket.recv()) + assert loaded.type == "model.load" + await websocket.send( + GUIWebSocketMessage( + type="matrix.patch", + payload={ + "model_id": "demo", + "matrix": "A", + "path": [0, 1], + "value": 0.25, + }, + request_id="patch-1", + ).to_json() + ) + validation = GUIWebSocketMessage.from_json(await websocket.recv()) + assert validation.type == "validation.result" + assert validation.request_id == "patch-1" + assert validation.payload["valid"] is True + await websocket.send( + GUIWebSocketMessage( + type="model.export", + payload={"model_id": "demo"}, + request_id="export-1", + ).to_json() + ) + exported = GUIWebSocketMessage.from_json(await websocket.recv()) + assert exported.type == "model.export" + assert exported.payload["models"]["demo"]["matrices"]["A"][0][1] == 0.25 + finally: + task.cancel() + with pytest.raises(asyncio.CancelledError): + await task + + +def _free_local_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.bind(("127.0.0.1", 0)) + return int(sock.getsockname()[1]) + + +async def _connect_with_retry(websockets: Any, uri: str) -> Any: + last_error: Exception | None = None + for _ in range(20): + try: + return await websockets.connect(uri) + except OSError as exc: + last_error = exc + await asyncio.sleep(0.05) + raise AssertionError(f"WebSocket bridge did not start: {last_error}") diff --git a/src/tests/mcp/test_mcp_http_auth.py b/src/tests/mcp/test_mcp_http_auth.py new file mode 100644 index 000000000..49c886efe --- /dev/null +++ 
b/src/tests/mcp/test_mcp_http_auth.py @@ -0,0 +1,270 @@ +from __future__ import annotations + +import http.client +import json +import threading +from http.server import HTTPServer +from typing import Any, cast + +import pytest + +from mcp.meta_mcp import get_mcp_auth_status +from mcp.server_http import ( + _RATE_LIMIT_STATE, + MCPHTTPHandler, + allow_insecure_local_http, + get_http_capabilities, + get_safe_http_resource_uris, + get_safe_http_tool_names, + initialize, + is_authorized, + is_rate_limited, + is_safe_http_resource, + is_safe_http_tool, +) + + +def test_mcp_http_rejects_requests_without_token_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("GNN_MCP_TOKEN", raising=False) + monkeypatch.delenv("GNN_MCP_ALLOW_INSECURE_LOCAL", raising=False) + assert is_authorized({}) is False + + +def test_mcp_http_insecure_local_opt_in_allows_missing_token( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("GNN_MCP_TOKEN", raising=False) + monkeypatch.setenv("GNN_MCP_ALLOW_INSECURE_LOCAL", "1") + assert allow_insecure_local_http() is True + assert is_authorized({}, client_host="127.0.0.1") is True + assert is_authorized({}, client_host="::1") is True + assert is_authorized({}, client_host="192.0.2.10") is False + assert is_authorized({}) is False + + +def test_mcp_http_requires_matching_bearer_token( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GNN_MCP_TOKEN", "secret-token") + assert is_authorized({}) is False + assert is_authorized({"Authorization": "Bearer wrong"}) is False + assert is_authorized({"Authorization": "Bearer secret-token"}) is True + + +def test_mcp_http_rate_limit_is_disabled_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("GNN_MCP_RATE_LIMIT_PER_MINUTE", raising=False) + _RATE_LIMIT_STATE.clear() + + assert is_rate_limited("127.0.0.1", now=100.0) is False + + +def test_mcp_http_rate_limit_blocks_after_configured_count( + monkeypatch: 
pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GNN_MCP_RATE_LIMIT_PER_MINUTE", "2") + _RATE_LIMIT_STATE.clear() + + assert is_rate_limited("127.0.0.1", now=100.0) is False + assert is_rate_limited("127.0.0.1", now=101.0) is False + assert is_rate_limited("127.0.0.1", now=102.0) is True + assert is_rate_limited("127.0.0.1", now=200.0) is False + + +def test_mcp_http_exposes_only_safe_tools_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("GNN_MCP_ALLOW_UNSAFE_TOOLS", raising=False) + monkeypatch.delenv("GNN_MCP_SAFE_TOOLS", raising=False) + + assert is_safe_http_tool("get_pipeline_status") is True + assert is_safe_http_tool("get_environment_info") is False + assert is_safe_http_tool("get_system_info") is False + assert is_safe_http_tool("process_execute") is False + + +def test_mcp_http_resources_are_denied_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("GNN_MCP_SAFE_RESOURCES", raising=False) + assert get_safe_http_resource_uris() == set() + assert is_safe_http_resource("gnn://pipeline/status") is False + + monkeypatch.setenv("GNN_MCP_SAFE_RESOURCES", "gnn://pipeline/status") + assert is_safe_http_resource("gnn://pipeline/status") is True + assert is_safe_http_resource("gnn://pipeline/config") is False + + +def test_mcp_http_capabilities_are_filtered_to_safe_surface( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("GNN_MCP_ALLOW_UNSAFE_TOOLS", raising=False) + monkeypatch.delenv("GNN_MCP_SAFE_RESOURCES", raising=False) + initialize(force_refresh=True) + + capabilities = get_http_capabilities() + tool_names = {tool["name"] for tool in capabilities["tools"]} + + assert "get_pipeline_status" in tool_names + assert "process_execute" not in tool_names + assert "get_environment_info" not in tool_names + assert capabilities["resources"] == [] + assert ( + capabilities["server"]["http_access"]["resource_allowlist_env"] + == "GNN_MCP_SAFE_RESOURCES" + ) + + +def 
test_mcp_auth_status_documents_resource_allowlist() -> None: + status = get_mcp_auth_status(None) + assert status["access_level"] == "safe_http_tool_and_resource_allowlists" + assert "GNN_MCP_SAFE_RESOURCES" in " ".join(status["recommendations"]) + + +def test_mcp_http_safe_tools_can_be_extended_or_disabled( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GNN_MCP_SAFE_TOOLS", "process_execute") + monkeypatch.delenv("GNN_MCP_ALLOW_UNSAFE_TOOLS", raising=False) + assert is_safe_http_tool("process_execute") is True + + monkeypatch.setenv("GNN_MCP_ALLOW_UNSAFE_TOOLS", "true") + assert get_safe_http_tool_names() is None + assert is_safe_http_tool("any_registered_tool") is True + + +def test_mcp_http_jsonrpc_smoke_auth_safe_tool_and_rate_limit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GNN_MCP_TOKEN", "local-dev-token") + monkeypatch.delenv("GNN_MCP_RATE_LIMIT_PER_MINUTE", raising=False) + monkeypatch.delenv("GNN_MCP_ALLOW_UNSAFE_TOOLS", raising=False) + monkeypatch.delenv("GNN_MCP_SAFE_TOOLS", raising=False) + _RATE_LIMIT_STATE.clear() + initialize(force_refresh=True) + server, thread = _start_http_server() + try: + assert _post_jsonrpc(server.server_port, {}, token=None)[0] == 401 + assert _post_jsonrpc(server.server_port, {}, token="wrong")[0] == 401 + + unsafe_status, unsafe_payload = _post_jsonrpc( + server.server_port, + { + "jsonrpc": "2.0", + "id": "unsafe", + "method": "mcp.tool.execute", + "params": {"name": "process_execute", "params": {}}, + }, + token="local-dev-token", + ) + assert unsafe_status == 200 + assert unsafe_payload["error"]["code"] == -32001 + + resource_status, resource_payload = _post_jsonrpc( + server.server_port, + { + "jsonrpc": "2.0", + "id": "resource", + "method": "mcp.resource.get", + "params": {"uri": "gnn://pipeline/status"}, + }, + token="local-dev-token", + ) + assert resource_status == 200 + assert resource_payload["error"]["code"] == -32002 + + capabilities_status, 
capabilities_payload = _post_jsonrpc( + server.server_port, + {"jsonrpc": "2.0", "id": "caps", "method": "mcp.capabilities"}, + token="local-dev-token", + ) + assert capabilities_status == 200 + capability_tool_names = { + tool["name"] for tool in capabilities_payload["result"]["tools"] + } + assert "process_execute" not in capability_tool_names + assert capabilities_payload["result"]["resources"] == [] + + safe_status, safe_payload = _post_jsonrpc( + server.server_port, + {"jsonrpc": "2.0", "id": "safe", "method": "get_pipeline_status"}, + token="local-dev-token", + ) + assert safe_status == 200 + assert safe_payload["id"] == "safe" + assert "result" in safe_payload + finally: + server.shutdown() + server.server_close() + thread.join(timeout=2) + + monkeypatch.setenv("GNN_MCP_RATE_LIMIT_PER_MINUTE", "1") + _RATE_LIMIT_STATE.clear() + server, thread = _start_http_server() + try: + first_status, _ = _post_jsonrpc( + server.server_port, + {"jsonrpc": "2.0", "id": "first", "method": "mcp.capabilities"}, + token="local-dev-token", + ) + second_status, second_payload = _post_jsonrpc( + server.server_port, + {"jsonrpc": "2.0", "id": "second", "method": "mcp.capabilities"}, + token="local-dev-token", + ) + assert first_status == 200 + assert second_status == 429 + assert "rate limit" in second_payload["error"] + finally: + server.shutdown() + server.server_close() + thread.join(timeout=2) + + +def test_mcp_http_rate_limit_applies_before_auth( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GNN_MCP_TOKEN", "local-dev-token") + monkeypatch.setenv("GNN_MCP_RATE_LIMIT_PER_MINUTE", "1") + _RATE_LIMIT_STATE.clear() + + server, thread = _start_http_server() + try: + first_status, _ = _post_jsonrpc(server.server_port, {}, token="wrong") + second_status, second_payload = _post_jsonrpc( + server.server_port, {}, token="wrong" + ) + assert first_status == 401 + assert second_status == 429 + assert "rate limit" in second_payload["error"] + finally: + 
server.shutdown() + server.server_close() + thread.join(timeout=2) + + +def _start_http_server() -> tuple[HTTPServer, threading.Thread]: + server = HTTPServer(("127.0.0.1", 0), MCPHTTPHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + return server, thread + + +def _post_jsonrpc( + port: int, payload: dict[str, Any], *, token: str | None +) -> tuple[int, dict[str, Any]]: + headers = {"Content-Type": "application/json"} + if token is not None: + headers["Authorization"] = f"Bearer {token}" + conn = http.client.HTTPConnection("127.0.0.1", port, timeout=5) + try: + conn.request("POST", "/", body=json.dumps(payload), headers=headers) + response = conn.getresponse() + body = response.read().decode("utf-8") + parsed = json.loads(body) if body else {} + return response.status, cast(dict[str, Any], parsed) + finally: + conn.close() diff --git a/src/tests/pipeline/test_autonomous_contract.py b/src/tests/pipeline/test_autonomous_contract.py new file mode 100644 index 000000000..dfe80294a --- /dev/null +++ b/src/tests/pipeline/test_autonomous_contract.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from pathlib import Path + +from pipeline.autonomous import ( + collect_observation_streams, + run_autonomous_proposal_loop, +) + + +def test_autonomous_loop_writes_proposals_without_source_mutation( + tmp_path: Path, +) -> None: + target = tmp_path / "input" + target.mkdir() + model = target / "model.md" + model.write_text("## ModelName\nDemo\n", encoding="utf-8") + output = tmp_path / "output" + report = run_autonomous_proposal_loop(target, output) + assert report["source_mutation_performed"] is False + assert report["cluster_mutation_performed"] is False + assert report["container_plan"]["dry_run"] is True + assert report["container_plan"]["mutation_performed"] is False + assert (output / "autonomous" / "autonomous_proposals.json").exists() + assert (output / "autonomous" / "autonomous_evaluation_report.json").exists() + 
assert (output / "autonomous" / "candidate-1.gnn.patch").exists() + assert report["evaluation_report"]["decisions"][0]["status"] == "proposal_only" + assert report["evaluation_report"]["decisions"][0]["score"]["value"] >= 70 + assert ( + "uv run --extra dev python scripts/check_capability_contracts.py --strict" + in report["evaluation_report"]["evidence"]["validator_commands"] + ) + assert model.read_text(encoding="utf-8") == "## ModelName\nDemo\n" + + +def test_autonomous_observation_streams_include_array_and_manifest_files( + tmp_path: Path, +) -> None: + (tmp_path / "observations.npy").write_bytes(b"numpy") + (tmp_path / "manifest.json").write_text("{}", encoding="utf-8") + + streams = collect_observation_streams(tmp_path) + + kinds = {Path(stream["path"]).name: stream["kind"] for stream in streams} + assert kinds["observations.npy"] == "array_file" + assert kinds["manifest.json"] == "manifest_file" + + +def test_autonomous_scoring_uses_existing_execution_summary(tmp_path: Path) -> None: + target = tmp_path / "input" + target.mkdir() + (target / "model.md").write_text("## ModelName\nDemo\n", encoding="utf-8") + output = tmp_path / "output" + summaries = output / "12_execute_output" / "summaries" + summaries.mkdir(parents=True) + (summaries / "execution_summary.json").write_text( + '{"success_rate": 100.0, "execution_details": []}', + encoding="utf-8", + ) + + report = run_autonomous_proposal_loop(target, output, max_candidates=1) + decision = report["evaluation_report"]["decisions"][0] + + assert decision["score"]["recommendation"] == "review_with_validators" + assert "execution_summary_available" in decision["score"]["reasons"] + assert report["evaluation_report"]["evidence"]["execution_summary_files"] diff --git a/src/tests/pipeline/test_main_orchestrator.py b/src/tests/pipeline/test_main_orchestrator.py index 8adc136ca..3288383ab 100644 --- a/src/tests/pipeline/test_main_orchestrator.py +++ b/src/tests/pipeline/test_main_orchestrator.py @@ -22,6 +22,7 @@ """ 
import logging +import shutil import subprocess # nosec B404 import sys import tempfile @@ -219,19 +220,30 @@ def test_minimal_pipeline_execution(self) -> None: main_py = SRC_DIR / "main.py" with tempfile.TemporaryDirectory() as td: outdir = Path(td) / "output" + target_dir = PROJECT_ROOT / "input" / "gnn_files" / "discrete" cmd: list[Any] = [ sys.executable, str(main_py), "--only-steps", "3,5,7", "--target-dir", - str(PROJECT_ROOT / "input" / "gnn_files"), + str(target_dir), "--output-dir", str(outdir), ] - result = subprocess.run( - cmd, capture_output=True, text=True, cwd=str(PROJECT_ROOT) - ) # nosec B603 + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + cwd=str(PROJECT_ROOT), + timeout=240, + ) # nosec B603 + except subprocess.TimeoutExpired as exc: + pytest.fail( + f"Minimal pipeline timed out for {target_dir}: " + f"stdout={exc.stdout!r}, stderr={exc.stderr!r}" + ) # Check for summary in the correct location (00_pipeline_summary subdirectory) summary = outdir / "00_pipeline_summary" / "pipeline_execution_summary.json" @@ -265,6 +277,12 @@ class TestEndToEndIntegration: def test_run_pipeline_subset(self) -> None: main_py = SRC_DIR / "main.py" with tempfile.TemporaryDirectory() as td: + target_dir = Path(td) / "input" + target_dir.mkdir() + shutil.copy2( + PROJECT_ROOT / "input" / "gnn_files" / "discrete" / "simple_mdp.md", + target_dir / "simple_mdp.md", + ) outdir = Path(td) / "output" cmd: list[Any] = [ sys.executable, @@ -272,7 +290,7 @@ def test_run_pipeline_subset(self) -> None: "--only-steps", "3,5,7,8", "--target-dir", - str(PROJECT_ROOT / "input" / "gnn_files"), + str(target_dir), "--output-dir", str(outdir), ] diff --git a/src/tests/pipeline/test_model_family_acceptance.py b/src/tests/pipeline/test_model_family_acceptance.py new file mode 100644 index 000000000..1c859c74d --- /dev/null +++ b/src/tests/pipeline/test_model_family_acceptance.py @@ -0,0 +1,509 @@ +from __future__ import annotations + +import json +import 
subprocess +from pathlib import Path +from typing import Sequence + +from pipeline.model_family_acceptance import ( + load_model_family_manifest, + run_model_family_acceptance, +) + + +def test_model_family_manifest_covers_required_families() -> None: + families = load_model_family_manifest(Path("input/model_family_manifest.json")) + names = {family.name for family in families} + + assert { + "basics", + "discrete", + "continuous", + "hierarchical", + "multiagent", + "precision", + "structured", + "gridworld", + "scaling-study", + }.issubset(names) + + +def test_model_family_acceptance_writes_ledger_with_step_statuses( + tmp_path: Path, +) -> None: + seen_commands: list[list[str]] = [] + + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + seen_commands.append(list(command)) + _write_pipeline_summary(Path(command[command.index("--output-dir") + 1])) + return subprocess.CompletedProcess( + args=list(command), + returncode=0, + stdout="pipeline ok", + stderr="", + ) + + ledger = run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics", "structured"], + runner=runner, + strict=True, + ) + + assert ledger["schema"] == "gnn_model_family_acceptance_ledger_v1" + assert ledger["status"] == "passed" + assert ledger["family_count"] == 2 + assert len(seen_commands) == 2 + assert all("--skip-llm" in command for command in seen_commands) + for family in ledger["families"]: + assert family["steps"]["3"] == "passed" + assert family["steps"]["11"] == "passed" + assert family["steps"]["0"] == "skipped" + assert family["interpretability_summary"]["model_count"] >= 1 + assert (tmp_path / "model_family_acceptance_ledger.json").exists() + assert (tmp_path / "model_family_acceptance_ledger.md").exists() + + +def test_model_family_acceptance_strict_fails_on_pipeline_failure( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + return 
subprocess.CompletedProcess( + args=list(command), + returncode=1, + stdout="", + stderr="failed", + ) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("strict model-family acceptance should fail") + + +def test_model_family_acceptance_strict_fails_without_pipeline_summary( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + return subprocess.CompletedProcess( + args=list(command), + returncode=0, + stdout="exit code alone is not acceptance evidence", + stderr="", + ) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("strict acceptance should fail without pipeline summary") + + +def test_model_family_acceptance_rejects_omitted_profile_steps( + tmp_path: Path, +) -> None: + called = False + + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + nonlocal called + called = True + return subprocess.CompletedProcess(args=list(command), returncode=0) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + only_steps="3", + runner=runner, + strict=True, + ) + except ValueError as exc: + assert "omitted" in str(exc) + assert "5" in str(exc) + else: + raise AssertionError("acceptance should reject missing profile steps") + assert called is False + + +def test_model_family_acceptance_fails_when_summary_omits_selected_step( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + _write_pipeline_summary( + Path(command[command.index("--output-dir") + 1]), + present_steps={3}, + ) + return 
subprocess.CompletedProcess( + args=list(command), + returncode=0, + stdout="summary is truncated", + stderr="", + ) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("strict acceptance should fail on missing step evidence") + + ledger = json.loads( + (tmp_path / "model_family_acceptance_ledger.json").read_text(encoding="utf-8") + ) + family = ledger["families"][0] + assert family["steps"]["5"] == "failed" + assert family["step_evidence"]["5"]["reason"] == "missing_summary_evidence" + + +def test_model_family_acceptance_clears_stale_family_output( + tmp_path: Path, +) -> None: + calls = 0 + + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + nonlocal calls + calls += 1 + output_dir = Path(command[command.index("--output-dir") + 1]) + if calls == 1: + _write_pipeline_summary(output_dir) + return subprocess.CompletedProcess(args=list(command), returncode=0) + return subprocess.CompletedProcess(args=list(command), returncode=1) + + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("stale prior summaries must not certify a later run") + + ledger = json.loads( + (tmp_path / "model_family_acceptance_ledger.json").read_text(encoding="utf-8") + ) + assert ledger["families"][0]["pipeline_summary"]["available"] is False + + +def test_model_family_acceptance_fails_synthetic_success_without_artifacts( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + 
_write_pipeline_summary( + Path(command[command.index("--output-dir") + 1]), + include_artifacts=False, + ) + return subprocess.CompletedProcess(args=list(command), returncode=0) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("strict acceptance should require concrete artifacts") + + ledger = json.loads( + (tmp_path / "model_family_acceptance_ledger.json").read_text(encoding="utf-8") + ) + family = ledger["families"][0] + assert family["steps"]["3"] == "failed" + assert family["step_evidence"]["3"]["reason"] == "missing_artifact_evidence" + + +def test_model_family_acceptance_treats_warning_exit_code_as_passed( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + _write_pipeline_summary( + Path(command[command.index("--output-dir") + 1]), + overall_status="SUCCESS_WITH_WARNINGS", + ) + return subprocess.CompletedProcess( + args=list(command), + returncode=2, + stdout="pipeline completed with warnings", + stderr="", + ) + + ledger = run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + + assert ledger["status"] == "passed" + assert ledger["families"][0]["status"] == "passed" + assert ledger["families"][0]["steps"]["3"] == "passed" + + +def test_model_family_acceptance_fails_contradictory_failed_overall_status( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + _write_pipeline_summary( + Path(command[command.index("--output-dir") + 1]), + overall_status="FAILED", + ) + return subprocess.CompletedProcess( + args=list(command), + returncode=1, + stdout="summary rows contradict overall failure", + stderr="", + ) + + try: + run_model_family_acceptance( + 
Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("strict acceptance should fail contradictory summaries") + + +def test_model_family_acceptance_fails_code_two_when_selected_step_failed( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + _write_pipeline_summary( + Path(command[command.index("--output-dir") + 1]), + failed_steps={12}, + ) + return subprocess.CompletedProcess( + args=list(command), + returncode=2, + stdout="pipeline completed with partial failure", + stderr="", + ) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["basics"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "basics" in str(exc) + else: + raise AssertionError("strict acceptance should fail when Step 12 failed") + + +def test_model_family_acceptance_allows_profiled_unsupported_steps( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + output_dir = Path(command[command.index("--output-dir") + 1]) + _write_pipeline_summary(output_dir, failed_steps={11, 12}) + _write_unsupported_render_execute_summaries(output_dir) + return subprocess.CompletedProcess( + args=list(command), + returncode=1, + stdout="pipeline failed because selected renderer is unsupported", + stderr="", + ) + + ledger = run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["continuous"], + runner=runner, + strict=True, + ) + + family = ledger["families"][0] + assert ledger["status"] == "passed" + assert family["raw_steps"]["11"] == "failed" + assert family["raw_steps"]["12"] == "failed" + assert family["steps"]["11"] == "skipped" + assert family["steps"]["12"] == "skipped" + assert family["step_evidence"]["11"]["acceptance"] == 
"allowed_unsupported" + assert family["step_evidence"]["12"]["acceptance"] == "allowed_unsupported" + assert "POMDP not compatible" in family["step_evidence"]["11"]["reason"] + assert "no_executable_scripts" in family["step_evidence"]["12"]["reason"] + + +def test_model_family_acceptance_rejects_partial_render_as_unsupported_skip( + tmp_path: Path, +) -> None: + def runner(command: Sequence[str]) -> subprocess.CompletedProcess[str]: + output_dir = Path(command[command.index("--output-dir") + 1]) + _write_pipeline_summary(output_dir, failed_steps={11, 12}) + _write_unsupported_render_execute_summaries( + output_dir, + successful_framework_renderings=1, + ) + return subprocess.CompletedProcess( + args=list(command), + returncode=1, + stdout="partial renderer regression", + stderr="", + ) + + try: + run_model_family_acceptance( + Path("input/model_family_manifest.json"), + tmp_path, + family_names=["continuous"], + runner=runner, + strict=True, + ) + except RuntimeError as exc: + assert "continuous" in str(exc) + else: + raise AssertionError("partial renderer failures are not unsupported skips") + + +def _write_pipeline_summary( + output_dir: Path, + failed_steps: set[int] | None = None, + present_steps: set[int] | None = None, + overall_status: str | None = None, + include_artifacts: bool = True, +) -> None: + failed_steps = failed_steps or set() + present_steps = present_steps or {3, 5, 6, 11, 12, 15, 16, 23} + if include_artifacts: + _write_step_artifacts(output_dir, present_steps) + summary_dir = output_dir / "00_pipeline_summary" + summary_dir.mkdir(parents=True, exist_ok=True) + steps = [] + for step in sorted(present_steps): + steps.append( + { + "script_name": f"{step}_step.py", + "status": "FAILED" if step in failed_steps else "SUCCESS", + } + ) + (summary_dir / "pipeline_execution_summary.json").write_text( + json.dumps( + { + "overall_status": overall_status + or ("PARTIAL_SUCCESS" if failed_steps else "SUCCESS"), + "steps": steps, + "performance_summary": 
{ + "failed_steps": len(failed_steps), + "successful_steps": len(steps) - len(failed_steps), + }, + } + ), + encoding="utf-8", + ) + + +def _write_step_artifacts(output_dir: Path, present_steps: set[int]) -> None: + artifact_payloads = { + 3: ( + "3_gnn_output/gnn_processing_summary.json", + "3_gnn_output/gnn_processing_results.json", + ), + 5: ("5_type_checker_output/type_check_results.json",), + 6: ( + "6_validation_output/validation_summary.json", + "6_validation_output/validation_results.json", + ), + 11: ("11_render_output/render_processing_summary.json",), + 12: ("12_execute_output/summaries/execution_summary.json",), + 15: ("15_audio_output/audio_results.json",), + 16: ("16_analysis_output/analysis_results.json",), + 23: ("23_report_output/report_processing_summary.json",), + } + for step in present_steps: + for relative in artifact_payloads.get(step, ()): + path = output_dir / relative + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps({"step": step, "success": True}), + encoding="utf-8", + ) + + +def _write_unsupported_render_execute_summaries( + output_dir: Path, + *, + successful_framework_renderings: int = 0, +) -> None: + render_dir = output_dir / "11_render_output" + render_dir.mkdir(parents=True, exist_ok=True) + (render_dir / "render_processing_summary.json").write_text( + json.dumps( + { + "successful_framework_renderings": successful_framework_renderings, + "failed_framework_renderings": [ + { + "file": "continuous_navigation.md", + "framework": "jax", + "message": "POMDP not compatible with jax: Missing required matrices: ['D']", + } + ], + } + ), + encoding="utf-8", + ) + execute_dir = output_dir / "12_execute_output" / "summaries" + execute_dir.mkdir(parents=True, exist_ok=True) + (execute_dir / "execution_summary.json").write_text( + json.dumps( + { + "total_scripts_found": 0, + "success": True, + "skipped_reason": "no_executable_scripts", + "message": "No executable scripts found", + "render_failures": [ + { + 
"file": "continuous_navigation.md", + "framework": "jax", + "message": "POMDP not compatible with jax: Missing required matrices: ['D']", + } + ], + } + ), + encoding="utf-8", + ) diff --git a/src/tests/pipeline/test_pipeline_integration.py b/src/tests/pipeline/test_pipeline_integration.py index 0757e41f9..9122b0d4a 100644 --- a/src/tests/pipeline/test_pipeline_integration.py +++ b/src/tests/pipeline/test_pipeline_integration.py @@ -16,6 +16,7 @@ # Add src to path for imports sys.path.insert(0, str(Path(__file__).parent.parent.parent)) +PROJECT_ROOT = Path(__file__).parent.parent.parent.parent class TestPipelineStepIntegration: @@ -265,7 +266,11 @@ def test_recovery_from_step_failure(self, tmp_path: Any) -> None: logging.getLogger("test_pipeline") - orchestrator = PipelineOrchestrator(steps=["3"]) + orchestrator = PipelineOrchestrator( + target_dir=str(PROJECT_ROOT / "input" / "gnn_files" / "discrete"), + output_dir=str(tmp_path / "output"), + steps=["3"], + ) # Should be able to instantiate and run assert orchestrator is not None diff --git a/src/tests/pipeline/test_pipeline_overall.py b/src/tests/pipeline/test_pipeline_overall.py index 90c139332..88fdce91d 100644 --- a/src/tests/pipeline/test_pipeline_overall.py +++ b/src/tests/pipeline/test_pipeline_overall.py @@ -13,6 +13,7 @@ from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) +PROJECT_ROOT = Path(__file__).parent.parent.parent.parent pipeline = pytest.importorskip("pipeline") @@ -81,8 +82,11 @@ def test_pipeline_execution(self, comprehensive_test_data: Any) -> Any: from pipeline import PipelineOrchestrator orchestrator = PipelineOrchestrator() - pipeline_data = comprehensive_test_data.get("pipeline_data", {}) - pipeline_data["steps"] = [3] + pipeline_data = { + "target_dir": str(PROJECT_ROOT / "input" / "gnn_files" / "discrete"), + "output_dir": str(comprehensive_test_data["output_dir"]), + "steps": [3], + } result = orchestrator.execute_pipeline(pipeline_data) assert result 
is not None diff --git a/src/tests/pipeline/test_pipeline_render_execute_analyze.py b/src/tests/pipeline/test_pipeline_render_execute_analyze.py index 7236611c0..b8bd10ee5 100644 --- a/src/tests/pipeline/test_pipeline_render_execute_analyze.py +++ b/src/tests/pipeline/test_pipeline_render_execute_analyze.py @@ -133,7 +133,7 @@ def test_render_execute_analyze_flow( dirs["base_output"], verbose=True, frameworks="pymdp", - timeout=10, + timeout=30, render_output_dir=dirs["base_output"] / "11_render_output", ) assert isinstance(execute_result, (bool, int)) @@ -170,7 +170,7 @@ def test_step_output_handoffs(self, pipeline_directories: Dict[str, Any]) -> Non dirs["base_output"], verbose=True, frameworks="pymdp", - timeout=10, + timeout=30, render_output_dir=dirs["base_output"], ) assert execute_result is True diff --git a/src/tests/pipeline/test_pipeline_scripts.py b/src/tests/pipeline/test_pipeline_scripts.py index 636320186..32e8ad9cd 100644 --- a/src/tests/pipeline/test_pipeline_scripts.py +++ b/src/tests/pipeline/test_pipeline_scripts.py @@ -620,7 +620,7 @@ def test_pipeline_core_sequence(self) -> None: scripts: list[Any] = ["3_gnn.py", "5_type_checker.py", "7_export.py"] with tempfile.TemporaryDirectory() as td: tmp = Path(td) - input_dir = PROJECT_ROOT / "input" / "gnn_files" + input_dir = PROJECT_ROOT / "input" / "gnn_files" / "discrete" output_dir = tmp / "output" for script_name in scripts: script_path = SRC_DIR / script_name @@ -634,9 +634,19 @@ def test_pipeline_core_sequence(self) -> None: "--output-dir", str(output_dir), ] - result = subprocess.run( - cmd, capture_output=True, text=True, cwd=str(PROJECT_ROOT) - ) + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + cwd=str(PROJECT_ROOT), + timeout=180, + ) + except subprocess.TimeoutExpired as exc: + pytest.fail( + f"{script_name} timed out for {input_dir}: " + f"stdout={exc.stdout!r}, stderr={exc.stderr!r}" + ) assert result.returncode in [0, 1] @pytest.mark.integration diff --git 
a/src/tests/pipeline/test_pomdp_gridworld_cross_framework.py b/src/tests/pipeline/test_pomdp_gridworld_cross_framework.py index 35b8f4669..e58a503e5 100644 --- a/src/tests/pipeline/test_pomdp_gridworld_cross_framework.py +++ b/src/tests/pipeline/test_pomdp_gridworld_cross_framework.py @@ -36,18 +36,36 @@ def _gridworld_spec() -> dict[str, Any]: def _assert_julia_packages() -> None: - result = subprocess.run( # nosec B603 B607 - [ - "julia", - "--startup-file=no", - "-e", - "using RxInfer, ActiveInference, JSON, Distributions, StatsBase", - ], - cwd=REPO_ROOT, - capture_output=True, - text=True, - timeout=120, - ) + cmd = [ + "julia", + "--startup-file=no", + "-e", + "using RxInfer, ActiveInference, JSON, Distributions, StatsBase", + ] + try: + result = subprocess.run( # nosec B603 B607 + cmd, + cwd=REPO_ROOT, + capture_output=True, + text=True, + timeout=120, + ) + except FileNotFoundError as exc: + pytest.skip(f"Julia executable not available for strict backend gate: {exc}") + except subprocess.TimeoutExpired as exc: + pytest.skip( + "Julia package gate timed out before backend execution; " + f"command={cmd!r} timeout={exc.timeout}s" + ) + if ( + result.returncode != 0 + and "Package " in result.stderr + and " not found" in result.stderr + ): + pytest.skip( + "Optional Julia backend packages are not installed for strict " + f"cross-framework execution: {result.stderr.strip()}" + ) assert result.returncode == 0, ( "Strict Julia package gate failed:\n" f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" @@ -72,6 +90,26 @@ def _assert_julia_parse(script: Path) -> None: ) +def test_julia_package_gate_skips_missing_optional_backend_packages( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_run(*_args: object, **_kwargs: object) -> subprocess.CompletedProcess[str]: + return subprocess.CompletedProcess( + args=["julia"], + returncode=1, + stdout="", + stderr=( + "ERROR: ArgumentError: Package RxInfer not found in current path.\n" + '- Run `import Pkg; 
Pkg.add("RxInfer")` to install the RxInfer package.' + ), + ) + + monkeypatch.setattr(subprocess, "run", fake_run) + + with pytest.raises(pytest.skip.Exception, match="Optional Julia backend packages"): + _assert_julia_packages() + + def _payload_for(exec_out: Path, framework: str) -> dict[str, Any]: matches = sorted( exec_out.glob(f"**/{framework}/simulation_data/*simulation_results.json") diff --git a/src/tests/render/test_discopy_symmetry_contract.py b/src/tests/render/test_discopy_symmetry_contract.py new file mode 100644 index 000000000..3d6e77a22 --- /dev/null +++ b/src/tests/render/test_discopy_symmetry_contract.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import importlib.util +import json +import subprocess +import sys +from pathlib import Path + +import pytest + +from render.discopy.discopy_renderer import DisCoPyRenderer +from render.discopy.symmetry import ( + build_matrix_permutation_metadata, + validate_matrix_permutation_metadata, +) + + +def test_discopy_matrix_permutation_metadata_uses_parsed_parameters() -> None: + spec = { + "initial_parameterization": { + "A": [[0.9, 0.1], [0.2, 0.8]], + "matrix_permutations": {"A": [1, 0]}, + } + } + metadata = build_matrix_permutation_metadata(spec) + assert metadata["A"]["shape"] == [2, 2] + assert metadata["A"]["permutation"] == [1, 0] + + +def test_discopy_matrix_permutation_rejects_mismatched_dimension() -> None: + spec = { + "initial_parameterization": { + "A": [[0.9, 0.1], [0.2, 0.8]], + "matrix_permutations": {"A": [0, 1, 2]}, + } + } + with pytest.raises(ValueError, match="does not match"): + build_matrix_permutation_metadata(spec) + + +def test_discopy_matrix_permutation_rejects_missing_matrix() -> None: + spec = {"initial_parameterization": {"matrix_permutations": {"B": [0, 1]}}} + + with pytest.raises(ValueError, match="missing matrix"): + build_matrix_permutation_metadata(spec) + + +def test_discopy_matrix_permutation_metadata_runtime_validator() -> None: + 
validate_matrix_permutation_metadata( + {"A": {"axis": "rows", "shape": [2, 2], "permutation": [1, 0]}} + ) + + with pytest.raises(ValueError, match="Permutation length"): + validate_matrix_permutation_metadata( + {"A": {"axis": "rows", "shape": [2, 2], "permutation": [0]}} + ) + + +def test_discopy_generated_script_does_not_install_dependencies() -> None: + renderer = DisCoPyRenderer(options={}) + code = renderer._generate_discopy_diagram_code( + {"model_name": "demo", "initial_parameterization": {}}, "demo" + ) + assert "pip install" not in code + assert "subprocess" not in code + assert "MATRIX_PERMUTATION_METADATA" in code + assert "MATRIX_PERMUTATION_APPLIED_TO_DIAGRAM = False" in code + assert "validate_matrix_permutation_metadata(MATRIX_PERMUTATION_METADATA)" in code + assert "'matrix_permutation_metadata': MATRIX_PERMUTATION_METADATA" in code + assert ( + "'matrix_permutation_applied_to_diagram': MATRIX_PERMUTATION_APPLIED_TO_DIAGRAM" + in code + ) + + +def test_discopy_generated_script_exports_permutation_metadata(tmp_path: Path) -> None: + if importlib.util.find_spec("discopy") is None: + pytest.skip("DisCoPy optional dependency is not installed") + renderer = DisCoPyRenderer(options={}) + code = renderer._generate_discopy_diagram_code( + { + "model_name": "demo", + "initial_parameterization": { + "A": [[0.9, 0.1], [0.2, 0.8]], + "matrix_permutations": {"A": [1, 0]}, + }, + }, + "demo", + ) + script_path = tmp_path / "demo_discopy.py" + script_path.write_text(code, encoding="utf-8") + + result = subprocess.run( + [sys.executable, script_path.name], + cwd=tmp_path, + capture_output=True, + text=True, + timeout=20, + ) + + assert result.returncode == 0, result.stderr + circuit_info = json.loads( + (tmp_path / "discopy_diagrams" / "circuit_info.json").read_text( + encoding="utf-8" + ) + ) + assert circuit_info["matrix_permutation_metadata"]["A"]["shape"] == [2, 2] + assert circuit_info["matrix_permutation_metadata"]["A"]["permutation"] == [1, 0] + assert 
circuit_info["matrix_permutation_applied_to_diagram"] is False diff --git a/src/tests/render/test_rxinfer_multiagent_contract.py b/src/tests/render/test_rxinfer_multiagent_contract.py new file mode 100644 index 000000000..711056587 --- /dev/null +++ b/src/tests/render/test_rxinfer_multiagent_contract.py @@ -0,0 +1,473 @@ +from __future__ import annotations + +import hashlib +import io +import json +import logging +import sys +import tomllib +from pathlib import Path + +import pytest + +from execute.processor import ( + _load_rxinfer_execution_metadata_from_script, + execute_single_script, +) +from gnn.pomdp_extractor import POMDPExtractor +from render.pomdp_processor import POMDPRenderProcessor +from render.rxinfer.rxinfer_renderer import ( + build_rxinfer_execution_metadata, + render_gnn_to_rxinfer, +) +from render.rxinfer.toml_generator import ( + _create_toml_config_structure, + _write_toml_with_exact_formatting, +) + + +def test_rxinfer_compact_multiagent_keys_drive_agent_count() -> None: + spec = { + "initialparameterization": { + "nr_agents": 3, + "agent_ids": [10, 20, 30], + "agent_initial_positions": [[0, 0], [1, 0], [0, 1]], + "agent_target_positions": [[2, 2], [3, 2], [2, 3]], + "agent_radii": [0.5, 0.6, 0.7], + } + } + config = _create_toml_config_structure(spec, {}) + assert config["model"]["nr_agents"] == 3 + assert [agent["id"] for agent in config["agents"]] == [10, 20, 30] + assert config["agents"][2]["target_position"] == [2, 3] + + +def test_rxinfer_nr_agents_does_not_silently_fallback_to_four_defaults() -> None: + spec = { + "initialparameterization": { + "nr_agents": 3, + "agent_ids": [1, 2, 3], + "agent_initial_positions": [[0, 0], [1, 0], [0, 1]], + } + } + with pytest.raises(ValueError, match="agent_target_positions"): + _create_toml_config_structure(spec, {}) + + +def test_rxinfer_clustered_topology_is_preserved_in_config() -> None: + spec = { + "initialparameterization": { + "nr_agents": 3, + "agent_ids": [10, 20, 30], + 
"agent_initial_positions": [[0, 0], [1, 0], [0, 1]], + "agent_target_positions": [[2, 2], [3, 2], [2, 3]], + "agent_edges": [[10, 20], {"source": 20, "target": 30}], + "agent_clusters": {"left": [10, 20], "right": [30]}, + "message_passing": "clustered_mean_field", + } + } + config = _create_toml_config_structure(spec, {}) + + assert config["topology"]["type"] == "clustered" + assert config["topology"]["edges"] == [ + {"source": 10, "target": 20}, + {"source": 20, "target": 30}, + ] + assert config["topology"]["clusters"][0] == { + "name": "left", + "agent_ids": [10, 20], + } + assert config["topology"]["message_passing"] == "clustered_mean_field" + + +def test_rxinfer_topology_rejects_unknown_edge_endpoint() -> None: + spec = { + "initialparameterization": { + "nr_agents": 2, + "agent_ids": ["a", "b"], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + "agent_edges": [["a", "missing"]], + } + } + + with pytest.raises(ValueError, match="undeclared agent"): + _create_toml_config_structure(spec, {}) + + +def test_rxinfer_topology_rejects_unknown_cluster_member() -> None: + spec = { + "initialparameterization": { + "nr_agents": 2, + "agent_ids": ["a", "b"], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + "agent_clusters": {"bad": ["a", "missing"]}, + } + } + + with pytest.raises(ValueError, match="undeclared agent"): + _create_toml_config_structure(spec, {}) + + +def test_rxinfer_topology_rejects_partial_edge_record() -> None: + spec = { + "initialparameterization": { + "nr_agents": 2, + "agent_ids": ["a", "b"], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + "agent_edges": [{"source": "a"}], + } + } + + with pytest.raises(ValueError, match="source and target"): + _create_toml_config_structure(spec, {}) + + +def test_rxinfer_topology_rejects_non_list_cluster_members() -> None: + spec = { + "initialparameterization": { + "nr_agents": 
2, + "agent_ids": ["a", "b"], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + "agent_clusters": {"bad": "a,b"}, + } + } + + with pytest.raises(ValueError, match="members must be a list"): + _create_toml_config_structure(spec, {}) + + +def test_rxinfer_accepts_underscored_initial_parameterization() -> None: + config = _create_toml_config_structure( + { + "initial_parameterization": { + "nr_agents": 2, + "agent_ids": ["agent_a", "agent_b"], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + } + }, + {}, + ) + + assert config["model"]["nr_agents"] == 2 + assert [agent["id"] for agent in config["agents"]] == ["agent_a", "agent_b"] + + +def test_rxinfer_string_topology_edges_render_valid_toml() -> None: + config = _create_toml_config_structure( + { + "initial_parameterization": { + "nr_agents": 2, + "agent_ids": ["a", "b"], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + "agent_edges": [["a", "b"]], + } + }, + {}, + ) + buffer = io.StringIO() + _write_toml_with_exact_formatting(buffer, config) + parsed = tomllib.loads(buffer.getvalue()) + + assert parsed["agents"][0]["id"] == "a" + assert parsed["topology"]["edges"][0] == {"source": "a", "target": "b"} + + +def test_rxinfer_execution_metadata_reads_toml_topology(tmp_path: Path) -> None: + spec = { + "initialparameterization": { + "nr_agents": 2, + "agent_ids": [1, 2], + "agent_initial_positions": [[0, 0], [1, 0]], + "agent_target_positions": [[2, 2], [3, 2]], + "agent_edges": [[1, 2]], + } + } + config = _create_toml_config_structure(spec, {}) + script_path = tmp_path / "model.jl" + script_path.write_text("# rendered julia\n", encoding="utf-8") + with (tmp_path / "model.toml").open("w", encoding="utf-8") as handle: + _write_toml_with_exact_formatting(handle, config) + + metadata = _load_rxinfer_execution_metadata_from_script(script_path) + + assert metadata["agent_count"] == 2 + 
assert metadata["topology"]["type"] == "network" + assert metadata["topology"]["edges"] == [{"source": 1, "target": 2}] + + +def test_rxinfer_execution_metadata_ignores_unmatched_sidecar(tmp_path: Path) -> None: + script_path = tmp_path / "a_rxinfer.jl" + script_path.write_text("# rendered julia\n", encoding="utf-8") + (tmp_path / "b.metadata.json").write_text( + json.dumps({"agent_count": 99, "topology": {"type": "wrong"}}), + encoding="utf-8", + ) + + assert _load_rxinfer_execution_metadata_from_script(script_path) == {} + + +def test_rxinfer_parser_preserves_compact_multiagent_keys(tmp_path: Path) -> None: + content = """ +## ModelName +Compact Agents + +## StateSpaceBlock +s[2,1,type=categorical] +o[2,1,type=categorical] +u[2,1,type=categorical] + +## Connections +s > o +s > s +u > s + +## InitialParameterization +nr_agents=3 +agent_ids=[1,2,3] +agent_initial_positions=[[0.0,0.0],[1.0,0.0],[0.0,1.0]] +agent_target_positions=[[2.0,2.0],[3.0,2.0],[2.0,3.0]] +agent_edges=[[1,2],[2,3]] +agent_clusters=[{"name":"left","agent_ids":[1,2]},{"name":"right","agent_ids":[3]}] +message_passing=clustered_mean_field +A={(0.9,0.1),(0.1,0.9)} +B={((0.9,0.1),(0.1,0.9)),((0.1,0.9),(0.9,0.1))} +C={(1.0,0.0)} +D={(0.5,0.5)} +""" + pomdp_space = POMDPExtractor(strict_validation=True).extract_from_gnn_content( + content + ) + assert pomdp_space is not None + + initial = pomdp_space.initial_parameterization + assert initial is not None + assert initial["nr_agents"] == 3 + assert initial["agent_ids"] == [1, 2, 3] + assert initial["agent_edges"] == [[1, 2], [2, 3]] + assert initial["agent_clusters"][0]["name"] == "left" + + gnn_spec = POMDPRenderProcessor(tmp_path)._pomdp_to_gnn_spec(pomdp_space) + assert gnn_spec["initialparameterization"]["nr_agents"] == 3 + assert gnn_spec["initialparameterization"]["agent_clusters"][1]["agent_ids"] == [3] + + +def test_rxinfer_renderer_writes_execution_metadata_sidecar(tmp_path: Path) -> None: + spec = { + "name": "Compact Agents", + 
"model_parameters": { + "num_hidden_states": 2, + "num_obs": 2, + "num_actions": 2, + }, + "initialparameterization": { + "nr_agents": 3, + "agent_ids": [1, 2, 3], + "agent_initial_positions": [[0, 0], [1, 0], [0, 1]], + "agent_target_positions": [[2, 2], [3, 2], [2, 3]], + "agent_edges": [[1, 2], [2, 3]], + "agent_clusters": {"left": [1, 2], "right": [3]}, + "message_passing": "clustered_mean_field", + "A": [[0.9, 0.1], [0.1, 0.9]], + "B": [ + [[0.9, 0.1], [0.1, 0.9]], + [[0.1, 0.9], [0.9, 0.1]], + ], + "C": [1.0, 0.0], + "D": [0.5, 0.5], + }, + } + output_path = tmp_path / "compact_agents_rxinfer.jl" + + success, message, warnings = render_gnn_to_rxinfer(spec, output_path) + + assert success, message + assert warnings == [] + metadata_path = output_path.with_suffix(".metadata.json") + metadata = json.loads(metadata_path.read_text(encoding="utf-8")) + assert metadata["agent_count"] == 3 + assert metadata["topology"]["type"] == "clustered" + assert metadata["schema"] == "gnn_rxinfer_execution_metadata_v1" + assert len(metadata["script_sha256"]) == 64 + assert metadata["topology"]["edges"] == [ + {"source": 1, "target": 2}, + {"source": 2, "target": 3}, + ] + assert metadata["topology"]["message_passing"] == "clustered_mean_field" + + loaded = _load_rxinfer_execution_metadata_from_script(output_path) + assert loaded["agent_count"] == 3 + assert loaded["topology"]["source"] == str(metadata_path) + assert loaded["metadata_verification"] == "script_sha256_match" + + +def test_rxinfer_execution_metadata_supports_indexed_agents_without_fallback() -> None: + metadata = build_rxinfer_execution_metadata( + { + "initialparameterization": { + "agent1_id": 10, + "agent1_initial_position": [0, 0], + "agent1_target_position": [1, 1], + "agent2_id": 20, + "agent2_initial_position": [1, 0], + "agent2_target_position": [2, 1], + } + } + ) + + assert metadata["agent_count"] == 2 + assert metadata["topology"]["agent_ids"] == [10, 20] + + +def 
test_rxinfer_execution_metadata_ignores_schemaless_sidecar(tmp_path: Path) -> None: + script_path = tmp_path / "demo_rxinfer.py" + script_path.write_text("print('ok')\n", encoding="utf-8") + script_path.with_suffix(".metadata.json").write_text( + json.dumps({"agent_count": 99, "topology": {"type": "wrong"}}), + encoding="utf-8", + ) + + assert _load_rxinfer_execution_metadata_from_script(script_path) == {} + + +def test_rxinfer_execution_metadata_ignores_stale_sidecar_hash(tmp_path: Path) -> None: + script_path = tmp_path / "demo_rxinfer.py" + script_path.write_text("print('ok')\n", encoding="utf-8") + script_path.with_suffix(".metadata.json").write_text( + json.dumps( + { + "schema": "gnn_rxinfer_execution_metadata_v1", + "script_sha256": "0" * 64, + "agent_count": 99, + "topology": {"type": "wrong"}, + } + ), + encoding="utf-8", + ) + + assert _load_rxinfer_execution_metadata_from_script(script_path) == {} + + +def test_rxinfer_execution_metadata_ignores_unmatched_toml_sidecar( + tmp_path: Path, +) -> None: + script_path = tmp_path / "demo_rxinfer.py" + script_path.write_text("print('ok')\n", encoding="utf-8") + (tmp_path / "other.toml").write_text( + """ + [[agents]] + id = 99 + radius = 1.0 + initial_position = [0, 0] + target_position = [1, 1] + """, + encoding="utf-8", + ) + + assert _load_rxinfer_execution_metadata_from_script(script_path) == {} + + +def test_rxinfer_step12_result_records_agent_metadata_on_success( + tmp_path: Path, +) -> None: + script_path = tmp_path / "demo" / "rxinfer" / "demo_rxinfer.py" + script_path.parent.mkdir(parents=True) + script_path.write_text("print('ok')\n", encoding="utf-8") + script_path.with_suffix(".metadata.json").write_text( + json.dumps(_rxinfer_metadata(script_path, 3)), + encoding="utf-8", + ) + + result = execute_single_script( + { + "path": script_path, + "name": script_path.name, + "framework": "rxinfer", + "executor": sys.executable, + }, + tmp_path / "12_execute_output", + False, + logging.getLogger("test"), + 
timeout=10, + ) + + assert result["success"] is True + assert result["execution_metadata"]["agent_count"] == 3 + structured = json.loads( + (tmp_path / "12_execute_output") + .joinpath( + "demo", "rxinfer", "execution_logs", f"{script_path.name}_results.json" + ) + .read_text(encoding="utf-8") + ) + assert structured["success"] is True + assert structured["execution_metadata"]["agent_count"] == 3 + assert structured["execution_metadata"]["topology"]["type"] == "clustered" + + +def test_rxinfer_step12_result_records_agent_metadata_on_failure( + tmp_path: Path, +) -> None: + script_path = tmp_path / "demo" / "rxinfer" / "demo_rxinfer.py" + script_path.parent.mkdir(parents=True) + script_path.write_text("import sys\nsys.exit(2)\n", encoding="utf-8") + script_path.with_suffix(".metadata.json").write_text( + json.dumps( + _rxinfer_metadata( + script_path, + 2, + {"type": "agent_population", "agent_ids": [10, 20]}, + ) + ), + encoding="utf-8", + ) + + result = execute_single_script( + { + "path": script_path, + "name": script_path.name, + "framework": "rxinfer", + "executor": sys.executable, + }, + tmp_path / "12_execute_output", + False, + logging.getLogger("test"), + timeout=10, + ) + + assert result["success"] is False + assert result["return_code"] == 2 + assert result["execution_metadata"]["agent_count"] == 2 + structured = json.loads( + (tmp_path / "12_execute_output") + .joinpath( + "demo", "rxinfer", "execution_logs", f"{script_path.name}_results.json" + ) + .read_text(encoding="utf-8") + ) + assert structured["success"] is False + assert structured["execution_metadata"]["agent_count"] == 2 + + +def _rxinfer_metadata( + script_path: Path, + agent_count: int, + topology: dict[str, object] | None = None, +) -> dict[str, object]: + topology = topology or { + "type": "clustered", + "agent_ids": [1, 2, 3], + "edges": [{"source": 1, "target": 2}], + "message_passing": "clustered_mean_field", + } + return { + "schema": "gnn_rxinfer_execution_metadata_v1", + 
"script_sha256": hashlib.sha256(script_path.read_bytes()).hexdigest(), + "agent_count": agent_count, + "topology": topology, + } diff --git a/src/tests/report/test_model_family_report.py b/src/tests/report/test_model_family_report.py new file mode 100644 index 000000000..8ad1ae5f1 --- /dev/null +++ b/src/tests/report/test_model_family_report.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from report.model_family import render_model_family_acceptance_markdown + + +def test_model_family_acceptance_report_renders_status_table() -> None: + ledger = { + "schema": "gnn_model_family_acceptance_ledger_v1", + "family_count": 1, + "strict": True, + "only_steps": "3,5,6", + "frameworks": "pymdp", + "families": [ + { + "name": "basics", + "status": "passed", + "step_status_counts": {"passed": 3, "failed": 0, "skipped": 22}, + "raw_steps": {"11": "failed", "12": "passed"}, + "step_evidence": { + "11": {"acceptance": "allowed_unsupported"}, + "12": {"acceptance": "required"}, + }, + "interpretability_summary": {"model_count": 2}, + } + ], + } + + markdown = render_model_family_acceptance_markdown(ledger) + + assert "# GNN Model Family Acceptance Ledger" in markdown + assert "| basics | passed | 2 | 3 | 0 | 22 | 1 | 1 |" in markdown diff --git a/src/tests/setup/test_windows_resource_contract.py b/src/tests/setup/test_windows_resource_contract.py new file mode 100644 index 000000000..2650f3298 --- /dev/null +++ b/src/tests/setup/test_windows_resource_contract.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import ast +from pathlib import Path + +from setup.package_names import package_name_for_import + + +def test_pipeline_execution_does_not_import_posix_resource_module() -> None: + source = Path("src/pipeline/execution.py").read_text(encoding="utf-8") + tree = ast.parse(source) + imported_modules: list[str] = [] + for node in ast.walk(tree): + if isinstance(node, ast.Import): + imported_modules.extend(alias.name for alias in node.names) + elif 
isinstance(node, ast.ImportFrom) and node.module: + imported_modules.append(node.module) + assert "resource" not in imported_modules + + +def test_import_to_package_name_mapping_handles_common_mismatches() -> None: + assert package_name_for_import("yaml") == "PyYAML" + assert package_name_for_import("sklearn") == "scikit-learn" + assert package_name_for_import("numpy") == "numpy" diff --git a/src/tests/test_run_pymdp_gnn_scaling_estimate.py b/src/tests/test_run_pymdp_gnn_scaling_estimate.py index 548605a83..ac4651330 100644 --- a/src/tests/test_run_pymdp_gnn_scaling_estimate.py +++ b/src/tests/test_run_pymdp_gnn_scaling_estimate.py @@ -207,3 +207,24 @@ def fail_with_enospc(self: Any, *args: Any, **kwargs: Any) -> Any: monkeypatch.setattr(scaling_mod.Path, "write_text", fail_with_enospc) assert scaling_mod._write_resource_gate_file({"kind": "test"}) is None + + +def test_non_mapping_yaml_config_recovers_to_empty_dict( + scaling_mod: Any, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + config_path = tmp_path / "pymdp_scaling_config.yaml" + config_path.write_text("- not\n- a\n- mapping\n", encoding="utf-8") + monkeypatch.setattr(scaling_mod, "CONFIG_FILE", config_path) + + assert scaling_mod._load_and_validate_config() == {} + + +def test_usage_snapshot_returns_stable_disk_fields( + scaling_mod: Any, tmp_path: Path +) -> None: + snapshot = scaling_mod._usage_snapshot(tmp_path) + + assert snapshot["path"] == str(tmp_path.resolve()) + assert snapshot["total_bytes"] >= snapshot["free_bytes"] + assert snapshot["used_bytes"] >= 0 + assert 0.0 <= snapshot["used_percent"] <= 100.0 diff --git a/src/tests/visualization/test_threejs_tensor_explorer.py b/src/tests/visualization/test_threejs_tensor_explorer.py new file mode 100644 index 000000000..16ebf9727 --- /dev/null +++ b/src/tests/visualization/test_threejs_tensor_explorer.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +import functools +import json +import threading +from http.server import 
SimpleHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path + +import numpy as np +import pytest + +from visualization.matrix import MatrixVisualizer + + +def test_threejs_tensor_explorer_writes_html_and_json(tmp_path: Path) -> None: + tensor = np.zeros((2, 2, 2), dtype=float) + tensor[:, :, 0] = [[0.9, 0.1], [0.2, 0.8]] + output = tmp_path / "tensor.html" + visualizer = MatrixVisualizer() + assert visualizer.generate_threejs_tensor_explorer("B", tensor, output) is True + html = output.read_text(encoding="utf-8") + fallback = json.loads(output.with_suffix(".json").read_text(encoding="utf-8")) + assert "three@" in html + assert " None: + playwright_api = pytest.importorskip("playwright.sync_api") + tensor = np.zeros((2, 2, 2), dtype=float) + tensor[:, :, 0] = [[0.9, 0.1], [0.2, 0.8]] + output = tmp_path / "tensor.html" + assert MatrixVisualizer().generate_threejs_tensor_explorer("B", tensor, output) + server = _serve_directory(tmp_path) + try: + with playwright_api.sync_playwright() as p: + browser = p.chromium.launch() + try: + page = browser.new_page() + console_errors: list[str] = [] + page.on( + "console", + lambda msg: ( + console_errors.append(msg.text) + if msg.type == "error" and "favicon" not in msg.text.lower() + else None + ), + ) + page.goto(f"http://127.0.0.1:{server.server_port}/tensor.html") + page.wait_for_selector("canvas#scene") + page.wait_for_function( + """ + () => { + const canvas = document.querySelector('canvas#scene'); + return canvas && canvas.width > 0 && canvas.height > 0; + } + """ + ) + non_background_pixels = page.evaluate( + """ + () => { + const canvas = document.querySelector('canvas#scene'); + const gl = canvas.getContext('webgl2') || canvas.getContext('webgl'); + const pixels = new Uint8Array(canvas.width * canvas.height * 4); + gl.readPixels(0, 0, canvas.width, canvas.height, gl.RGBA, gl.UNSIGNED_BYTE, pixels); + let count = 0; + for (let i = 0; i < pixels.length; i += 4) { + if (!(pixels[i] === 16 && pixels[i + 1] 
=== 24 && pixels[i + 2] === 32)) { + count += 1; + } + } + return count; + } + """ + ) + assert non_background_pixels > 0 + assert console_errors == [] + finally: + browser.close() + finally: + server.shutdown() + server.server_close() + + +def test_threejs_tensor_explorer_reports_fallback_when_cdn_blocked( + tmp_path: Path, +) -> None: + playwright_api = pytest.importorskip("playwright.sync_api") + tensor = np.zeros((2, 2, 2), dtype=float) + tensor[:, :, 0] = [[0.9, 0.1], [0.2, 0.8]] + output = tmp_path / "tensor.html" + assert MatrixVisualizer().generate_threejs_tensor_explorer("B", tensor, output) + server = _serve_directory(tmp_path) + try: + with playwright_api.sync_playwright() as p: + browser = p.chromium.launch() + try: + page = browser.new_page() + page.route("**/three.module.js", lambda route: route.abort()) + page.goto(f"http://127.0.0.1:{server.server_port}/tensor.html") + page.wait_for_function( + "() => document.body.dataset.threejsStatus === 'fallback'" + ) + assert page.locator("#fallback").is_visible() + assert "JSON fallback" in page.locator("#hud").inner_text() + finally: + browser.close() + finally: + server.shutdown() + server.server_close() + + +def _serve_directory(path: Path) -> ThreadingHTTPServer: + handler = functools.partial(SimpleHTTPRequestHandler, directory=str(path)) + server = ThreadingHTTPServer(("127.0.0.1", 0), handler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + return server diff --git a/src/utils/argument_utils.py b/src/utils/argument_utils.py index 2495e98de..4170cbf39 100644 --- a/src/utils/argument_utils.py +++ b/src/utils/argument_utils.py @@ -92,6 +92,7 @@ class PipelineArguments: # Step control skip_steps: Optional[str] = None only_steps: Optional[str] = None + autonomous: bool = False # Type checking options strict: bool = False @@ -323,6 +324,15 @@ class ArgumentParser: default=None, help_text="Comma-separated list of steps to run exclusively", ), + "autonomous": 
ArgumentDefinition( + flag="--autonomous", + action="store_true", + default=False, + help_text=( + "Write bounded autonomous proposal artifacts under output/ " + "without editing source files" + ), + ), "skip_llm": ArgumentDefinition( flag="--skip-llm", action="store_true", diff --git a/src/visualization/core/process.py b/src/visualization/core/process.py index 62c4592f0..e43fd5b54 100644 --- a/src/visualization/core/process.py +++ b/src/visualization/core/process.py @@ -225,6 +225,9 @@ def process_single_gnn_file( m_name, m_data, m_path, tensor_type="transition" ): visualizations.append(str(m_path)) + html_path = model_dir / f"{model_name}_{m_name}_threejs.html" + if mv.generate_threejs_tensor_explorer(m_name, m_data, html_path): + visualizations.append(str(html_path)) analysis_path = model_dir / f"{model_name}_{m_name}_analysis.png" mv.generate_pomdp_transition_analysis(m_data, analysis_path) visualizations.append(str(analysis_path)) diff --git a/src/visualization/matrix/visualizer.py b/src/visualization/matrix/visualizer.py index f4aa9509c..7dd94c9bc 100644 --- a/src/visualization/matrix/visualizer.py +++ b/src/visualization/matrix/visualizer.py @@ -12,6 +12,7 @@ """ import csv +import json import logging from ..compat.viz_compat import MATPLOTLIB_AVAILABLE, np, plt, sns @@ -69,6 +70,105 @@ def _safe_figsize(width: float, height: float) -> tuple: return (clamped_w, clamped_h) +def _threejs_tensor_html( + page_title: str, payload: Dict[str, Any], fallback_name: str +) -> str: + """Return a self-contained Three.js tensor explorer page.""" + payload_json = json.dumps(payload) + return f""" + + + + + {page_title} + + + + +
+

{page_title}

+

+

Loading WebGL renderer...

+

JSON fallback data

+ +
+ + + +""" + + class MatrixVisualizer: """ Handles matrix visualization for GNN models. @@ -421,6 +521,38 @@ def generate_3d_tensor_visualization( plt.close() return False + def generate_threejs_tensor_explorer( + self, + tensor_name: str, + tensor: np.ndarray, + output_path: Path, + title: Optional[str] = None, + ) -> bool: + """Generate an interactive Three.js/WebGL explorer for a 3D tensor.""" + try: + if tensor.ndim != 3: + logger.warning( + "Tensor %s is not 3D (shape: %s)", tensor_name, tensor.shape + ) + return False + output_path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "name": tensor_name, + "shape": list(tensor.shape), + "values": tensor.tolist(), + } + fallback_path = output_path.with_suffix(".json") + fallback_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + page_title = title or f"Three.js Tensor Explorer: {tensor_name}" + html = _threejs_tensor_html(page_title, payload, fallback_path.name) + output_path.write_text(html, encoding="utf-8") + return True + except Exception as e: + logger.error( + "Error generating Three.js tensor explorer for %s: %s", tensor_name, e + ) + return False + def _generate_tensor_statistics( self, tensor: np.ndarray, tensor_name: str, tensor_type: str ) -> str: diff --git a/uv.lock b/uv.lock index 9df20763d..86178ecac 100644 --- a/uv.lock +++ b/uv.lock @@ -1369,7 +1369,7 @@ wheels = [ [[package]] name = "generalized-notation-notation" -version = "1.6.0" +version = "1.8.0" source = { editable = "." } dependencies = [ { name = "aiohttp" },