tmart234 · tmart234 · May 18, 2026 · May 18, 2026
diff --git a/.github/workflows/export.yml b/.github/workflows/export.yml
@@ -0,0 +1,37 @@
+name: Mobile Export Parity
+
+# Heavy: installs tensorflow + coremltools. NOT in the default tests.yml
+# selection because (a) coremltools is a big install and (b) the parity
+# checks only matter when the export module or the model graph itself
+# changes. Runs on:
+#   - workflow_dispatch (manual sanity check before merging an export change)
+#   - PRs that touch export_mobile.py or model.py (catches regressions in
+#     the conversion paths or the embedding/LSTM graph that breaks them)
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - 'openFlowML/export_mobile.py'
+      - 'openFlowML/model.py'
+      - 'tests/test_export_parity.py'
+
+jobs:
+  parity:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r openFlowML/requirements.txt
+        pip install pytest
+
+    - name: Run mobile export parity tests
+      run: python -m pytest -v tests/test_export_parity.py
diff --git a/.github/workflows/ml_training.yml b/.github/workflows/ml_training.yml
@@ -21,10 +21,18 @@ on:
 jobs:
   train:
     runs-on: ubuntu-latest
+    # contents:write is needed for softprops/action-gh-release to tag and
+    # publish the trained model artifacts as a GitHub Release.
+    permissions:
+      contents: write
 
     steps:
     - name: Checkout repository
       uses: actions/checkout@v4
+      with:
+        # Fetch tags so the release-tag collision check ("does model-YYYY.MM.DD
+        # already exist?") can see existing release tags.
+        fetch-depth: 0
 
     - name: Set up Python
       uses: actions/setup-python@v5
@@ -52,18 +60,68 @@ jobs:
         OPENFLOW_DISABLE_RESERVOIR: ${{ inputs.disable_reservoir && '1' || '' }}
       run: python openFlowML/train.py
 
-    # The model is not usable for inference without the scaler parameters
-    # (to invert the log1p + z-score flow transform), the station + basin
-    # index maps (for the embedding lookups), and the training config (which
-    # records the feature schema). All five travel together.
+    - name: Export mobile artifacts (CoreML + TFLite + manifest)
+      id: export
+      # Best-effort: a Phase 6 regression must NOT lose the trained .h5. The
+      # release step is gated on this step's success, so a failure here
+      # cleanly skips the release while still uploading the .h5 via the
+      # artifact step below.
+      continue-on-error: true
+      env:
+        OPENFLOW_MODEL_VERSION: model-${{ github.run_number }}
+      working-directory: openFlowML
+      run: python -m export_mobile --base-path "$GITHUB_WORKSPACE"
+
+    - name: Compute release tag
+      id: tag
+      if: github.ref == 'refs/heads/main' && steps.export.outcome == 'success'
+      # Use the UTC date as the base tag and suffix -rN on collision so a
+      # same-day re-run still produces a fresh release.
+      run: |
+        BASE_TAG="model-$(date -u +%Y.%m.%d)"
+        TAG="$BASE_TAG"
+        N=2
+        while git rev-parse "refs/tags/$TAG" >/dev/null 2>&1; do
+          TAG="${BASE_TAG}-r${N}"
+          N=$((N + 1))
+        done
+        echo "tag=$TAG" >> "$GITHUB_OUTPUT"
+
+    - name: Publish GitHub Release
+      if: github.ref == 'refs/heads/main' && steps.export.outcome == 'success'
+      uses: softprops/action-gh-release@v2
+      with:
+        tag_name: ${{ steps.tag.outputs.tag }}
+        name: Model ${{ steps.tag.outputs.tag }}
+        generate_release_notes: true
+        # The model is not usable for inference without scalers.json, the
+        # station + basin index maps, and training_config.json. All artifacts
+        # travel together; manifest.json carries sha256s + schema so the
+        # mobile app can verify what it downloaded. See docs/INFERENCE.md.
+        files: |
+          lstm_model.h5
+          lstm_model.mlpackage.zip
+          lstm_model.tflite
+          scalers.json
+          station_index.json
+          basin_index.json
+          training_config.json
+          manifest.json
+
+    # Belt-and-braces: even on release-step failure, the artifact upload
+    # keeps the trained model retrievable for debugging the release path.
     - name: Upload model and inference artifacts
+      if: always()
       uses: actions/upload-artifact@v4
       with:
         name: lstm_model_keras_release
         path: |
           ./lstm_model.h5
+          ./lstm_model.mlpackage.zip
+          ./lstm_model.tflite
           ./scalers.json
           ./station_index.json
           ./basin_index.json
           ./training_config.json
-        if-no-files-found: error
+          ./manifest.json
+        if-no-files-found: warn
diff --git a/README.md b/README.md
@@ -7,3 +7,4 @@ New here? See our wiki
 
 - [x] Automated dataset creation based on a USGS and CODWR station
 - [x] GitHub actions ML training
+- [x] Mobile artifacts published per training run (CoreML + TFLite + manifest) — see [docs/INFERENCE.md](docs/INFERENCE.md)
diff --git a/docs/INFERENCE.md b/docs/INFERENCE.md
@@ -0,0 +1,94 @@
+# OpenFlow Inference Contract
+
+This document specifies what the [OpenFlowMobile](https://github.com/tmart234/OpenFlowMobile) app (or any other downstream consumer) needs to know in order to run the trained model end-to-end. If you find yourself reading `train.py` to figure out a tensor shape, this doc has failed — please open an issue.
+
+## Release artifacts
+
+Every successful weekly training run on `main` produces a GitHub Release tagged `model-YYYY.MM.DD` (with a `-r2`, `-r3`, ... suffix on same-day re-runs). The release attaches:
+
+| File | Purpose |
+| --- | --- |
+| `lstm_model.h5` | Canonical Keras model. The source of truth; everything else is derived. |
+| `lstm_model.mlpackage.zip` | CoreML mlprogram for iOS (iOS 15+). Unzip and pass to `MLModel(contentsOf:)`. |
+| `lstm_model.tflite` | TFLite for Android. May require the Flex delegate — see TFLite note below. |
+| `scalers.json` | Per-column scaler params (mean, scale, transform). Inputs and outputs are in scaled space; you invert with this. |
+| `station_index.json` | Map `site_id` → embedding index. Index `0` is reserved for "unseen". |
+| `basin_index.json` | Map HUC8 → embedding index. Index `0` is reserved for "unseen". |
+| `training_config.json` | Schema: encoder/decoder window sizes, feature names, vocab sizes. |
+| `manifest.json` | sha256 + byte size per artifact, plus tool versions and the schema block. Verify on download. |
+
+### Verifying a downloaded release
+
+```python
+import hashlib, json
+with open("manifest.json") as f:
+    manifest = json.load(f)
+for entry in manifest["files"]:
+    h = hashlib.sha256()
+    with open(entry["name"], "rb") as f:
+        for chunk in iter(lambda: f.read(1 << 20), b""):
+            h.update(chunk)
+    assert h.hexdigest() == entry["sha256"], entry["name"]
+```
+
+## Input tensors
+
+The model takes a dict of 5 inputs. Names are stable across exports:
+
+| Name | dtype | Shape | Source |
+| --- | --- | --- | --- |
+| `encoder_input` | float32 | `[1, 60, len(encoder_features)]` | Last 60 days of features, columns in `training_config.encoder_features` order, pre-scaled. |
+| `decoder_input` | float32 | `[1, 14, len(decoder_features)]` | Next 14 days of forecast-time-available features, columns in `training_config.decoder_features` order, pre-scaled. |
+| `persistence_input` | float32 | `[1, len(target_features)]` | Last encoder-day flow values (the `Min Flow` / `Max Flow` columns from the last encoder row), in scaled space. |
+| `station_input` | int32 | `[1]` | `station_index.json[site_id]`, or `0` if the site isn't in the map. |
+| `basin_input` | int32 | `[1]` | `basin_index.json[huc8]`, or `0` if the HUC isn't in the map. |
+
+`encoder_days = 60`, `decoder_days = 14`. Both are recorded in `training_config.json` so a future model with a different window size doesn't silently break clients.
+
+### Scaling inputs
+
+Every numeric column in `encoder_input` and `decoder_input` is z-scored; the flow columns (`Min Flow`, `Max Flow`) are additionally `log1p`-transformed before scaling. Apply scalers per-column:
+
+```python
+# scalers.json shape:
+#   {"Min Flow": {"mean": ..., "scale": ..., "transform": "log1p"}, ...}
+def scale(raw, params):
+    x = math.log1p(raw) if params["transform"] == "log1p" else raw
+    return (x - params["mean"]) / params["scale"]
+```
+
+Indicator columns (`sm_observed`, `reservoir_observed`) are 0/1 and are **not** scaled — they don't appear in `scalers.json`.
+
+## Output tensor
+
+One output, name `persistence_plus_delta`, shape `[1, 14, len(target_features)]` (= `[1, 14, 2]`), dtype `float32`. The two target columns are `Min Flow`, `Max Flow` in that order — same as `training_config.target_features`.
+
+The output is in **scaled** space. To get cfs, invert per column:
+
+```python
+def invert(z, params):
+    x = z * params["scale"] + params["mean"]
+    return math.expm1(x) if params["transform"] == "log1p" else x
+```
+
+## Residual structure
+
+The model is a residual forecaster: `output = persistence_input + delta`, where `delta` is what the network actually learns. If the network outputs zero, predictions equal the persistence baseline — that's the worst case, not a failure mode. There's no special-casing for this in the consumer; just decode the output as above.
+
+## TFLite Flex delegate
+
+The graph contains two `Embedding(mask_zero=True)` layers and an LSTM with `initial_state`. The exporter tries three TFLite conversion paths and records which one was used in `manifest.json.tflite_mode`:
+
+- `builtins` — strict TFLITE_BUILTINS. No Flex needed on the consumer side.
+- `select_tf_ops` — needed the Flex delegate. Android consumers must add `org.tensorflow:tensorflow-lite-select-tf-ops` to their `app/build.gradle`. iOS uses CoreML, not TFLite.
+- `rebuilt_no_mask` — exporter rebuilt the model with `mask_zero=False` and copied weights. Functionally identical at inference (the mask only affected training-time padding behavior, which the model doesn't use). No Flex needed.
+
+Check `manifest.tflite_mode` on download and warn the user if the Flex delegate isn't bundled.
+
+## Unseen stations and basins
+
+Both embedding layers reserve index `0` for unknown. If the user picks a site not in `station_index.json`, pass `station_input = 0` — the model is trained with `StationDropout` to handle this via basin-only inference. Same for `basin_input` when the HUC8 isn't known.
+
+## Reference fixture
+
+The export parity tests in `tests/test_export_parity.py` generate synthetic inputs, run them through Keras / CoreML / TFLite, and assert they agree within `1e-3`. They're the executable spec of this document — when in doubt, read that file.
diff --git a/openFlowML/data/cbrfc_lid_map.json b/openFlowML/data/cbrfc_lid_map.json
@@ -0,0 +1,3 @@
+{
+  "_comment": "Map of OpenFlow site_id (USGS:XXXX or DWR:XXXX) to NWS / AHPS 5-letter LID. Used by get_cbrfc.py for both live AHPS forecast retrieval and NWPS historical forecast archive lookup. Add a row when wiring CBRFC for a new gauge; a missing site_id silently skips the CBRFC baseline for that station (it remains a fully optional comparison baseline). Find the LID for a USGS site by searching https://water.weather.gov/ahps2/ for the gauge name and reading the LID from the page URL."
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,3 +7,4 @@ New here? See our wiki

		- [x] Automated dataset creation based on a USGS and CODWR station
		- [x] GitHub actions ML training
		- [x] Mobile artifacts published per training run (CoreML + TFLite + manifest) — see [docs/INFERENCE.md](docs/INFERENCE.md)