Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions .github/scripts/check_data_release_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Require pyproject.toml to track the latest finalized HF data release."""

from __future__ import annotations

import argparse
import json
import os
from pathlib import Path
import re
import sys
from urllib.error import URLError
from urllib.request import urlopen


# Repository root: parents[2] of this file's resolved path, i.e. two levels
# above the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Default location of the Hugging Face version manifest. main() lets callers
# override it with --version-manifest-url or US_DATA_VERSION_MANIFEST_URL.
DEFAULT_VERSION_MANIFEST_URL = (
    "https://huggingface.co/policyengine/policyengine-us-data/"
    "resolve/main/version_manifest.json"
)
# Matches a line that starts with `version = "<value>"` and captures <value>.
# MULTILINE lets `^` anchor at every line start of the searched text.
VERSION_RE = re.compile(r'^version\s*=\s*"([^"]+)"', re.MULTILINE)
# MAJOR.MINOR.PATCH with an optional `rcN` suffix. Only the three numeric
# components are captured; the suffix sits in a non-capturing group.
SEMVER_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)(?:rc\d+)?$")


def stable_version_tuple(version: str) -> tuple[int, int, int]:
    """Parse ``version`` into its numeric ``(major, minor, patch)`` triple.

    ``SEMVER_RE`` accepts an optional ``rcN`` suffix but does not capture it,
    so a release candidate produces the same tuple as its base version.

    Args:
        version: Version string such as ``"1.115.3"`` or ``"1.115.3rc1"``.

    Returns:
        The three numeric components as integers.

    Raises:
        ValueError: If ``version`` does not match ``SEMVER_RE``.
    """
    parsed = SEMVER_RE.match(version)
    if parsed is None:
        raise ValueError(f"Unsupported version format: {version}")
    major, minor, patch = parsed.groups()
    return int(major), int(minor), int(patch)


def pyproject_version(root: Path = REPO_ROOT) -> str:
    """Return the version string declared in ``<root>/pyproject.toml``.

    The file is searched with ``VERSION_RE`` and the first line-anchored
    ``version = "..."`` assignment wins.

    Args:
        root: Directory that contains ``pyproject.toml``.

    Returns:
        The quoted value of the first matching ``version`` assignment.

    Raises:
        OSError: If ``pyproject.toml`` cannot be read.
        ValueError: If no ``version = "..."`` line is found, or the file is
            not valid UTF-8 (``UnicodeDecodeError`` is a ``ValueError``).
    """
    # TOML documents are UTF-8 by specification. Decode explicitly so the
    # result does not depend on the locale of the machine running the check.
    text = (root / "pyproject.toml").read_text(encoding="utf-8")
    match = VERSION_RE.search(text)
    if match is None:
        raise ValueError("Could not find project version in pyproject.toml")
    return match.group(1)


def latest_hf_release_version(
    url: str = DEFAULT_VERSION_MANIFEST_URL,
) -> str:
    """Fetch the version manifest at ``url`` and return the latest version.

    A non-empty string under the ``"current"`` key is preferred. Otherwise
    the ``"version"`` field of the last entry in the ``"versions"`` list is
    used (assumes the list is ordered oldest to newest; confirm against the
    manifest producer).

    Args:
        url: Location of ``version_manifest.json``.

    Returns:
        The version string taken from the manifest.

    Raises:
        urllib.error.URLError: If the manifest cannot be fetched.
        OSError: On lower-level I/O failures, including a timeout.
        ValueError: If the body is not valid JSON, is not a JSON object, or
            does not contain a usable version.
    """
    with urlopen(url, timeout=30) as response:
        payload = json.load(response)

    # Callers only treat URLError/OSError/ValueError as "could not check".
    # Reject unexpected shapes with ValueError so a malformed manifest does
    # not escape as an AttributeError from ``.get``.
    if not isinstance(payload, dict):
        raise ValueError("HF version_manifest.json is not a JSON object")

    current = payload.get("current")
    if isinstance(current, str) and current:
        return current

    versions = payload.get("versions")
    if not isinstance(versions, list) or not versions:
        raise ValueError("HF version_manifest.json has no current version")

    last_entry = versions[-1]
    latest = last_entry.get("version") if isinstance(last_entry, dict) else None
    if not isinstance(latest, str) or not latest:
        raise ValueError("HF version_manifest.json latest entry has no version")
    return latest


def version_violations(
    *,
    package_version: str,
    finalized_release_version: str,
) -> list[str]:
    """Report whether the package version lags the finalized data release.

    Both versions are compared by their numeric ``(major, minor, patch)``
    triples as produced by ``stable_version_tuple``.

    Args:
        package_version: Version declared in ``pyproject.toml``.
        finalized_release_version: Latest finalized HF data release version.

    Returns:
        An empty list when the package version is at or ahead of the
        release; otherwise a one-element list with a readable message.

    Raises:
        ValueError: If either version string has an unsupported format.
    """
    package_key = stable_version_tuple(package_version)
    release_key = stable_version_tuple(finalized_release_version)
    if package_key < release_key:
        message = (
            "pyproject.toml version "
            f"{package_version} is behind finalized HF data release "
            f"{finalized_release_version}. Finalize the package version before "
            "creating another publication candidate."
        )
        return [message]
    return []


def check_repository(
    root: Path = REPO_ROOT,
    *,
    finalized_release_version: str | None = None,
    version_manifest_url: str = DEFAULT_VERSION_MANIFEST_URL,
) -> list[str]:
    """Compare the repository's package version with the finalized release.

    Args:
        root: Directory that contains ``pyproject.toml``.
        finalized_release_version: Release version to compare against. When
            falsy (``None`` or empty), it is fetched from
            ``version_manifest_url``.
        version_manifest_url: Manifest location used for that fetch.

    Returns:
        The violation messages from ``version_violations``; empty when the
        package version is not behind the release.
    """
    package_version = pyproject_version(root)
    # Only reach out to the manifest when the caller supplied no version.
    if not finalized_release_version:
        finalized_release_version = latest_hf_release_version(
            version_manifest_url
        )
    return version_violations(
        package_version=package_version,
        finalized_release_version=finalized_release_version,
    )


def main(argv: list[str] | None = None) -> int:
    """Run the data-release version check from the command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` when the package version is current. When the check cannot be
        completed or finds violations, ``1`` in ``fail`` mode and ``0`` in
        ``warn`` mode.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--mode",
        choices=("warn", "fail"),
        default="fail",
        help="Whether stale versions should fail the command.",
    )
    # The environment variable, when set, replaces the built-in default URL.
    default_manifest_url = os.environ.get(
        "US_DATA_VERSION_MANIFEST_URL", DEFAULT_VERSION_MANIFEST_URL
    )
    parser.add_argument("--version-manifest-url", default=default_manifest_url)
    args = parser.parse_args(argv)

    # Exit status shared by every "something is wrong" path below.
    problem_status = 1 if args.mode == "fail" else 0

    try:
        violations = check_repository(
            version_manifest_url=args.version_manifest_url,
        )
    except (URLError, OSError, ValueError) as exc:
        print(
            f"Could not check finalized HF data release version: {exc}",
            file=sys.stderr,
        )
        return problem_status

    if violations:
        print(*violations, sep="\n", file=sys.stderr)
        return problem_status

    print("Data package version is current with the latest finalized HF release.")
    return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
3 changes: 3 additions & 0 deletions .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ jobs:
POLICYENGINE_US_ALLOW_STALE: ${{ inputs.allow_stale_policyengine_us }}
run: python .github/scripts/check_policyengine_us_dependency.py --mode fail

# Runs the data-release version check with --mode fail.
- name: Require pyproject.toml to match finalized HF release base
run: python .github/scripts/check_data_release_version.py --mode fail

- name: Deploy and launch pipeline on Modal
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ jobs:
- name: Require current PolicyEngine US dependency
run: python .github/scripts/check_policyengine_us_dependency.py --mode fail

# Checks out the repository, sets up Python 3.14, and runs the data-release
# version check with --mode fail.
data-release-version:
name: Data release version
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Require pyproject.toml to match finalized HF release base
run: python .github/scripts/check_data_release_version.py --mode fail

# ── Documentation ──────────────────────────────────────────
docs:
name: Documentation
Expand Down Expand Up @@ -80,6 +91,7 @@ jobs:
needs:
- run-context
- policyengine-us-freshness
- data-release-version
if: |
github.event.head_commit.message != 'Update publication candidate' &&
github.event.head_commit.message != 'Finalize package version'
Expand Down Expand Up @@ -126,6 +138,7 @@ jobs:
- lint
- run-context
- policyengine-us-freshness
- data-release-version
if: github.event.head_commit.message == 'Update publication candidate'
permissions:
actions: write
Expand Down
1 change: 1 addition & 0 deletions changelog.d/data-release-version-guard.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ensure publication candidates fail before launch when the package version lags the latest finalized data release.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "policyengine_us_data"
version = "1.115.2"
version = "1.115.3"
description = "A package to create representative microdata for the US."
readme = "README.md"
authors = [
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/test_publication_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,38 @@ def test_policyengine_us_dependency_check_allow_stale_keeps_local_errors_fatal(
assert module.main() == 1


def test_data_release_version_check_passes_at_latest_release(tmp_path):
    """A package version equal to the finalized release yields no violations."""
    checker = _load_script(
        ".github/scripts/check_data_release_version.py",
        "check_data_release_version_current_test",
    )
    # Presumably writes a pyproject.toml declaring this version under
    # tmp_path; verify against the helper's definition.
    _write_pyproject(tmp_path, "1.115.3")

    violations = checker.check_repository(
        tmp_path,
        finalized_release_version="1.115.3",
    )

    assert violations == []


def test_data_release_version_check_flags_stale_package(tmp_path):
    """A package version behind the finalized release is reported."""
    checker = _load_script(
        ".github/scripts/check_data_release_version.py",
        "check_data_release_version_stale_test",
    )
    _write_pyproject(tmp_path, "1.115.2")

    messages = checker.check_repository(
        tmp_path,
        finalized_release_version="1.115.3",
    )

    # Both the stale package version and the release must be mentioned.
    for expected in ("1.115.2", "1.115.3"):
        assert any(expected in message for message in messages)


def test_restore_publication_changelog_restores_candidate_snapshot(
tmp_path,
monkeypatch,
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading