diff --git a/.github/workflows/refresh-profile.yml b/.github/workflows/refresh-profile.yml index a177a2d..519d60a 100644 --- a/.github/workflows/refresh-profile.yml +++ b/.github/workflows/refresh-profile.yml @@ -37,7 +37,7 @@ jobs: - name: Collect metrics env: GITHUB_TOKEN: ${{ secrets.SCORE_BOT_PAT }} - run: uv run generate-repo-overview collect + run: uv run generate-repo-overview collect --org-config org_config.toml - name: Render overview (MD) run: uv run generate-repo-overview render-overview diff --git a/AGENTS.md b/AGENTS.md index 3050c44..98bc233 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,7 +6,7 @@ Context file for AI coding assistants. See `docs/repo-overview-tool-design.md` f ```sh uv sync --all-groups --frozen # install deps -uv run generate-repo-overview collect # GitHub API → snapshot JSON +uv run generate-repo-overview collect --org-config org_config.toml # GitHub API → snapshot JSON uv run generate-repo-overview render-overview # snapshot → profile/README.md uv run generate-repo-overview render-details # snapshot → _site/ (index + per-repo pages) uv run pytest # run tests @@ -17,14 +17,18 @@ uv run basedpyright src/ # type check ## Key files for website work ``` +org_config.toml — organization-specific settings (org name, tracked deps, workflow signals) src/generate_repo_overview/ + org_config.py — loads and validates org_config.toml metrics_html.py — HTML renderer (index + per-repo detail pages) metrics_report.py — shared helpers: grouping, version comparison, badges - models.py — RepoEntry, RepoSnapshot, signal dataclasses + models.py — RepoEntry, RepoSnapshot, TrackedDep, WorkflowSignal dataclasses cli.py — render-details writes all pages from render_all_pages() constants.py — default paths (DEFAULT_METRICS_HTML_OUTPUT = _site/) tests/ test_cli_render.py — render output tests + test_org_config.py — org config loading/validation tests + test_repo_overview.py — collector and snapshot round-trip tests ``` ## Website rendering notes diff --git a/CLAUDE.md b/CLAUDE.md index 8f62ad3..43c994c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1 +1 @@ -See [AGENTS.md](AGENTS.md) for project context, key files, and dev commands. +@AGENTS.md diff --git a/README.md b/README.md index c53fcfd..3c559d4 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ uv run generate-repo-overview render-details For a fresh GitHub pull before rendering, run: ```sh -uv run generate-repo-overview collect +uv run generate-repo-overview collect --org-config org_config.toml ``` By default, `collect` now does a cache-aware refresh: it checks fast, high-level diff --git a/docs/repo-overview-tool-design.md b/docs/repo-overview-tool-design.md index 355c665..950b082 100644 --- a/docs/repo-overview-tool-design.md +++ b/docs/repo-overview-tool-design.md @@ -11,15 +11,17 @@ The tool is split into three layers: -1. `collector/` +1. `org_config.py` + - Loads organization-specific settings from `org_config.toml`: org name, repo include patterns, tracked Bazel deps, workflow signals, reference integration repo, and registry repo. +2. `collector/` - Connects to GitHub. - Loads active repositories and custom properties. - - Derives content-based signals such as `has_ci`, `has_lint_config`, `has_coverage_config`, `bazel_version`, and `referenced_by_reference_integration`. + - Derives content-based signals such as `has_ci`, `has_lint_config`, `has_coverage_config`, `bazel_version`, `matched_workflow_signals`, `bazel_deps`, and `referenced_by_reference_integration`. - Writes and reads a local JSON snapshot cache. -2. `profile_readme.py`, `metrics_report.py`, `metrics_html.py` (with `_html_index.py`, `_html_detail.py`, `_html_common.py`) +3. `profile_readme.py`, `metrics_report.py`, `metrics_html.py` (with `_html_index.py`, `_html_detail.py`, `_html_common.py`) - Render different views (Markdown and HTML) from the same normalized data model. - Keep presentation decisions out of the collection layer. -3. `cli.py` +4. `cli.py` - Orchestrates cache-aware commands: `collect`, `render-overview`, and `render-details`. ## Data Model @@ -32,6 +34,8 @@ The shared model lives in `models.py`. - organization name - generation timestamp - normalized repositories + - tracked Bazel dependency definitions (`tracked_deps`) + - workflow signal definitions (`workflow_signals`) The snapshot is intentionally renderer-agnostic. It stores neutral values such as booleans and plain strings rather than Markdown-specific markers. @@ -78,8 +82,9 @@ uv run generate-repo-overview Built-in commands: -- `collect` +- `collect --org-config org_config.toml` - Sync the cached snapshot from GitHub and write it to disk. + - Requires `--org-config` pointing to a TOML file with organization-specific settings. - Use `--deep` to force a full refresh for every repository instead of reusing cached signals for unchanged ones. - `render-overview` - Render the profile README from an existing snapshot. diff --git a/org_config.toml b/org_config.toml new file mode 100644 index 0000000..7c9ff66 --- /dev/null +++ b/org_config.toml @@ -0,0 +1,40 @@ +# Organization configuration for generate-repo-overview. +# +# This file defines which GitHub organization to scan and how to detect +# repository signals. The tool requires this file via --org-config. +# +# For a different organization, copy this file and adjust the values. + +org_name = "eclipse-score" + +# Optional fnmatch glob patterns to limit which repositories are included. +# When empty (or omitted), all non-archived repositories in the org are included. +# Example: repo_include_patterns = ["my-prefix-*", "another-repo"] +# repo_include_patterns = [] + +[signals] + +# Repository (org/repo) that serves as the reference integration project. +# Its MODULE.bazel is parsed to determine which other repositories are +# referenced as Bazel dependencies. +# Leave empty if there is no reference integration repository. +reference_integration_repo = "eclipse-score/reference_integration" + +# Full "org/repo" path to the Bazel registry repository. +# Leave empty if there is no Bazel registry. +registry_repo = "eclipse-score/bazel_registry" + +# Tracked Bazel dependencies. Each entry maps a source repository (org/repo) +# to its Bazel module name. The tool looks up each module_name in every +# repository's MODULE.bazel to report dependency versions. +[[signals.tracked_deps]] +repo = "eclipse-score/docs-as-code" +module_name = "score_docs_as_code" + +# Named workflow signals. Each entry has a human-readable label shown in the +# metrics output, and a reference string to search for inside +# .github/workflows/*.yml files. A repository is flagged for a signal when +# any of its workflow files contain the reference string. +[[signals.workflow_signals]] +label = "Daily Workflow" +reference = "eclipse-score/cicd-workflows/.github/workflows/daily.yml@" diff --git a/src/generate_repo_overview/README.md b/src/generate_repo_overview/README.md index 96e1369..8f45aff 100644 --- a/src/generate_repo_overview/README.md +++ b/src/generate_repo_overview/README.md @@ -38,8 +38,10 @@ This document explains the package structure and cache behavior. It intentionall - Renders the main HTML metrics dashboard (tabs, filters, sortable columns). - `_html_detail.py` - Renders per-repository HTML detail pages. +- `org_config.py` + - Loads `org_config.toml`: org name, repo include patterns, tracked Bazel deps, workflow signals. - `constants.py` - - Centralizes default org, cache, and output paths. + - Centralizes default cache and output paths. - `console.py` - Keeps status output formatting in one place. @@ -70,6 +72,8 @@ That file stores a serialized `RepoSnapshot` containing: - organization name - generation timestamp - all normalized repositories +- tracked Bazel dependency definitions (`tracked_deps`) +- workflow signal definitions (`workflow_signals`) The cache loader only accepts the current schema version. If the snapshot schema does not match, the cache is treated as unusable and collection falls back to a fresh GitHub fetch. @@ -106,14 +110,13 @@ For each repository, the snapshot currently stores: - `is_bazel_repo` - `bazel_version` - `codeowners` - - `docs_as_code_version` - `referenced_by_reference_integration` - `has_lint_config` - `has_gitlint_config` - `has_pyproject_toml` - `has_pre_commit_config` - `has_ci` - - `uses_cicd_daily_workflow` + - `matched_workflow_signals` - `has_coverage_config` - `top_languages` - `bazel_deps` diff --git a/src/generate_repo_overview/_html_common.py b/src/generate_repo_overview/_html_common.py index f802ab6..c00fd1c 100644 --- a/src/generate_repo_overview/_html_common.py +++ b/src/generate_repo_overview/_html_common.py @@ -81,9 +81,15 @@ def version_badge( version: str | None, max_bazel: tuple[int, ...] | None, *, - latest_dac: str | None, + latest_dep_version: str | None, is_bazel: bool, ) -> str: + """Render a colored version badge span. + + Bazel versions are green when equal to *max_bazel*, red otherwise. + Dep versions compare against *latest_dep_version*: green if equal, + yellow if same major.minor, red if older, muted if unknown. + """ if version is None or not version.strip(): return '' @@ -95,9 +101,9 @@ def version_badge( return f'{e(cleaned)}' return f'{e(cleaned)}' - if latest_dac is None: + if latest_dep_version is None: return f'{e(cleaned)}' - latest_cleaned = latest_dac.strip() + latest_cleaned = latest_dep_version.strip() if cleaned == latest_cleaned: return f'{e(cleaned)}' if parsed is not None: diff --git a/src/generate_repo_overview/_html_detail.py b/src/generate_repo_overview/_html_detail.py index e9df6f4..732ab83 100644 --- a/src/generate_repo_overview/_html_detail.py +++ b/src/generate_repo_overview/_html_detail.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from ._html_common import BAZEL_ICON, CSS, GITHUB_ICON, e, language_badge, version_badge +from .metrics_report import tracked_dep_label if TYPE_CHECKING: from .models import RepoEntry, RepoSnapshot @@ -10,11 +11,11 @@ def render_detail_page( entry: RepoEntry, - org_name: str, snapshot: RepoSnapshot, max_bazel: tuple[int, ...] | None, - latest_dac: str | None, + latest_dep_versions: dict[str, str | None], ) -> str: + org_name = snapshot.org_name return ( "\n" '\n\n' @@ -27,9 +28,9 @@ def render_detail_page( + _render_stat_grid(entry) + _render_release_section(entry) + _render_dep_diff_section(entry) - + _render_tooling_section(entry) + + _render_tooling_section(entry, snapshot) + _render_ownership_section(entry) - + _render_versions_section(entry, max_bazel, latest_dac) + + _render_versions_section(entry, snapshot, max_bazel, latest_dep_versions) + _render_footer(snapshot) + "\n\n" ) @@ -231,18 +232,21 @@ def _dep_status_badge(status: str, css_class: str) -> str: return f'{e(status)}' -def _render_tooling_section(entry: RepoEntry) -> str: +def _render_tooling_section(entry: RepoEntry, snapshot: RepoSnapshot) -> str: c = entry.content - signals = [ + signals: list[tuple[bool, str]] = [ (c.has_ci, "GitHub Actions (CI)"), - (c.uses_cicd_daily_workflow, "Daily Workflow"), + ] + for label in (s.label for s in snapshot.workflow_signals): + signals.append((label in c.matched_workflow_signals, label)) + signals.extend([ (c.has_lint_config, "Lint Config"), (c.has_gitlint_config, "Gitlint"), (c.has_pre_commit_config, "Pre-commit"), (c.has_pyproject_toml, "pyproject.toml"), (c.has_coverage_config, "Coverage Config"), (c.is_bazel_repo, "Bazel Repo"), - ] + ]) items = "\n".join( f'
' @@ -288,26 +292,31 @@ def _render_ownership_section(entry: RepoEntry) -> str: def _render_versions_section( entry: RepoEntry, + snapshot: RepoSnapshot, max_bazel: tuple[int, ...] | None, - latest_dac: str | None, + latest_dep_versions: dict[str, str | None], ) -> str: + from .models import lookup_bazel_dep_version + items: list[str] = [] bazel_badge = version_badge( - entry.content.bazel_version, max_bazel, latest_dac=None, is_bazel=True + entry.content.bazel_version, max_bazel, latest_dep_version=None, is_bazel=True ) items.append( f'
' f'
Bazel Version
{bazel_badge}
' ) - dac_badge = version_badge( - entry.content.docs_as_code_version, None, latest_dac=latest_dac, is_bazel=False - ) - items.append( - f'
' - f'
Docs-As-Code Version
{dac_badge}
' - ) + for dep in snapshot.tracked_deps: + dep_label = tracked_dep_label(dep) + dep_ver = lookup_bazel_dep_version(entry.content.bazel_deps, dep.module_name) + latest_ver = latest_dep_versions.get(dep.module_name) + badge = version_badge(dep_ver, None, latest_dep_version=latest_ver, is_bazel=False) + items.append( + f'
' + f'
{e(dep_label)} Version
{badge}
' + ) refint = ( 'yes' diff --git a/src/generate_repo_overview/_html_index.py b/src/generate_repo_overview/_html_index.py index cead6d9..8e2e24f 100644 --- a/src/generate_repo_overview/_html_index.py +++ b/src/generate_repo_overview/_html_index.py @@ -14,15 +14,18 @@ version_badge, ) from .metrics_report import ( - get_latest_docs_as_code_release, + get_latest_tracked_dep_version, get_max_bazel_version, group_repos_by_category, has_latest_release, parse_version_key, + tracked_dep_label, ) if TYPE_CHECKING: - from .models import RepoEntry, RepoSnapshot + from .models import RepoEntry, RepoSnapshot, TrackedDep + +from .models import is_tracked_dep_repo _INDEX_JS = (Path(__file__).parent / "templates" / "index.js").read_text( encoding="utf-8" @@ -45,9 +48,9 @@ def render_index_page(snapshot: RepoSnapshot) -> str: + _render_filters_placeholder() + '
\n' + _render_overview_sections(categories, snapshot.org_name) - + _render_versions_sections(categories, repos, snapshot.org_name) - + _render_automation_sections(categories, snapshot.org_name) - + _render_traceability_section(repos, snapshot.org_name) + + _render_versions_sections(categories, snapshot) + + _render_automation_sections(categories, snapshot) + + _render_traceability_section(repos, snapshot) + "
\n" + _render_footer(snapshot) + _render_script(categories) @@ -246,12 +249,7 @@ def _render_release(version: str | None, commits_since: int | None) -> str: ) -_DAC_DEP_NAME = "score_docs_as_code" -_DAC_REPO_NAME = "docs-as-code" - -def _is_docs_as_code_repo(entry: RepoEntry) -> bool: - return bool(entry.content.docs_as_code_version) or entry.name == _DAC_REPO_NAME def _build_version_tooltip( @@ -375,15 +373,26 @@ def _render_dep_changes( def _render_versions_sections( categories: list[tuple[str, list[RepoEntry]]], - repos: list[RepoEntry], - org_name: str, + snapshot: RepoSnapshot, ) -> str: + repos = sorted(snapshot.repos, key=lambda r: r.name.casefold()) max_bazel = get_max_bazel_version(repos) - latest_dac = get_latest_docs_as_code_release(repos) + tracked_deps = snapshot.tracked_deps + latest_dep_versions = { + dep.module_name: get_latest_tracked_dep_version(repos, dep) + for dep in tracked_deps + } + org_name = snapshot.org_name parts: list[str] = [] for category, cat_repos in categories: rows = "\n".join( - _versions_row(r, org_name, max_bazel, latest_dac) for r in cat_repos + _versions_row(r, org_name, max_bazel, tracked_deps, latest_dep_versions) + for r in cat_repos + ) + dep_headers = "".join( + f' ' + f'{e(tracked_dep_label(dep))} Version \n' + for i, dep in enumerate(tracked_deps) ) parts.append( f'