class SourceDirectoryInfo(TypedDict):
    """Typed result of ``StandardLayoutAssessor._find_source_directory``.

    Keys:
        found: Whether a source directory was located.
        type: How it was located ("src", "project-named" or "heuristic"),
            or "none" when nothing was found.
        directory: Directory name with a trailing slash (e.g. "src/"),
            or "" when nothing was found.
    """

    found: bool
    type: Literal["src", "project-named", "heuristic", "none"]
    directory: str


def _get_data_dir() -> Path:
    """Return the ``data`` directory located two levels above this file."""
    return Path(__file__).parent.parent / "data"


@lru_cache(maxsize=8)
def _load_arsrc_file(filename: str) -> frozenset[str]:
    """Load directory names from an .arsrc config file.

    Args:
        filename: Name of the .arsrc file (e.g., "Python.arsrc"), resolved
            against ``_get_data_dir()``.

    Returns:
        Frozenset of lower-cased directory names to exclude from source
        detection. Empty if the file is missing or cannot be read; a
        ``UserWarning`` is emitted in both cases. Results (including empty
        ones) are memoized per filename by ``lru_cache``.

    File format:
        - One directory name per line
        - Surrounding whitespace is ignored
        - Lines starting with # (after stripping) are comments
        - Empty lines are ignored
        - A trailing "/" (gitignore-style ``build/``) is ignored
    """
    config_path = _get_data_dir() / filename

    # EAFP: open directly instead of exists()+open(), so there is a single
    # code path for "missing" and no window between the check and the read.
    try:
        with open(config_path, encoding="utf-8") as f:
            raw_lines = f.readlines()
    except FileNotFoundError:
        warnings.warn(
            f"Config file {filename} not found at {config_path}. "
            "Blocklist will be empty, which may cause false positives in "
            "source directory detection. This usually indicates a packaging issue.",
            UserWarning,
            stacklevel=2,
        )
        return frozenset()
    except (OSError, UnicodeDecodeError) as exc:
        # An unreadable file degrades detection exactly like a missing one,
        # so report it the same way instead of failing silently.
        warnings.warn(
            f"Config file {filename} at {config_path} could not be read ({exc}). "
            "Blocklist will be empty, which may cause false positives in "
            "source directory detection.",
            UserWarning,
            stacklevel=2,
        )
        return frozenset()

    entries = set()
    for raw_line in raw_lines:
        entry = raw_line.strip()
        # Skip empty lines and comments
        if not entry or entry.startswith("#"):
            continue
        # Callers test ``name.lower() in blocklist``, so store entries
        # lower-cased; a directory name can never contain "/", so dropping
        # a trailing slash is lossless.
        entry = entry.rstrip("/").lower()
        if entry:
            entries.add(entry)

    return frozenset(entries)


def _get_non_source_dirs() -> frozenset[str]:
    """Return directory names that must not be treated as source directories.

    Thin wrapper over ``_load_arsrc_file("Python.arsrc")``; yields an empty
    set when that file cannot be loaded.
    """
    return _load_arsrc_file("Python.arsrc")
Expected patterns: - - Python: src/, tests/, docs/ + - Python: src/ or project-named directory, plus tests/ - JavaScript: src/, test/, docs/ - Java: src/main/java, src/test/java - """ - # Check for common standard directories - standard_dirs = { - "src": repository.path / "src", - } + Fix for #246, #305: Support multiple valid Python layouts + """ # Check for tests directory (either tests/ or test/) tests_path = repository.path / "tests" - if not tests_path.exists(): + has_tests = tests_path.exists() + if not has_tests: tests_path = repository.path / "test" - standard_dirs["tests"] = tests_path + has_tests = tests_path.exists() + + # Check for source directory: src/ or project-named + # Fix for #246: Detect project-named source directories + source_info = self._find_source_directory(repository) + has_source = source_info["found"] + source_type = source_info["type"] + source_dir = source_info["directory"] + + # Fix for #305: Handle test-only repositories + if not has_source and has_tests: + if self._is_test_only_repository(repository): + return Finding.not_applicable( + self.attribute, + reason="Test-only repository (no source code to organize)", + ) - found_dirs = sum(1 for d in standard_dirs.values() if d.exists()) - required_dirs = len(standard_dirs) + # Calculate score based on what we found + found_dirs = (1 if has_source else 0) + (1 if has_tests else 0) + required_dirs = 2 score = self.calculate_proportional_score( measured_value=found_dirs, @@ -64,10 +154,22 @@ def assess(self, repository: Repository) -> Finding: status = "pass" if score >= 75 else "fail" + # Build evidence with detailed source directory info + if has_source: + if source_type == "src": + source_evidence = "src/: ✓" + elif source_type == "heuristic": + # Strategy 3 match: found via directory scan, not name match + source_evidence = f"source (heuristic): ✓ ({source_dir}) — verify" + else: + source_evidence = f"source ({source_type}): ✓ ({source_dir})" + else: + source_evidence = "source 
directory: ✗ (no src/ or project-named dir)" + evidence = [ f"Found {found_dirs}/{required_dirs} standard directories", - f"src/: {'✓' if (repository.path / 'src').exists() else '✗'}", - f"tests/: {'✓' if (repository.path / 'tests').exists() or (repository.path / 'test').exists() else '✗'}", + source_evidence, + f"tests/: {'✓' if has_tests else '✗'}", ] return Finding( @@ -77,34 +179,231 @@ def assess(self, repository: Repository) -> Finding: measured_value=f"{found_dirs}/{required_dirs} directories", threshold=f"{required_dirs}/{required_dirs} directories", evidence=evidence, - remediation=self._create_remediation() if status == "fail" else None, + remediation=( + self._create_remediation(has_source, has_tests) + if status == "fail" + else None + ), error_message=None, ) - def _create_remediation(self) -> Remediation: - """Create remediation guidance for standard layout.""" + def _find_source_directory(self, repository: Repository) -> SourceDirectoryInfo: + """Find the source directory using multiple strategies. + + Fix for #246: Support both src/ layout and project-named layout. + + Returns: + SourceDirectoryInfo with keys: + - found: bool - whether a source directory was found + - type: "src", "project-named", "heuristic", or "none" + - directory: str - name of the source directory + """ + # Strategy 1: Check for src/ directory (PEP 517 recommended) + if (repository.path / "src").exists(): + return {"found": True, "type": "src", "directory": "src/"} + + # Strategy 2: Look for project-named directory from pyproject.toml + # Only use project-named detection when pyproject.toml exists to avoid + # false positives from migrations/, config/, etc. 
+ pyproject_exists = (repository.path / "pyproject.toml").exists() + package_name = self._get_package_name_from_pyproject(repository) + + if package_name: + # Normalize package name (replace hyphens with underscores) + normalized_name = package_name.replace("-", "_") + package_dir = repository.path / normalized_name + if package_dir.exists() and (package_dir / "__init__.py").exists(): + return { + "found": True, + "type": "project-named", + "directory": f"{normalized_name}/", + } + + # Strategy 3: pyproject.toml exists but no matching project-named directory. + # Look for any directory with __init__.py at root level that isn't in the + # blocklist. Only do this when pyproject.toml exists to avoid false positives. + # Returns first match alphabetically. + # Mark as "heuristic" so evidence shows this is a best-guess match. + # Fix: Run this whenever pyproject.toml exists, not just when name is found. + if pyproject_exists: + for item in sorted(repository.path.iterdir(), key=lambda p: p.name): + if not item.is_dir(): + continue + if item.name.startswith("."): + continue + if item.name.lower() in _get_non_source_dirs(): + continue + if (item / "__init__.py").exists(): + return { + "found": True, + "type": "heuristic", + "directory": f"{item.name}/", + } + + return {"found": False, "type": "none", "directory": ""} + + def _get_package_name_from_pyproject(self, repository: Repository) -> str | None: + """Extract package name from pyproject.toml. + + Supports both PEP 621 [project].name and Poetry [tool.poetry].name. + + Returns: + Package name string or None if not found. 
+ """ + pyproject_path = repository.path / "pyproject.toml" + if not pyproject_path.exists(): + return None + + try: + with open(pyproject_path, "rb") as f: + data = tomllib.load(f) + + # PEP 621 format: [project].name + if "project" in data and "name" in data["project"]: + return data["project"]["name"] + + # Poetry format: [tool.poetry].name + if ( + "tool" in data + and "poetry" in data["tool"] + and "name" in data["tool"]["poetry"] + ): + return data["tool"]["poetry"]["name"] + + except (OSError, tomllib.TOMLDecodeError): + # If we can't read pyproject.toml, fall back to other strategies + pass + + return None + + def _is_test_only_repository(self, repository: Repository) -> bool: + """Detect if this is a test-only repository. + + A test-only repository has: + - tests/ or test/ directory + - No source directory (src/ or project-named) + - Strong indicators it's dedicated to tests + + Detection strategy (conservative to avoid false skips): + 1. Name pattern: repo name contains "test", "tests", "testing", "spec", "specs" + as a word boundary (not substring like "testimonial") + 2. Config-only signal: has conftest.py/pytest.ini AND no pyproject.toml + (mixed projects typically have pyproject.toml) + + Note: conftest.py and pytest.ini alone are NOT reliable indicators since + mixed projects (source + tests) commonly have these at the root. + + Returns: + True if this appears to be a test-only repository. + """ + # Strategy 1: Name strongly suggests test-only repo + # Word-boundary matching avoids false positives like "testimonial-service" + name_suggests_tests = bool( + re.search( + r"(^|[-_.])(?:test|tests|testing|spec|specs)($|[-_.])", + repository.name.lower(), + ) + ) + + if name_suggests_tests: + return True + + # Strategy 2: Has test config files but NO pyproject.toml + # Test-only repos rarely have pyproject.toml with [project] section. + # Mixed projects (source + tests) typically DO have pyproject.toml. 
+ has_pyproject = (repository.path / "pyproject.toml").exists() + if has_pyproject: + # If pyproject.toml exists, this is likely a mixed project, + # not a test-only repo. Don't mark as test-only just because + # it has conftest.py or pytest.ini. + return False + + # No pyproject.toml: check for test-specific config files + test_config_files = [ + repository.path / "conftest.py", + repository.path / "pytest.ini", + ] + has_test_config = any(f.exists() for f in test_config_files) + + return has_test_config + + def _create_remediation(self, has_source: bool, has_tests: bool) -> Remediation: + """Create context-aware remediation guidance for standard layout. + + Fix for #246: Provide guidance appropriate to the project type. + """ + steps = [] + commands = [] + + if not has_source: + steps.extend( + [ + "Create a source directory for your code", + "Option A: Use src/ layout (recommended for packages)", + "Option B: Use project-named directory (e.g., mypackage/)", + "Ensure your package has __init__.py", + ] + ) + commands.extend( + [ + "# Option A: src layout", + "mkdir -p src/mypackage", + "touch src/mypackage/__init__.py", + "# ---", + "# Option B: flat layout (project-named)", + "mkdir -p mypackage", + "touch mypackage/__init__.py", + ] + ) + + if not has_tests: + steps.extend( + [ + "Create tests/ directory for test files", + "Add at least one test file", + ] + ) + commands.extend( + [ + "# Create tests directory", + "mkdir -p tests", + "touch tests/__init__.py", + "touch tests/test_example.py", + ] + ) + return Remediation( - summary="Organize code into standard directories (src/, tests/, docs/)", - steps=[ - "Create src/ directory for source code", - "Create tests/ directory for test files", - "Create docs/ directory for documentation", - "Move source code into src/", - "Move tests into tests/", - ], + summary="Organize code into standard directories", + steps=steps, tools=[], - commands=[ - "mkdir -p src tests docs", - "# Move source files to src/", - "# Move 
test files to tests/", + commands=commands, + examples=[ + """# src layout (recommended for distributable packages) +project/ +├── src/ +│ └── mypackage/ +│ ├── __init__.py +│ └── module.py +├── tests/ +│ └── test_module.py +└── pyproject.toml + +# flat layout (common in major projects like pandas, numpy) +project/ +├── mypackage/ +│ ├── __init__.py +│ └── module.py +├── tests/ +│ └── test_module.py +└── pyproject.toml +""", ], - examples=[], citations=[ Citation( source="Python Packaging Authority", - title="Python Project Structure", - url="https://packaging.python.org/en/latest/tutorials/packaging-projects/", - relevance="Standard Python project layout", + title="src layout vs flat layout", + url="https://packaging.python.org/en/latest/discussions/src-layout-vs-flat-layout/", + relevance="Official guidance on Python project layouts", ) ], ) diff --git a/src/agentready/data/Python.arsrc b/src/agentready/data/Python.arsrc new file mode 100644 index 00000000..7e5d91ad --- /dev/null +++ b/src/agentready/data/Python.arsrc @@ -0,0 +1,70 @@ +# Python Non-Source Directories +# These directories should not be considered as primary source directories +# when detecting project layout. One entry per line. +# Lines starting with # are comments. Empty lines are ignored. +# +# Format follows .gitignore conventions (partial support): +# - One directory name per line +# - Comments start with # +# - Empty lines are ignored +# +# Future: Full gitignore syntax support (wildcards, negation) planned. 
+# See: https://github.com/github/gitignore + +# Test directories +tests +test +fixtures +benchmarks + +# Documentation +docs +doc +documentation + +# Scripts and utilities +scripts +utilities +utils +tools +examples +samples + +# Database migrations (generic name only) +migrations + +# Configuration directories +config +settings +conf + +# Web framework static directories (not source code) +static +assets +templates +locale +i18n + +# Data and resources +data +resources + +# CI/CD +ci +.circleci + +# Hidden/build directories +.git +.github +.venv +venv +env +node_modules +__pycache__ +.tox +.pytest_cache +.mypy_cache +build +dist +htmlcov +.eggs diff --git a/tests/unit/test_assessors_structure.py b/tests/unit/test_assessors_structure.py index fc51324b..08e6c0ae 100644 --- a/tests/unit/test_assessors_structure.py +++ b/tests/unit/test_assessors_structure.py @@ -136,3 +136,692 @@ def test_evidence_shows_both_test_variants(self, tmp_path): evidence_str = " ".join(finding.evidence) assert "tests/" in evidence_str or "test/" in evidence_str assert "✓" in evidence_str # Should show checkmark for test dir + + # === Tests for issue #246: Project-named directory support === + + def test_recognizes_project_named_directory_with_pyproject(self, tmp_path): + """Test that assessor recognizes project-named directory from pyproject.toml. + + Fix for #246: Project-named directories like pandas/pandas/ should pass. 
+ """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # Create project-named directory with __init__.py + (tmp_path / "mypackage").mkdir() + (tmp_path / "mypackage" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + # Create pyproject.toml with project name + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[project]\nname = "mypackage"\n') + + repo = Repository( + path=tmp_path, + name="mypackage", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "pass" + assert finding.score == 100.0 + assert "project-named" in " ".join(finding.evidence) + + def test_recognizes_project_named_directory_with_hyphens(self, tmp_path): + """Test that hyphens in package name are converted to underscores. + + Fix for #246: my-package in pyproject.toml should match my_package/ dir. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # Create directory with underscores (Python convention) + (tmp_path / "my_package").mkdir() + (tmp_path / "my_package" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + # pyproject.toml uses hyphens (common convention) + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[project]\nname = "my-package"\n') + + repo = Repository( + path=tmp_path, + name="my-package", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "pass" + assert finding.score == 100.0 + + def test_project_named_directory_without_pyproject_fails(self, tmp_path): + """Test that project-named directory without pyproject.toml fails. + + Per PR review feedback: Strategy 3 requires pyproject.toml to exist + to prevent false positives on arbitrary repos with Python packages. 
def test_blocklist_excludes_non_source_directories(self, tmp_path):
    """Blocklisted package directories must not count as source directories.

    Fix for #246: utils/, scripts/, etc. should not count as source directories.

    A pyproject.toml is present on purpose: the heuristic directory scan
    only runs when it exists, so without it the blocklist would never be
    consulted and this test would pass vacuously.
    """
    (tmp_path / ".git").mkdir()

    # No [project].name, so only the heuristic scan can find a source dir
    pyproject = tmp_path / "pyproject.toml"
    pyproject.write_text('[build-system]\nrequires = ["setuptools"]\n')

    # Importable packages whose names are in the blocklist
    for blocked in ("utils", "scripts"):
        (tmp_path / blocked).mkdir()
        (tmp_path / blocked / "__init__.py").touch()
    (tmp_path / "tests").mkdir()

    repo = Repository(
        path=tmp_path,
        name="some-repo",
        url=None,
        branch="main",
        commit_hash="abc123",
        languages={"Python": 100},
        total_files=10,
        total_lines=100,
    )

    finding = StandardLayoutAssessor().assess(repo)

    # Only tests/ counts: utils/ and scripts/ are blocklisted
    assert finding.status == "fail"
    assert finding.score == 50.0
    evidence_str = " ".join(finding.evidence)
    assert "heuristic" not in evidence_str


def test_src_takes_precedence_over_project_named(self, tmp_path):
    """src/ must be reported even when a project-named package also matches.

    Fix for #246: If both exist, src/ should be reported.

    pyproject.toml declares the package name so that project-named
    detection would succeed if src/ were not checked first.
    """
    (tmp_path / ".git").mkdir()

    pyproject = tmp_path / "pyproject.toml"
    pyproject.write_text('[project]\nname = "mypackage"\n')

    # Both layouts are present at once
    (tmp_path / "src").mkdir()
    (tmp_path / "mypackage").mkdir()
    (tmp_path / "mypackage" / "__init__.py").touch()
    (tmp_path / "tests").mkdir()

    repo = Repository(
        path=tmp_path,
        name="mypackage",
        url=None,
        branch="main",
        commit_hash="abc123",
        languages={"Python": 100},
        total_files=10,
        total_lines=100,
    )

    finding = StandardLayoutAssessor().assess(repo)

    assert finding.status == "pass"
    # Evidence should show src/, not project-named
    evidence_str = " ".join(finding.evidence)
    assert "src/: ✓" in evidence_str
    assert "project-named" not in evidence_str


# === Tests for issue #305: Test-only repository support ===


def test_test_only_repo_returns_not_applicable(self, tmp_path):
    """Test-only repositories are reported as not_applicable, not as failures.

    Fix for #305: Repos with only tests/ and no source should not fail.
    """
    (tmp_path / ".git").mkdir()

    # Test-only structure: tests/ plus a root conftest.py, no source dir
    (tmp_path / "tests").mkdir()
    (tmp_path / "conftest.py").touch()

    repo = Repository(
        path=tmp_path,
        name="opendatahub-tests",  # Name suggests test repo
        url=None,
        branch="main",
        commit_hash="abc123",
        languages={"Python": 100},
        total_files=10,
        total_lines=100,
    )

    finding = StandardLayoutAssessor().assess(repo)

    assert finding.status == "not_applicable"
    # The reason is stored in evidence, not error_message
    evidence_str = " ".join(finding.evidence) if finding.evidence else ""
    assert "Test-only repository" in evidence_str
+ """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "tests").mkdir() + # No conftest.py, but name suggests tests + + repo = Repository( + path=tmp_path, + name="my-project-tests", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "not_applicable" + + def test_test_only_repo_detected_by_pytest_ini_without_pyproject(self, tmp_path): + """Test that repos with pytest.ini (and no pyproject.toml) are test-only. + + pytest.ini/conftest.py are only reliable test-only indicators when + pyproject.toml is absent. Mixed projects typically have pyproject.toml. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "tests").mkdir() + (tmp_path / "pytest.ini").touch() + # Note: no pyproject.toml + + repo = Repository( + path=tmp_path, + name="some-repo", # Generic name + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "not_applicable" + + def test_pytest_ini_with_pyproject_is_not_test_only(self, tmp_path): + """Test that pytest.ini alone doesn't mark repo as test-only when pyproject.toml exists. + + Mixed projects (source + tests) commonly have both pytest.ini and pyproject.toml. + The presence of pyproject.toml suggests this is a proper Python project, not test-only. 
+ """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "tests").mkdir() + (tmp_path / "pytest.ini").touch() + # pyproject.toml exists but no matching source directory + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[project]\nname = "myproject"\n') + + repo = Repository( + path=tmp_path, + name="some-repo", # Generic name, not test-related + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should FAIL, not be marked as not_applicable + # This is a project that's missing its source directory, not a test-only repo + assert finding.status == "fail" + + def test_repo_with_tests_but_no_source_and_no_indicators_fails(self, tmp_path): + """Test that repos with tests/ but no test indicators still fail. + + Fix for #305: Only repos that look like test repos get not_applicable. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "tests").mkdir() + # No conftest.py, no pytest.ini, generic name + + repo = Repository( + path=tmp_path, + name="generic-project", # Doesn't suggest tests + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should fail because it doesn't look like a test-only repo + assert finding.status == "fail" + assert finding.remediation is not None + + # === Tests for Poetry support === + + def test_recognizes_poetry_project_name(self, tmp_path): + """Test that assessor parses [tool.poetry].name from pyproject.toml.""" + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "mypoetrypackage").mkdir() + (tmp_path / "mypoetrypackage" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + # Poetry-style pyproject.toml + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[tool.poetry]\nname = 
"mypoetrypackage"\n') + + repo = Repository( + path=tmp_path, + name="mypoetrypackage", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "pass" + assert finding.score == 100.0 + + # === Edge case tests === + + def test_malformed_pyproject_toml_handled_gracefully(self, tmp_path): + """Test that malformed pyproject.toml doesn't crash the assessor.""" + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "tests").mkdir() + + # Malformed TOML + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text("this is not valid toml {{{{") + + repo = Repository( + path=tmp_path, + name="broken-project", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + # Should not raise, should fall back to other strategies + finding = assessor.assess(repo) + + # Will fail (no source dir found) but shouldn't crash + assert finding.status in ["fail", "not_applicable"] + + def test_celery_directory_not_blocked(self, tmp_path): + """Test that celery/ directory is recognized as valid source. + + The Celery project uses celery/ as its source directory. We should not + block package proper names, only generic non-source patterns. 
+ """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + (tmp_path / "celery").mkdir() + (tmp_path / "celery" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + # pyproject.toml with project name + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[project]\nname = "celery"\n') + + repo = Repository( + path=tmp_path, + name="celery", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "pass" + assert finding.score == 100.0 + + def test_heuristic_match_shows_verify_in_evidence(self, tmp_path): + """Test that heuristic source detection shows 'verify' in evidence. + + When pyproject.toml exists but package name doesn't match any directory, + Strategy 3 picks the first directory with __init__.py. This is a heuristic + guess and should be flagged in the evidence. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # pyproject.toml with name that doesn't match any directory + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[project]\nname = "myproject"\n') + + # Create a package directory with a different name + (tmp_path / "api").mkdir() + (tmp_path / "api" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + repo = Repository( + path=tmp_path, + name="myproject", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + assert finding.status == "pass" + evidence_str = " ".join(finding.evidence) + # Should indicate this is a heuristic match + assert "heuristic" in evidence_str + assert "verify" in evidence_str + + # === Tests for PR #322 review feedback === + + def test_pyproject_without_name_uses_heuristic(self, tmp_path): + """Test that pyproject.toml without [project].name still allows heuristic 
detection. + + PR #322 review feedback: Strategy 3 should run whenever pyproject.toml exists, + not just when a package name is found. A pyproject.toml with only [build-system] + should still allow heuristic detection of source directories. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # pyproject.toml with only [build-system], no [project].name + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + '[build-system]\nrequires = ["setuptools"]\nbuild-backend = "setuptools.build_meta"\n' + ) + + # Create a package directory + (tmp_path / "mypackage").mkdir() + (tmp_path / "mypackage" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + repo = Repository( + path=tmp_path, + name="mypackage", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should pass via heuristic detection since pyproject.toml exists + assert finding.status == "pass" + evidence_str = " ".join(finding.evidence) + assert "heuristic" in evidence_str + + def test_project_named_directory_without_init_falls_through(self, tmp_path): + """Test that project-named directory without __init__.py is not detected. + + PR #322 review feedback: Namespace packages (PEP 420) don't have __init__.py. + Strategy 2 should fall through correctly when the directory exists but has + no __init__.py. 
+ """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # pyproject.toml with project name + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[project]\nname = "mypackage"\n') + + # Create project-named directory WITHOUT __init__.py (namespace package style) + (tmp_path / "mypackage").mkdir() + # Note: no __init__.py - this is a namespace package + + # Create another package that does have __init__.py + (tmp_path / "api").mkdir() + (tmp_path / "api" / "__init__.py").touch() + (tmp_path / "tests").mkdir() + + repo = Repository( + path=tmp_path, + name="mypackage", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should pass - Strategy 2 fails (no __init__.py), but Strategy 3 + # finds api/ as a heuristic match + assert finding.status == "pass" + evidence_str = " ".join(finding.evidence) + # Falls through to heuristic since mypackage/ doesn't have __init__.py + assert "heuristic" in evidence_str + assert "api/" in evidence_str + + # === Tests for .arsrc config file loading === + + def test_arsrc_file_exists_and_is_loaded(self): + """Test that Python.arsrc config file exists and can be loaded.""" + from agentready.assessors.structure import _load_arsrc_file + + non_source_dirs = _load_arsrc_file("Python.arsrc") + + # Should load successfully and contain expected entries + assert isinstance(non_source_dirs, frozenset) + assert len(non_source_dirs) > 0 + # Check for some expected entries + assert "tests" in non_source_dirs + assert "docs" in non_source_dirs + assert ".git" in non_source_dirs + assert "node_modules" in non_source_dirs + + def test_get_non_source_dirs_returns_frozenset(self): + """Test that _get_non_source_dirs returns a frozenset.""" + from agentready.assessors.structure import _get_non_source_dirs + + result = _get_non_source_dirs() + + assert isinstance(result, frozenset) + assert 
def test_arsrc_file_ignores_comments_and_empty_lines(self, tmp_path, monkeypatch):
    """Test that .arsrc file parsing ignores comments and empty lines.

    Lines are stripped before the comment check, so an indented ``#`` line
    is a comment too and must not show up as an entry.
    """
    from agentready.assessors import structure

    # Create a test config file
    test_config = tmp_path / "Test.arsrc"
    test_config.write_text(
        "# This is a comment\n"
        "valid_entry\n"
        "\n"
        "# Another comment\n"
        "another_entry\n"
        "   # Indented comment is still a comment (lines are stripped first)\n"
    )

    # Monkey-patch the data directory
    monkeypatch.setattr(structure, "_get_data_dir", lambda: tmp_path)

    # The loader is memoized: start from a clean cache, and always clear it
    # afterwards so a failing assertion cannot leak state into other tests.
    structure._load_arsrc_file.cache_clear()
    try:
        result = structure._load_arsrc_file("Test.arsrc")

        # Exactly the two real entries survive
        assert result == frozenset({"valid_entry", "another_entry"})
        # Comments and blank lines should be excluded
        assert "# This is a comment" not in result
        assert "" not in result
    finally:
        structure._load_arsrc_file.cache_clear()


def test_arsrc_missing_file_returns_empty_set_with_warning(
    self, tmp_path, monkeypatch
):
    """Test that missing .arsrc file returns empty frozenset and emits warning."""
    import warnings

    from agentready.assessors import structure

    # Monkey-patch the data directory to a directory without the file
    monkeypatch.setattr(structure, "_get_data_dir", lambda: tmp_path)

    # Same cache discipline as above: clean before, always clean after
    structure._load_arsrc_file.cache_clear()
    try:
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            result = structure._load_arsrc_file("NonExistent.arsrc")

        # Should return empty set
        assert result == frozenset()

        # Should emit a warning
        assert len(w) == 1
        assert "NonExistent.arsrc" in str(w[0].message)
        assert "not found" in str(w[0].message)
    finally:
        structure._load_arsrc_file.cache_clear()
heuristic detection. + + This verifies the integration between the config file and the assessor. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # Create pyproject.toml without project name + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text('[build-system]\nrequires = ["setuptools"]\n') + + # Create a blocklisted directory with __init__.py + (tmp_path / "utils").mkdir() + (tmp_path / "utils" / "__init__.py").touch() + + # Create a valid source directory with __init__.py + (tmp_path / "myapp").mkdir() + (tmp_path / "myapp" / "__init__.py").touch() + + (tmp_path / "tests").mkdir() + + repo = Repository( + path=tmp_path, + name="myproject", + url=None, + branch="main", + commit_hash="abc123", + languages={"Python": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should pass with myapp/ detected (utils/ should be skipped) + assert finding.status == "pass" + evidence_str = " ".join(finding.evidence) + assert "myapp/" in evidence_str + # utils/ should NOT be in evidence as it's blocklisted + assert "utils/" not in evidence_str + + def test_python_arsrc_bundled_with_package(self): + """Verify Python.arsrc is accessible from the installed package. + + This test ensures the config file is properly included in package-data + and will be distributed when the package is installed via pip. + """ + from agentready.assessors.structure import _get_data_dir + + arsrc_path = _get_data_dir() / "Python.arsrc" + assert arsrc_path.exists(), f"Python.arsrc not found at {arsrc_path}" + + # Also verify it has content (not empty) + content = arsrc_path.read_text() + assert len(content) > 0, "Python.arsrc is empty" + assert "tests" in content, "Python.arsrc missing expected entry 'tests'"