From 3a270abdc5fbebe002e2e1bab965c07de7e51c86 Mon Sep 17 00:00:00 2001 From: Tobias Macey Date: Fri, 26 Jun 2026 16:36:26 -0400 Subject: [PATCH 1/2] feat(skills): add dependency-pruning skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audits a repository's dependencies across Python, JS/TS, Go, Rust, and other ecosystems to surface four categories of action: - Remove: unused packages (confirmed via tool output + manual grep) - Optimize: JS/TS packages with import styles that block tree-shaking - Vendor/rewrite: packages where only ≤3 symbols are used and the package is small enough to inline (configurable thresholds) - Migrate: deprecated, sunset, or abandoned packages with known migration targets Includes blind-spot guidance for Django projects (deptry false positives, INSTALLED_APPS string loading), server runtime packages (check Dockerfile + git history for in-flight migrations before flagging for removal), and CLI-invoked developer tooling (ipdb, bpython, pdbpp, etc. that static analysis always marks unused). Evaluated over 2 iterations against ocw-studio; skill achieves 93% assertion pass rate vs 60% for the no-skill baseline. Co-Authored-By: Claude Sonnet 4.6 --- skills/README.md | 1 + skills/process/README.md | 1 + skills/process/dependency-pruning/SKILL.md | 322 ++++++++++++++++++ .../dependency-pruning/evals/evals.json | 44 +++ .../references/unused-detection.md | 214 ++++++++++++ 5 files changed, 582 insertions(+) create mode 100644 skills/process/dependency-pruning/SKILL.md create mode 100644 skills/process/dependency-pruning/evals/evals.json create mode 100644 skills/process/dependency-pruning/references/unused-detection.md diff --git a/skills/README.md b/skills/README.md index 4835640..44d90dc 100644 --- a/skills/README.md +++ b/skills/README.md @@ -35,6 +35,7 @@ Skills are organized by **category**. 
Each skill lives in | process | [`generate-standup`](./process/generate-standup/SKILL.md) | Generate and post a daily standup from GitHub activity to the mitodl/hq Check-ins discussion | | process | [`screenshot-pr`](./process/screenshot-pr/SKILL.md) | Capture UI changes for a PR with shot-scraper at desktop, tablet, and mobile viewports | | process | [`dependency-updates`](./process/dependency-updates/SKILL.md) | Triage and apply Renovate dependency updates safely across Python, JS/TS, Helm, Apt, and database ecosystems | +| process | [`dependency-pruning`](./process/dependency-pruning/SKILL.md) | Audit dependencies to find unused ones to remove and underused ones to vendor or rewrite | ## Authoring a Skill diff --git a/skills/process/README.md b/skills/process/README.md index 05dc023..c5e40be 100644 --- a/skills/process/README.md +++ b/skills/process/README.md @@ -11,3 +11,4 @@ Workflow skills for interacting with external services and developer processes | [`generate-standup`](./generate-standup/SKILL.md) | Generate and post a daily standup from GitHub activity to the mitodl/hq Check-ins discussion | | [`screenshot-pr`](./screenshot-pr/SKILL.md) | Capture UI changes for a PR with shot-scraper at desktop, tablet, and mobile viewports | | [`dependency-updates`](./dependency-updates/SKILL.md) | Triage and apply Renovate dependency updates safely across Python, JS/TS, Helm, Apt, and database ecosystems | +| [`dependency-pruning`](./dependency-pruning/SKILL.md) | Audit dependencies to find unused ones to remove and underused ones to vendor or rewrite | diff --git a/skills/process/dependency-pruning/SKILL.md b/skills/process/dependency-pruning/SKILL.md new file mode 100644 index 0000000..dbf9695 --- /dev/null +++ b/skills/process/dependency-pruning/SKILL.md @@ -0,0 +1,322 @@ +--- +name: dependency-pruning +description: > + Audit a repository's dependencies to identify those that are unused (safe to + remove), underused (few enough features used to vendor or rewrite), 
imported + inefficiently (blocking tree-shaking), or deprecated/sunset (need migration). + Use this skill when the user wants to slim down their dependency tree, reduce + supply chain risk, remove dead weight from package manifests, identify + libraries that could be replaced with a few lines of code, or do a general + dependency health check. Covers Python, JavaScript/TypeScript, Go, Rust, and + other ecosystems. Invoke whenever the user mentions "unused dependencies", + "dependency audit", "remove packages", "we don't need this library", "too + many dependencies", "could we vendor this", "could we just write this + ourselves", "bundle size", "tree shaking", or any variation of slimming down + or cleaning up external dependencies. +license: BSD-3-Clause +metadata: + category: process +--- + +# Dependency Pruning + +## Goal + +Produce an actionable report categorizing dependencies into: **remove**, +**optimize import style**, **vendor/rewrite**, **migrate away from**, and +**keep**. Then offer to execute the safe changes. + +## Configurable thresholds (defaults) + +- **Max imported symbols for vendor candidate**: 3 unique symbols from the package +- **Max package LOC proxy for vendor candidate**: 500 total source lines + +Mention these defaults briefly at the start and ask if the user wants to +override them — but don't block; proceed with defaults if they don't respond. + +--- + +## Phase 1 — Detect ecosystems + +Scan the repo root and subdirectories for manifest files: + +| Ecosystem | Indicator files | +|-----------|----------------| +| Python | `pyproject.toml`, `requirements*.txt`, `setup.py`, `setup.cfg`, `Pipfile` | +| JS/TS | `package.json` | +| Go | `go.mod` | +| Rust | `Cargo.toml` | + +For other ecosystems, look for any manifest that lists dependencies (Gemfile, +pom.xml, build.gradle, pubspec.yaml, mix.exs, etc.) and adapt the approach +below to that format. + +Announce which ecosystems were found before proceeding. 
In monorepos, enumerate +all manifest files. + +--- + +## Phase 2 — Find unused dependencies + +See `references/unused-detection.md` for the full per-ecosystem commands. + +**Python** — run `uvx deptry .` (no install needed). Focus on `DEP002` +(declared but never imported). If deptry is unavailable, fall back to grepping +for each dependency name across `*.py` files. + +**JS/TS** — run `npx --yes depcheck --json`. Unused runtime deps appear under +`dependencies`, unused dev deps under `devDependencies`. `knip` is a good +alternative for monorepos: `npx --yes knip --reporter json`. + +**Go** — run `go mod tidy -v` in a temporary copy of go.mod/go.sum; diff the +result to see what disappears. Do not mutate the real files yet. + +**Rust** — run `cargo machete` (installs itself if needed via `cargo install +cargo-machete --quiet`). + +**Other** — enumerate declared deps from the manifest, then grep the source +tree for each package name to find those never referenced. + +### Go beyond the tool output + +Automated tools miss things. After the tool run, scan the full declared dep +list yourself and verify packages the tool didn't flag — especially: +- Packages whose PyPI/npm name differs from the import name +- Packages that are very old or rarely heard of +- Packages that duplicate stdlib functionality + +### Known tool blind spots + +**Django / Python projects**: deptry's DEP002 false-positive rate can be very +high (sometimes 30+ flags for a single project) because PyPI package names +rarely match their Python module names: +- `djangorestframework` → `rest_framework` +- `beautifulsoup4` → `bs4` +- `pyyaml` → `yaml` +- `pygithub` → `github` +- `psycopg2-binary` → `psycopg2` + +When you see many DEP002 warnings on a Django project, verify each one manually +rather than reporting them all as unused. After the audit, suggest adding a +`[tool.deptry.package_module_name_map]` section to `pyproject.toml` so future +runs are accurate.
+ +**Django INSTALLED_APPS**: packages registered as Django apps (django-anymail, +django-storages, django-guardian, etc.) are loaded by the framework from +strings in `INSTALLED_APPS`, not via Python `import` statements. Static +analysis will always miss these. Always check `settings.py` before flagging +a `django-*` package as unused. + +**Server runtime packages**: WSGI/ASGI servers (gunicorn, uwsgi, granian, +hypercorn, uvicorn) are invoked via CLI in Dockerfile or Kubernetes manifests, +not imported in Python. Before flagging any of these for removal: +1. Check deployment configs (`Dockerfile`, `docker-compose.yml`, Helm charts, `Procfile`) +2. Check git history for in-progress migrations (e.g., uwsgi→granian): if a pending + PR or recent commit is switching servers, both the old and new runtime belong in + the manifest until the migration lands. Flag as "keep — migration in progress" + rather than a removal candidate. + +**Developer tooling (CLI-invoked)**: Debuggers, REPLs, and profilers — such as +`ipdb`, `pdbpp`, `bpython`, `ptpython`, `debugpy`, `pudb`, `py-spy`, +`memory-profiler` — are invoked from the terminal, not imported in application +code. Static analysis will always flag them as unused. These belong in +dev dependencies (not main), but should generally be kept rather than removed +if they're clearly intended as team-wide developer conveniences. Flag as +"dev tooling — classify as dev dep if in main" rather than "remove". + +**Webpack/babel loaders and plugins**: These are referenced in +`webpack.config.js`, not imported in source code. Don't flag webpack plugins +as unused based on source-code search alone. + +### Handling test-only usage + +If a package is only imported in test files (`tests/`, `spec/`, `*_test.go`, +`*_spec.rs`, etc.), classify it as a dev-only dep, not as fully unused — the +fix is moving it to dev dependencies, not removing it outright. 
+ +### Dynamic and conditional imports + +Flag packages that are loaded dynamically (`importlib`, `require()` with a +variable, `dlopen`) or conditionally (`try: import X except ImportError:`) as +"possibly used — verify manually" rather than unused. + +--- + +## Phase 3 — Analyze API surface for vendoring candidates + +For each dependency that IS used, check how much of it is actually called. + +```bash +# Python: unique symbols imported from a package +PKG="humanize" +rg "from ${PKG}(\.\w+)? import (\w+)" --no-filename -o --include="*.py" \ + | grep -oP 'import \K\w+' | sort -u + +# JS/TS: named imports + member accesses +PKG="lodash" +rg "import \{([^}]+)\} from ['\"]${PKG}['\"]" --no-filename -g "*.{ts,tsx,js,jsx}" +rg "${PKG}\.\w+" --no-filename -g "*.{ts,tsx,js,jsx}" -o | sort -u + +# Go: symbols accessed after package alias +PKG="github.com/pkg/errors" +rg "\"${PKG}\"" -g "*.go" -l + +# Rust: use paths from the crate +CRATE="serde" +rg "use ${CRATE}::" -g "*.rs" -o | sort -u +``` + +Count unique symbols. If the count is at or below the threshold (default: 3), +flag the package as a vendoring candidate. Include a one-sentence sketch of the +replacement (e.g., "The 2 functions could be replaced with ~25 lines of Python").
+ +Also estimate package size: +```bash +# Python +python -c " +import importlib.util, pathlib +spec = importlib.util.find_spec('${PKG}') +if spec and spec.origin: + root = pathlib.Path(spec.origin).parent + lines = sum(len(f.read_text(errors='ignore').splitlines()) for f in root.rglob('*.py')) + print(lines) +" +# JS/TS +du -sk node_modules/${PKG} 2>/dev/null +``` + +--- + +## Phase 3b — Import style and bundling analysis (JS/TS only) + +For each JS/TS package that passes the "keep" threshold (too many symbols to +vendor), check whether its import style is preventing tree-shaking: + +```bash +PKG="lodash" +# Default/namespace imports that load the whole package +rg "import _ from ['\"]${PKG}['\"]" -g "*.{ts,tsx,js,jsx}" +rg "import \* as .+ from ['\"]${PKG}['\"]" -g "*.{ts,tsx,js,jsx}" +rg "const .+ = require\(['\"]${PKG}['\"]" -g "*.{ts,tsx,js,jsx}" + +# Named imports (tree-shakeable IF the package ships ESM) +rg "import \{" -g "*.{ts,tsx,js,jsx}" | grep "${PKG}" + +# Check if the package ships an ESM build +ls node_modules/${PKG}/esm 2>/dev/null || cat node_modules/${PKG}/package.json | grep -E '"module"|"exports"' | head -5 +``` + +If the package is imported via default/namespace import but ships an ESM +alternative (e.g., `lodash-es`, per-function path imports like +`lodash/debounce`), flag it as an **import style optimization** opportunity — +not a removal candidate, but potentially a large bundle-size win.
+ +--- + +## Phase 4 — Check for deprecated or sunset packages + +After the unused and vendoring analysis, scan for packages that are still +in active use but should be migrated away from: + +- **Deprecated by maintainer**: check npm/PyPI for deprecation notices +- **Abandoned**: check for packages with no commits in 2+ years and open issues +- **Sunset by platform**: e.g., Google Analytics UA (react-ga, analytics.js) + was sunset in July 2023; older React component libraries with known + React 18 incompatibilities (react-hot-loader, etc.) +- **Superseded by stdlib**: e.g., `more-itertools.batched` → `itertools.batched` + (Python 3.12+), `moment` → `Temporal` or `date-fns`, `request` → `fetch` + +For each, note what the migration target is and roughly how large the change +would be. Do not conflate "deprecated" with "remove" — flag these as +"should migrate" rather than immediately removable. + +--- + +## Phase 5 — Report + +Present a structured report: + +``` +# Dependency Audit: +Thresholds: API surface <= N symbols, package LOC proxy <= N + +## Summary +- Ecosystems: +- Total direct dependencies: N + - Remove (unused): N + - Optimize import style: N + - Vendor/rewrite candidates: N + - Migrate away from (deprecated/sunset): N + - Dev-only misclassified (in main, should be dev): N + - CLI-invoked dev tooling (keep, move to dev if needed): N + - Well-used: N + +## Remove — Unused Dependencies +| Package | Ecosystem | Evidence of non-use | +| ddt | Python | No `import ddt` or `from ddt` in any test file | + +## Optimize Import Style (JS/TS) +| Package | Current import | Issue | Fix | +| lodash | `import _ from 'lodash'` | Prevents tree-shaking; full ~72KB ships | Switch to `lodash-es` or per-function imports | + +## Vendor/Rewrite Candidates +| Package | Used symbols | Package LOC | Replacement sketch | +| waait | default (1) | 1 LOC | `const wait = (ms=0) => new Promise(r => setTimeout(r, ms))` | + +## Migrate Away From +| Package | Status | Migration target | +| 
react-ga | GA3 sunset Jul 2023 | PostHog (already wired), or GA4 via gtag | + +## Dev-only Misclassifications +| Package | Currently | Should be | +| ipython | dependencies | dev dependencies | + +## Well-used (checked, nothing to do) + +``` + +--- + +## Phase 6 — Offer to apply changes + +After the report, present the user with options: + +1. **Report only** — done; they act on it manually. +2. **Remove unused deps** — show dry-run list and confirm, then execute. +3. **Remove unused + optimize imports** — remove unused, then apply the + import-style fixes (e.g., `yarn add lodash-es && yarn remove lodash`). +4. **Remove unused + draft vendor stubs** — remove unused, then for each + vendoring candidate create a stub comment showing which symbols to implement. + +For any execution option, show a dry-run list of what will change and confirm +before executing. If the user has the `create-ol-pull-request` skill available, +offer to create a PR after applying changes. + +### Removal commands + +| Ecosystem | Command | +|-----------|---------| +| Python (uv) | `uv remove ` | +| Python (pyproject.toml) | Edit `[project.dependencies]`, then `uv sync` | +| Python (requirements.txt) | Remove line; `pip install -r requirements.txt` | +| JS (npm) | `npm uninstall ` | +| JS (bun) | `bun remove ` | +| JS (yarn) | `yarn remove ` | +| JS (pnpm) | `pnpm remove ` | +| Go | Edit `go.mod`, then `go mod tidy` | +| Rust | Edit `Cargo.toml`, then `cargo build` | + +After removal, run the project's test suite (or at minimum a build/typecheck). + +--- + +## Additional caveats + +- **Wildcard imports** (`from pkg import *`, `import * from 'pkg'`) make + static analysis unreliable — note these and recommend converting to explicit + imports as a follow-up. +- **Transitive deps used directly** — a dep used in code but not declared in + the manifest (deptry's `DEP003`) is a separate problem; note it but don't + flag for removal. These should be added to the manifest instead. 
+- **Optional feature extras** — packages installed as `pkg[extra]` may have + conditional sub-imports; check the extras explicitly. diff --git a/skills/process/dependency-pruning/evals/evals.json b/skills/process/dependency-pruning/evals/evals.json new file mode 100644 index 0000000..94b3752 --- /dev/null +++ b/skills/process/dependency-pruning/evals/evals.json @@ -0,0 +1,44 @@ +{ + "skill_name": "dependency-pruning", + "evals": [ + { + "id": 1, + "prompt": "Can you audit the dependencies in this repo and tell me which ones we can drop? I have a feeling we're carrying some dead weight. The project is at /home/tmacey/code/mit/misc/agent-kit", + "expected_output": "A structured report listing unused dependencies that can be removed, underused ones that could be vendored (with symbol counts), and a brief summary. Should offer to apply changes.", + "files": [], + "expectations": [ + "The report identifies which ecosystem(s) are present in the repo", + "Unused dependencies are listed separately from used ones", + "At least one vendoring candidate assessment is provided (or a clear statement that all deps are well-used)", + "The report includes a summary section with counts", + "The skill offers the user options to apply changes, not just silently proceed" + ] + }, + { + "id": 2, + "prompt": "I'm looking at our package.json and I think we only use like 2 functions from lodash. 
Can you check what we actually use from it and whether it's worth just writing those ourselves instead of keeping the lodash dep?", + "expected_output": "An analysis of which lodash functions are imported/used in the codebase, the count of unique symbols, and a recommendation on whether to vendor vs keep, with a rough sketch of what the replacement would look like.", + "files": [], + "expectations": [ + "The skill searches for lodash imports and member accesses in JS/TS files", + "The number of unique lodash symbols used is counted and reported", + "A recommendation is made (vendor vs keep) based on the count", + "If vendoring is recommended, a sketch of the replacement is provided", + "The lodash package size or LOC proxy is reported" + ] + }, + { + "id": 3, + "prompt": "We just inherited this Django project and I want to understand the Python dependency situation. Some packages might be legacy leftovers from 2 years ago. Run a dependency audit with the default thresholds.", + "expected_output": "A full audit using deptry (or grep fallback) across the Python dependencies, categorized into unused/vendoring candidates/well-used. 
Should handle the Django plugin ecosystem caveat (framework plugins may be indirect).", + "files": [], + "expectations": [ + "The skill attempts to run deptry or falls back gracefully", + "The report notes that Django plugins/apps may be loaded indirectly and flags these for manual verification", + "Unused deps are listed with evidence (no imports found)", + "The thresholds used are stated in the report", + "The user is offered options to apply removals" + ] + } + ] +} diff --git a/skills/process/dependency-pruning/references/unused-detection.md b/skills/process/dependency-pruning/references/unused-detection.md new file mode 100644 index 0000000..fe58e9b --- /dev/null +++ b/skills/process/dependency-pruning/references/unused-detection.md @@ -0,0 +1,214 @@ +# Unused Dependency Detection — Per-Ecosystem Commands + +## Python + +### Primary: deptry + +```bash +uvx deptry . +``` + +deptry reads `pyproject.toml` or `requirements.txt` and cross-references +imports across the codebase. No installation needed with `uvx`. + +Exit codes: 0 = clean, 1 = issues found (check stdout for details). + +Relevant codes: +- `DEP001` — import found but not declared (missing dep) → ignore for pruning +- `DEP002` — dependency declared but never imported → removal candidate +- `DEP003` — transitive dep used directly → note, don't remove +- `DEP004` — dev dep used in non-dev code → note as misclassification + +```bash +# JSON output for parsing +uvx deptry .
--json-output /tmp/deptry-out.json && cat /tmp/deptry-out.json +``` + +### Fallback: grep-based + +```bash +# Read all declared deps from pyproject.toml +python -c " +import tomllib, pathlib, subprocess, sys + +with open('pyproject.toml', 'rb') as f: + data = tomllib.load(f) + +deps = data.get('project', {}).get('dependencies', []) +# Strip version specifiers +pkgs = [d.split('[')[0].split('>=')[0].split('==')[0].split('<')[0].strip().lower().replace('-','_') for d in deps] + +for pkg in pkgs: + result = subprocess.run(['rg', '-l', pkg, '--include=*.py', '--glob=!tests/'], capture_output=True, text=True) + if not result.stdout.strip(): + print(f'UNUSED: {pkg}') + else: + print(f'used: {pkg} ({len(result.stdout.strip().splitlines())} files)') +" +``` + +--- + +## JavaScript / TypeScript + +### Primary: depcheck + +```bash +npx --yes depcheck --json 2>/dev/null +``` + +Output structure: +```json +{ + "dependencies": ["unused-pkg"], + "devDependencies": ["unused-dev-pkg"], + "missing": {}, + "invalidFiles": [], + "invalidDirs": [] +} +``` + +### Alternative: knip (better for monorepos) + +```bash +npx --yes knip --reporter json 2>/dev/null +``` + +knip also finds unused exports, files, and type references — more thorough but +slower. + +### Fallback: grep-based + +```bash +node -e " +const pkg = require('./package.json'); +const { execSync } = require('child_process'); +const deps = Object.keys({...(pkg.dependencies||{}), ...(pkg.devDependencies||{})}); +for (const dep of deps) { + try { + const out = execSync(\`rg -l '\"'\${dep}'\"\\|'\${dep}' ' src/ --include='*.{ts,tsx,js,jsx}'\`, {stdio:['pipe','pipe','pipe']}).toString(); + console.log(out.trim() ? 
'used: '+dep : 'UNUSED: '+dep); + } catch { console.log('UNUSED: '+dep); } +} +" +``` + +--- + +## Go + +### Check what go mod tidy would remove + +Run in a temp directory to avoid mutating the real go.mod: + +```bash +# Non-destructive: show what's unused +cp go.mod /tmp/go.mod.bak && cp go.sum /tmp/go.sum.bak +go mod tidy -v 2>&1 | grep "^removing" +# Restore +cp /tmp/go.mod.bak go.mod && cp /tmp/go.sum.bak go.sum +``` + +Or use `go mod why` to check if a dep is reachable: + +```bash +go mod why github.com/some/package 2>&1 +# Output: "(main module does not need github.com/some/package)" → unused +``` + +List all direct deps: + +```bash +go list -m -json all 2>/dev/null | python3 -c " +import json, sys +data = sys.stdin.read() +import re +for obj in re.split(r'\n(?=\{)', data.strip()): + try: + m = json.loads(obj) + if not m.get('Indirect') and not m.get('Main'): + print(m['Path']) + except: pass +" +``` + +--- + +## Rust + +### Primary: cargo-machete + +```bash +cargo machete +``` + +If not installed: + +```bash +cargo install cargo-machete --quiet 2>&1 +cargo machete +``` + +### Alternative: cargo-udeps (requires nightly) + +```bash +cargo +nightly udeps +``` + +### Fallback: grep the Cargo.toml deps against src/ + +```bash +python3 -c " +import re, subprocess, pathlib + +cargo = pathlib.Path('Cargo.toml').read_text() +deps = re.findall(r'^(\w[\w-]*)\s*=', cargo, re.MULTILINE) + +for dep in deps: + crate_name = dep.replace('-', '_') + result = subprocess.run(['rg', '-l', crate_name, 'src/'], capture_output=True, text=True) + if result.stdout.strip(): + print(f'used: {dep}') + else: + result2 = subprocess.run(['rg', '-l', dep, 'src/'], capture_output=True, text=True) + print(f'UNUSED: {dep}' if not result2.stdout.strip() else f'used: {dep}') +" +``` + +--- + +## Other ecosystems + +### Ruby (Gemfile) + +```bash +bundle exec debundle 2>/dev/null || true +# Grep approach: +ruby -e " +require 'bundler' +Bundler.load.specs.each do |spec| + next if spec.name == 
'bundler' + used = \`rg -l '#{spec.name}' lib/ app/ 2>/dev/null\`.strip + puts used.empty? ? \"UNUSED: #{spec.name}\" : \"used: #{spec.name}\" +end +" +``` + +### Java/Kotlin (Maven) + +```bash +mvn dependency:analyze 2>&1 | grep -E "Unused declared|Used undeclared" +``` + +### Java/Kotlin (Gradle) + +```bash +./gradlew dependencies --configuration runtimeClasspath 2>/dev/null +# Then grep source for each dep +``` + +### Elixir (mix.exs) + +```bash +mix deps.unlock --check-unused 2>&1 +``` From da7a5cd77df35353e4a188e14e5a54ac5fc9c9ec Mon Sep 17 00:00:00 2001 From: Tobias Macey Date: Mon, 29 Jun 2026 14:58:05 -0400 Subject: [PATCH 2/2] fix(skills/dependency-pruning): address review feedback - Clarify deptry DEP001/DEP002 semantics: Django false positives are DEP002 (declared but never imported), not DEP001 (import found but not declared); explain both codes and why they co-occur when PyPI name != module name - Replace grep -oP with rg -r capture groups (GNU grep not on macOS) - Fix Python LOC script to handle single-file modules: check for __init__.py before rglob'ing parent (otherwise scans all site-packages) - Add separator rows to all Phase 5 report tables - Python fallback: add tomllib->tomli fallback, use regex to parse PEP 508 dep specifiers, drop --glob=!tests/ (test-only use != unused) - JS/TS fallback: fix broken nested quoting; escape dep name as regex literal to handle scoped packages and metacharacters - Go mod tidy: fix misleading 'temp directory' comment (actually backup/restore) - Cargo.toml fallback: replace fragile regex (matched metadata keys like 'name', 'version', 'edition') with proper tomllib parsing of dependency sections Co-Authored-By: Claude Sonnet 4.6 --- skills/process/dependency-pruning/SKILL.md | 32 +++++++---- .../references/unused-detection.md | 56 ++++++++++++++----- 2 files changed, 63 insertions(+), 25 deletions(-) diff --git a/skills/process/dependency-pruning/SKILL.md b/skills/process/dependency-pruning/SKILL.md index dbf9695..a8d1ff8 100644 ---
a/skills/process/dependency-pruning/SKILL.md +++ b/skills/process/dependency-pruning/SKILL.md @@ -87,8 +87,8 @@ list yourself and verify packages the tool didn't flag — especially: ### Known tool blind spots -**Django / Python projects**: deptry's DEP002 false-positive rate can be very -high (sometimes 30+ flags for a single project) because PyPI package names +**Django / Python projects**: deptry's **DEP002** false-positive rate can be +very high (sometimes 30+ flags for a single project) because PyPI package names rarely match their Python module names: - `djangorestframework` → `rest_framework` - `beautifulsoup4` → `bs4` @@ -96,10 +96,14 @@ rarely match their Python module names: - `pygithub` → `github` - `psycopg2-binary` → `psycopg2` -When you see many DEP002 warnings on a Django project, verify each one manually -rather than reporting them all as unused. After the audit, suggest adding a -`[tool.deptry.package_module_name_map]` section to `pyproject.toml` so future -runs are accurate. +When deptry can't find `import djangorestframework` anywhere, it raises DEP002 +("declared but never imported") — but the package IS in use, just as +`from rest_framework import ...`. The paired DEP001 ("import found but not +declared") may also fire for the Python module name that is imported, since the +manifest lists the PyPI name. Both errors stem from the same root cause. +Verify each DEP002 manually before treating any as a removal candidate. After +the audit, suggest adding a `[tool.deptry.package_module_name_map]` section to +`pyproject.toml` so future runs are accurate. **Django INSTALLED_APPS**: packages registered as Django apps (django-anymail, django-storages, django-guardian, etc.) are loaded by the framework from @@ -149,8 +153,7 @@ For each dependency that IS used, check how much of it is actually called. ```bash # Python: unique symbols imported from a package PKG="humanize" -rg "from ${PKG}(\.\w+)?
import (\w+)" --no-filename -o --include="*.py" \ - | grep -oP 'import \K\w+' | sort -u +rg "from ${PKG}(?:\.\w+)? import (\w+)" -g "*.py" -o -r '$1' --no-filename | sort -u # JS/TS: named imports + member accesses PKG="lodash" @@ -177,8 +180,12 @@ python -c " import importlib.util, pathlib spec = importlib.util.find_spec('${PKG}') if spec and spec.origin: - root = pathlib.Path(spec.origin).parent - lines = sum(len(f.read_text(errors='ignore').splitlines()) for f in root.rglob('*.py')) + origin = pathlib.Path(spec.origin) + if origin.name == '__init__.py': + root = origin.parent + lines = sum(len(f.read_text(errors='ignore').splitlines()) for f in root.rglob('*.py')) + else: + lines = len(origin.read_text(errors='ignore').splitlines()) print(lines) " # JS/TS @@ -253,22 +260,27 @@ Thresholds: API surface <= N symbols, package LOC proxy <= N ## Remove — Unused Dependencies | Package | Ecosystem | Evidence of non-use | +|---------|-----------|---------------------| | ddt | Python | No `import ddt` or `from ddt` in any test file | ## Optimize Import Style (JS/TS) | Package | Current import | Issue | Fix | +|---------|----------------|-------|-----| | lodash | `import _ from 'lodash'` | Prevents tree-shaking; full ~72KB ships | Switch to `lodash-es` or per-function imports | ## Vendor/Rewrite Candidates | Package | Used symbols | Package LOC | Replacement sketch | +|---------|-------------|-------------|-------------------| | waait | default (1) | 1 LOC | `const wait = (ms=0) => new Promise(r => setTimeout(r, ms))` | ## Migrate Away From | Package | Status | Migration target | +|---------|--------|-----------------| | react-ga | GA3 sunset Jul 2023 | PostHog (already wired), or GA4 via gtag | ## Dev-only Misclassifications | Package | Currently | Should be | +|---------|-----------|-----------| | ipython | dependencies | dev dependencies | ## Well-used (checked, nothing to do) diff --git a/skills/process/dependency-pruning/references/unused-detection.md 
b/skills/process/dependency-pruning/references/unused-detection.md index fe58e9b..7510338 100644 --- a/skills/process/dependency-pruning/references/unused-detection.md +++ b/skills/process/dependency-pruning/references/unused-detection.md @@ -29,21 +29,35 @@ uvx deptry . --json-output /tmp/deptry-out.json && cat /tmp/deptry-out.json ```bash # Read all declared deps from pyproject.toml python -c " -import tomllib, pathlib, subprocess, sys +import pathlib, subprocess, sys, re +try: + import tomllib +except ImportError: + try: + import pip._vendor.tomli as tomllib + except ImportError: + print('Error: tomllib or tomli required'); sys.exit(1) with open('pyproject.toml', 'rb') as f: data = tomllib.load(f) deps = data.get('project', {}).get('dependencies', []) -# Strip version specifiers -pkgs = [d.split('[')[0].split('>=')[0].split('==')[0].split('<')[0].strip().lower().replace('-','_') for d in deps] +pkgs = [] +for d in deps: + m = re.match(r'^([a-zA-Z0-9_.-]+)', d) + if m: + pkgs.append(m.group(1).lower().replace('-', '_')) for pkg in pkgs: - result = subprocess.run(['rg', '-l', pkg, '--include=*.py', '--glob=!tests/'], capture_output=True, text=True) + # Search including tests/ — test-only use is 'dev-only', not 'unused' + result = subprocess.run(['rg', '-l', pkg, '-g', '*.py'], capture_output=True, text=True) if not result.stdout.strip(): print(f'UNUSED: {pkg}') else: - print(f'used: {pkg} ({len(result.stdout.strip().splitlines())} files)') + files = result.stdout.strip().splitlines() + test_only = all('test' in f for f in files) + label = 'test-only' if test_only else 'used' + print(f'{label}: {pkg} ({len(files)} files)') " ``` @@ -85,10 +99,15 @@ const pkg = require('./package.json'); const { execSync } = require('child_process'); const deps = Object.keys({...(pkg.dependencies||{}), ...(pkg.devDependencies||{})}); for (const dep of deps) { + // Escape regex metacharacters (e.g.
scoped packages like @org/name) + const escaped = dep.replace(/[.*+?^\${}()|[\]\\\\]/g, '\\\\$&'); try { - const out = execSync(\`rg -l '\"'\${dep}'\"\\|'\${dep}' ' src/ --include='*.{ts,tsx,js,jsx}'\`, {stdio:['pipe','pipe','pipe']}).toString(); - console.log(out.trim() ? 'used: '+dep : 'UNUSED: '+dep); - } catch { console.log('UNUSED: '+dep); } + const out = execSync( + 'rg -l \"' + escaped + '\" src/ -g \"*.ts\" -g \"*.tsx\" -g \"*.js\" -g \"*.jsx\"', + {stdio:['pipe','pipe','pipe']} + ).toString(); + console.log(out.trim() ? 'used: ' + dep : 'UNUSED: ' + dep); + } catch { console.log('UNUSED: ' + dep); } } " ``` @@ -99,13 +118,11 @@ for (const dep of deps) { ### Check what go mod tidy would remove -Run in a temp directory to avoid mutating the real go.mod: +Back up go.mod/go.sum, run tidy, capture output, then restore — non-destructive: ```bash -# Non-destructive: show what's unused cp go.mod /tmp/go.mod.bak && cp go.sum /tmp/go.sum.bak go mod tidy -v 2>&1 | grep "^removing" -# Restore cp /tmp/go.mod.bak go.mod && cp /tmp/go.sum.bak go.sum ``` @@ -159,12 +176,21 @@ cargo +nightly udeps ```bash python3 -c " -import re, subprocess, pathlib +import pathlib, subprocess, sys +try: + import tomllib +except ImportError: + try: + import pip._vendor.tomli as tomllib + except ImportError: + print('Error: tomllib or tomli required'); sys.exit(1) -cargo = pathlib.Path('Cargo.toml').read_text() -deps = re.findall(r'^(\w[\w-]*)\s*=', cargo, re.MULTILINE) +cargo_data = tomllib.loads(pathlib.Path('Cargo.toml').read_text()) +deps = [] +for section in ['dependencies', 'dev-dependencies', 'build-dependencies']: + deps.extend(cargo_data.get(section, {}).keys()) -for dep in deps: +for dep in sorted(set(deps)): crate_name = dep.replace('-', '_') result = subprocess.run(['rg', '-l', crate_name, 'src/'], capture_output=True, text=True) if result.stdout.strip():