diff --git a/packages/core/src/api/top-langs.js b/packages/core/src/api/top-langs.js index 9d5c9670e0164..c7e013d6f7765 100644 --- a/packages/core/src/api/top-langs.js +++ b/packages/core/src/api/top-langs.js @@ -7,6 +7,7 @@ import { } from "../common/error.js"; import { parseArray, parseBoolean } from "../common/ops.js"; import { renderError } from "../common/render.js"; +import { fetchPersonalContributionLanguages } from "../fetchers/personal-contribution-languages.js"; import { fetchTopLanguages } from "../fetchers/top-languages.js"; import { isLocaleAvailable } from "../translations.js"; @@ -37,6 +38,10 @@ export default async ( hide_progress, hide_values, stats_format, + personal_contributions, + orgs, + personal_pages, + personal_limit, }, pat = null, ) => { @@ -100,14 +105,23 @@ export default async ( } try { - const topLangs = await fetchTopLanguages( - username, - parseArray(exclude_repo), - size_weight, - count_weight, - parseArray(role), - pat, - ); + const usePersonalContributions = parseBoolean(personal_contributions); + const topLangs = usePersonalContributions + ? await fetchPersonalContributionLanguages( + username, + parseArray(orgs), + personal_pages, + personal_limit, + pat, + ) + : await fetchTopLanguages( + username, + parseArray(exclude_repo), + size_weight, + count_weight, + parseArray(role), + pat, + ); return { status: "success", diff --git a/packages/core/src/fetchers/personal-contribution-languages.js b/packages/core/src/fetchers/personal-contribution-languages.js new file mode 100644 index 0000000000000..a6882ef7a853d --- /dev/null +++ b/packages/core/src/fetchers/personal-contribution-languages.js @@ -0,0 +1,430 @@ +// @ts-check + +import { createRequire } from "module"; + +import axios from "axios"; + +import { CustomError, MissingParamError } from "../common/error.js"; +import { logger } from "../common/log.js"; +import { retryer } from "../common/retryer.js"; + +const require = createRequire(import.meta.url); +const languageColors = require("../common/languageColors.json"); + +const GITHUB_API_URL = "https://api.github.com"; +const DEFAULT_PAGES = 2; +const DEFAULT_LIMIT = 40; +const MAX_PAGES = 10; +const MAX_LIMIT = 200; +const COMMIT_FETCH_CONCURRENCY = 10; + +const SPECIAL_FILENAMES = { + dockerfile: "Dockerfile", + "dockerfile.dev": "Dockerfile", + "dockerfile.prod": "Dockerfile", + makefile: "Makefile", + "cmakelists.txt": "CMake", + gemfile: "Ruby", + rakefile: "Ruby", + jenkinsfile: "Groovy", + procfile: "Procfile", + vagrantfile: "Ruby", + "composer.json": "JSON", + "package.json": "JSON", + "tsconfig.json": "JSON", +}; + +const EXTENSION_LANGUAGES = { + ".astro": "Astro", + ".bat": "Batchfile", + ".c": "C", + ".cc": "C++", + ".clj": "Clojure", + ".cljs": "Clojure", + ".cpp": "C++", + ".cs": "C#", + ".css": "CSS", + ".dart": "Dart", + ".ex": "Elixir", + ".exs": "Elixir", + ".go": "Go", + ".graphql": "GraphQL", + ".gql": "GraphQL", + ".groovy": "Groovy", + ".h": "C", + ".hpp": "C++", + ".html": "HTML", + ".java": "Java", + ".js": "JavaScript", + ".json": "JSON", + ".jsx": "JavaScript", + ".kt": "Kotlin", + ".kts": "Kotlin", + ".less": "Less", + ".lua": "Lua", + ".md": "Markdown", + ".mjs": "JavaScript", + ".php": "PHP", + ".pl": "Perl", + ".prisma": "Prisma", + ".py": "Python", + ".r": "R", + ".rb": "Ruby", + ".rs": "Rust", + ".sass": "Sass", + ".scala": "Scala", + ".scss": "SCSS", + ".sh": "Shell", + ".sql": "SQL", + ".svelte": "Svelte", + ".swift": "Swift", + ".tsx": "TypeScript", + ".ts": "TypeScript", + ".vue": "Vue", + ".xml": "XML", + ".yaml": "YAML", + ".yml": "YAML", +}; + +const IGNORED_EXTENSIONS = new Set([ + ".gif", + ".ico", + ".jpg", + ".jpeg", + ".lock", + ".map", + ".pdf", + ".png", + ".svg", + ".webp", +]); + +const IGNORED_FILENAMES = new Set([ + "composer.lock", + "package-lock.json", + "pnpm-lock.yaml", + "yarn.lock", +]); + +const IGNORED_PATH_PARTS = new Set([ + "dist", + "build", + "coverage", + "node_modules", + "vendor", +]); + +/** + * @param {number | string | undefined} pages Requested number of Search API pages. + * @returns {number} A bounded page count. + */ +const normalizePages = (pages) => { + const parsedPages = parseInt(String(pages || DEFAULT_PAGES), 10); + + if (Number.isNaN(parsedPages) || parsedPages < 1) { + return DEFAULT_PAGES; + } + + return Math.min(parsedPages, MAX_PAGES); +}; + +/** + * @param {number | string | undefined} limit Requested number of commits to scan. + * @returns {number} A bounded commit count. + */ +const normalizeLimit = (limit) => { + const parsedLimit = parseInt(String(limit || DEFAULT_LIMIT), 10); + + if (Number.isNaN(parsedLimit) || parsedLimit < 1) { + return DEFAULT_LIMIT; + } + + return Math.min(parsedLimit, MAX_LIMIT); +}; + +/** + * @param {string} filename File path returned by the GitHub commit API. + * @returns {string | null} Lowercase file extension, or null when absent. + */ +const getExtension = (filename) => { + const lastPathPart = filename.split("/").pop() || filename; + const dotIndex = lastPathPart.lastIndexOf("."); + + if (dotIndex <= 0) { + return null; + } + + return lastPathPart.slice(dotIndex).toLowerCase(); +}; + +/** + * @param {string} filename File path returned by the GitHub commit API. + * @returns {boolean} Whether the file should be skipped from language totals. + */ +const shouldIgnoreFile = (filename) => { + const normalizedParts = filename.toLowerCase().split("/"); + const lastPathPart = normalizedParts[normalizedParts.length - 1]; + + if (IGNORED_FILENAMES.has(lastPathPart)) { + return true; + } + + if (normalizedParts.some((part) => IGNORED_PATH_PARTS.has(part))) { + return true; + } + + const extension = getExtension(filename); + return extension ? IGNORED_EXTENSIONS.has(extension) : false; +}; + +/** + * @param {string} filename File path returned by the GitHub commit API. + * @returns {string | null} Detected language name, or null when unknown. + */ +const detectLanguage = (filename) => { + const lastPathPart = filename.split("/").pop()?.toLowerCase() || filename; + + if (SPECIAL_FILENAMES[lastPathPart]) { + return SPECIAL_FILENAMES[lastPathPart]; + } + + const extension = getExtension(filename); + + if (!extension) { + return null; + } + + return EXTENSION_LANGUAGES[extension] || null; +}; + +/** + * @param {string} path GitHub REST API path. + * @param {string} token GitHub token used for private repository access. + * @param {Record} params Query string parameters. + * @returns {Promise} GitHub REST API response. + */ +const githubGet = (path, token, params = {}) => { + return axios({ + url: `${GITHUB_API_URL}${path}`, + method: "get", + headers: { + Accept: + "application/vnd.github.cloak-preview+json, application/vnd.github+json", + Authorization: `token ${token}`, + "X-GitHub-Api-Version": "2022-11-28", + }, + params, + }); +}; + +/** + * @param {string} token GitHub token used for private repository access. + * @param {string} query Search query. + * @param {number} page Search result page number. + * @returns {Promise} Commit search result items. + */ +const searchCommits = async (token, query, page) => { + const res = await githubGet("/search/commits", token, { + q: query, + per_page: 100, + page, + }); + + return res.data.items || []; +}; + +/** + * @param {string} token GitHub token used for private repository access. + * @param {string} owner Repository owner login. + * @param {string} repo Repository name. + * @param {string} sha Commit SHA. + * @returns {Promise} Files changed in the commit. + */ +const fetchCommitFiles = async (token, owner, repo, sha) => { + const res = await githubGet(`/repos/${owner}/${repo}/commits/${sha}`, token); + return res.data.files || []; +}; + +/** + * @param {any[]} commits Commit search result items. + * @param {number} size Batch size. + * @returns {any[][]} Commit batches. + */ +const chunkCommits = (commits, size) => { + const chunks = []; + + for (let index = 0; index < commits.length; index += size) { + chunks.push(commits.slice(index, index + size)); + } + + return chunks; +}; + +/** + * @param {{ username: string, orgs: string[], pages: number, limit: number }} variables Fetcher variables. + * @param {string} token GitHub token used for private repository access. + * @returns {Promise<{ data: Record }>} Aggregated language data. + */ +const fetcher = async ({ username, orgs, pages, limit }, token) => { + const languageStats = {}; + const seenCommits = new Set(); + const searchScopes = orgs.length > 0 ? orgs.map((org) => `org:${org}`) : [""]; + const commitsToFetch = []; + + for (const scope of searchScopes) { + const query = ["author:" + username, scope] + .filter(Boolean) + .join(" ") + .trim(); + + for (let page = 1; page <= pages; page++) { + const commits = await searchCommits(token, query, page); + + if (commits.length === 0) { + break; + } + + for (const commit of commits) { + const repository = commit.repository; + const sha = commit.sha; + const fullName = repository?.full_name; + + if (!fullName || !sha) { + continue; + } + + const commitKey = `${fullName}@${sha}`; + + if (seenCommits.has(commitKey)) { + continue; + } + + seenCommits.add(commitKey); + commitsToFetch.push(commit); + + if (commitsToFetch.length >= limit) { + break; + } + } + + if (commits.length < 100 || commitsToFetch.length >= limit) { + break; + } + } + + if (commitsToFetch.length >= limit) { + break; + } + } + + for (const commitBatch of chunkCommits( + commitsToFetch, + COMMIT_FETCH_CONCURRENCY, + )) { + const commitFiles = await Promise.all( + commitBatch.map(async (commit) => { + const [owner, repo] = commit.repository.full_name.split("/"); + return fetchCommitFiles(token, owner, repo, commit.sha); + }), + ); + + commitFiles.forEach((files) => { + const languagesInCommit = new Set(); + + for (const file of files) { + if (!file.filename || shouldIgnoreFile(file.filename)) { + continue; + } + + const language = detectLanguage(file.filename); + const additions = Number(file.additions || 0); + + if (!language || additions <= 0) { + continue; + } + + if (!languageStats[language]) { + languageStats[language] = { + name: language, + color: languageColors[language] || "#858585", + size: 0, + count: 0, + }; + } + + languageStats[language].size += additions; + languagesInCommit.add(language); + } + + languagesInCommit.forEach((language) => { + languageStats[language].count += 1; + }); + }); + } + + return { data: languageStats }; +}; + +/** + * Fetch top languages from commits authored by a given username. + * + * @param {string} username GitHub username. + * @param {string[]} orgs Organization logins to include. Empty means all accessible commits. + * @param {number | string | undefined} pages Search pages to scan, 100 commits per page. + * @param {number | string | undefined} limit Commits to inspect after search. + * @param {string | null} pat Optional PAT override. + * @returns {Promise} Top languages data. + */ +const fetchPersonalContributionLanguages = async ( + username, + orgs = [], + pages = DEFAULT_PAGES, + limit = DEFAULT_LIMIT, + pat = null, +) => { + if (!username) { + throw new MissingParamError(["username"]); + } + + const res = await retryer( + fetcher, + { + username, + orgs, + pages: normalizePages(pages), + limit: normalizeLimit(limit), + }, + pat, + ); + + if (res.status && res.status >= 400) { + logger.error(res.data); + throw new CustomError( + res.data?.message || "Could not fetch contribution language data", + CustomError.GITHUB_REST_API_ERROR, + ); + } + + if (!res.data || Object.keys(res.data).length === 0) { + throw new CustomError( + "No contribution language data found", + CustomError.GITHUB_REST_API_ERROR, + ); + } + + return Object.keys(res.data) + .sort((a, b) => res.data[b].size - res.data[a].size) + .reduce((result, key) => { + result[key] = res.data[key]; + return result; + }, {}); +}; + +export { + detectLanguage, + fetchPersonalContributionLanguages, + normalizeLimit, + normalizePages, + shouldIgnoreFile, +}; +export default fetchPersonalContributionLanguages; diff --git a/packages/core/tests/fetchPersonalContributionLanguages.test.js b/packages/core/tests/fetchPersonalContributionLanguages.test.js new file mode 100644 index 0000000000000..09a3de5e57f4a --- /dev/null +++ b/packages/core/tests/fetchPersonalContributionLanguages.test.js @@ -0,0 +1,118 @@ +import axios from "axios"; +import MockAdapter from "axios-mock-adapter"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { + detectLanguage, + fetchPersonalContributionLanguages, + normalizeLimit, + normalizePages, + shouldIgnoreFile, +} from "../src/fetchers/personal-contribution-languages.js"; + +vi.mock(import("../src/common/log.js"), async () => { + const { createLoggerMock } = await import("./utils.js"); + return createLoggerMock(); +}); + +const mock = new MockAdapter(axios); + +afterEach(() => { + mock.reset(); +}); + +describe("FetchPersonalContributionLanguages", () => { + it("should detect languages from common contribution file paths", () => { + expect(detectLanguage("src/index.ts")).toBe("TypeScript"); + expect(detectLanguage("Dockerfile")).toBe("Dockerfile"); + expect(detectLanguage("unknown/file.ext")).toBe(null); + }); + + it("should ignore generated and binary files", () => { + expect(shouldIgnoreFile("dist/app.js")).toBe(true); + expect(shouldIgnoreFile("src/logo.png")).toBe(true); + expect(shouldIgnoreFile("src/app.ts")).toBe(false); + }); + + it("should clamp page count", () => { + expect(normalizePages("0")).toBe(2); + expect(normalizePages("50")).toBe(10); + expect(normalizePages("3")).toBe(3); + }); + + it("should clamp commit limit", () => { + expect(normalizeLimit("0")).toBe(40); + expect(normalizeLimit("500")).toBe(200); + expect(normalizeLimit("30")).toBe(30); + }); + + it("should fetch and aggregate additions from authored commits", async () => { + mock.onGet("https://api.github.com/search/commits").reply((config) => { + expect(config.params.q).toBe("author:andre org:acme"); + expect(config.params.page).toBe(1); + + return [ + 200, + { + items: [ + { + sha: "abc123", + repository: { full_name: "acme/app" }, + }, + { + sha: "def456", + repository: { full_name: "acme/api" }, + }, + ], + }, + ]; + }); + + mock + .onGet("https://api.github.com/repos/acme/app/commits/abc123") + .reply(200, { + files: [ + { filename: "src/App.tsx", additions: 12 }, + { filename: "dist/App.js", additions: 200 }, + { filename: "README.md", additions: 4 }, + ], + }); + + mock + .onGet("https://api.github.com/repos/acme/api/commits/def456") + .reply(200, { + files: [ + { filename: "server.js", additions: 5 }, + { filename: "package-lock.json", additions: 50 }, + ], + }); + + const languages = await fetchPersonalContributionLanguages( + "andre", + ["acme"], + 1, + 40, + ); + + expect(languages).toStrictEqual({ + TypeScript: { + color: expect.any(String), + count: 1, + name: "TypeScript", + size: 12, + }, + JavaScript: { + color: expect.any(String), + count: 1, + name: "JavaScript", + size: 5, + }, + Markdown: { + color: expect.any(String), + count: 1, + name: "Markdown", + size: 4, + }, + }); + }); +});