diff --git a/README.md b/README.md index d8218d0f..8a419251 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,7 @@ Run `opencli list` for the live registry. | **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | Browser | | **google** | `news` `search` `suggest` `trends` | Public | | **36kr** | `news` `hot` `search` `article` | Public / Browser | +| **imdb** | `search` `title` `top` `trending` `person` `reviews` | Public | | **producthunt** | `posts` `today` `hot` `browse` | Public / Browser | | **instagram** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | Browser | | **lobsters** | `hot` `newest` `active` `tag` | Public | diff --git a/README.zh-CN.md b/README.zh-CN.md index 7ad8b462..1b0263bc 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -173,6 +173,7 @@ npm install -g @jackwener/opencli@latest | **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 浏览器 | | **google** | `news` `search` `suggest` `trends` | 公开 | | **36kr** | `news` `hot` `search` `article` | 公开 / 浏览器 | +| **imdb** | `search` `title` `top` `trending` `person` `reviews` | 公开 | | **producthunt** | `posts` `today` `hot` `browse` | 公开 / 浏览器 | | **instagram** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | 浏览器 | | **lobsters** | `hot` `newest` `active` `tag` | 公开 | diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index e73ca4d2..7141e57b 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -79,6 +79,7 @@ export default defineConfig({ { text: 'Doubao', link: '/adapters/browser/doubao' }, { text: 'Facebook', link: '/adapters/browser/facebook' }, { text: 'Google', link: '/adapters/browser/google' }, + { text: 'IMDb', link: 
'/adapters/browser/imdb' }, { text: 'Instagram', link: '/adapters/browser/instagram' }, { text: 'JD.com', link: '/adapters/browser/jd' }, { text: 'Medium', link: '/adapters/browser/medium' }, diff --git a/docs/adapters/browser/imdb.md b/docs/adapters/browser/imdb.md new file mode 100644 index 00000000..1e7dfd4b --- /dev/null +++ b/docs/adapters/browser/imdb.md @@ -0,0 +1,47 @@ +# IMDb + +**Mode**: 🌐 Public (Browser) · **Domain**: `www.imdb.com` + +## Commands + +| Command | Description | +|---------|-------------| +| `opencli imdb search` | Search movies, TV shows, and people | +| `opencli imdb title` | Get movie or TV show details | +| `opencli imdb top` | IMDb Top 250 Movies | +| `opencli imdb trending` | IMDb Most Popular Movies | +| `opencli imdb person` | Get actor or director info | +| `opencli imdb reviews` | Get user reviews for a title | + +## Usage Examples + +```bash +# Search for a movie +opencli imdb search "inception" --limit 10 + +# Get movie details +opencli imdb title tt1375666 + +# Get TV series details (also accepts full URL) +opencli imdb title "https://www.imdb.com/title/tt0903747/" + +# Top 250 movies +opencli imdb top --limit 20 + +# Currently trending movies +opencli imdb trending --limit 10 + +# Actor/director info with filmography +opencli imdb person nm0634240 --limit 5 + +# User reviews +opencli imdb reviews tt1375666 --limit 5 + +# JSON output +opencli imdb top --limit 5 -f json +``` + +## Prerequisites + +- Chrome with Browser Bridge extension installed +- No login required (all data is public) diff --git a/docs/adapters/index.md b/docs/adapters/index.md index 5fbe2b6e..cb2950c2 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -32,6 +32,7 @@ Run `opencli list` for the live registry. 
| **[weread](/adapters/browser/weread)** | `shelf` `search` `book` `ranking` `notebooks` `highlights` `notes` | 🔐 Browser | | **[douban](/adapters/browser/douban)** | `search` `top250` `subject` `photos` `download` `marks` `reviews` `movie-hot` `book-hot` | 🔐 Browser | | **[facebook](/adapters/browser/facebook)** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 🔐 Browser | +| **[imdb](/adapters/browser/imdb)** | `search` `title` `top` `trending` `person` `reviews` | 🌐 / 🔐 | | **[instagram](/adapters/browser/instagram)** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | 🔐 Browser | | **[medium](/adapters/browser/medium)** | `feed` `search` `user` | 🔐 Browser | | **[sinablog](/adapters/browser/sinablog)** | `hot` `search` `article` `user` | 🔐 Browser | diff --git a/src/clis/douban/download.test.ts b/src/clis/douban/download.test.ts index 7c993424..c450052a 100644 --- a/src/clis/douban/download.test.ts +++ b/src/clis/douban/download.test.ts @@ -1,4 +1,5 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; +import path from 'node:path'; import type { CliCommand } from '../../registry.js'; import { getRegistry } from '../../registry.js'; import type { IPage } from '../../types.js'; @@ -39,6 +40,10 @@ beforeAll(() => { expect(cmd?.func).toBeTypeOf('function'); }); +function toPosixPath(value: string): string { + return value.replaceAll(path.sep, '/'); +} + describe('douban download', () => { beforeEach(() => { mockHttpDownload.mockReset(); @@ -89,26 +94,22 @@ describe('douban download', () => { type: 'Rb', limit: 20, }); - expect(mockMkdirSync).toHaveBeenCalledWith('/tmp/douban-test/30382501', { recursive: true }); + expect(mockMkdirSync).toHaveBeenCalledTimes(1); + expect(toPosixPath(mockMkdirSync.mock.calls[0][0])).toBe('/tmp/douban-test/30382501'); + expect(mockMkdirSync.mock.calls[0][1]).toEqual({ 
recursive: true }); expect(mockHttpDownload).toHaveBeenCalledTimes(2); - expect(mockHttpDownload).toHaveBeenNthCalledWith( - 1, - 'https://img1.doubanio.com/view/photo/l/public/p2913450214.webp', - '/tmp/douban-test/30382501/30382501_001_2913450214_Main_poster.webp', - expect.objectContaining({ - headers: { Referer: 'https://movie.douban.com/photos/photo/2913450214/' }, - timeout: 60000, - }), - ); - expect(mockHttpDownload).toHaveBeenNthCalledWith( - 2, - 'https://img1.doubanio.com/view/photo/l/public/p2913450215.jpg', - '/tmp/douban-test/30382501/30382501_002_2913450215_Character_poster.jpg', - expect.objectContaining({ - headers: { Referer: 'https://movie.douban.com/photos/photo/2913450215/' }, - timeout: 60000, - }), - ); + expect(mockHttpDownload.mock.calls[0]?.[0]).toBe('https://img1.doubanio.com/view/photo/l/public/p2913450214.webp'); + expect(toPosixPath(mockHttpDownload.mock.calls[0]?.[1])).toBe('/tmp/douban-test/30382501/30382501_001_2913450214_Main_poster.webp'); + expect(mockHttpDownload.mock.calls[0]?.[2]).toEqual(expect.objectContaining({ + headers: { Referer: 'https://movie.douban.com/photos/photo/2913450214/' }, + timeout: 60000, + })); + expect(mockHttpDownload.mock.calls[1]?.[0]).toBe('https://img1.doubanio.com/view/photo/l/public/p2913450215.jpg'); + expect(toPosixPath(mockHttpDownload.mock.calls[1]?.[1])).toBe('/tmp/douban-test/30382501/30382501_002_2913450215_Character_poster.jpg'); + expect(mockHttpDownload.mock.calls[1]?.[2]).toEqual(expect.objectContaining({ + headers: { Referer: 'https://movie.douban.com/photos/photo/2913450215/' }, + timeout: 60000, + })); expect(result).toEqual([ { @@ -164,14 +165,13 @@ describe('douban download', () => { type: 'Rb', targetPhotoId: '2913450215', }); - expect(mockHttpDownload).toHaveBeenCalledWith( - 'https://img1.doubanio.com/view/photo/l/public/p2913450215.jpg', - '/tmp/douban-test/30382501/30382501_002_2913450215_Character_poster.jpg', - expect.objectContaining({ - headers: { Referer: 
'https://movie.douban.com/photos/photo/2913450215/' }, - timeout: 60000, - }), - ); + expect(mockHttpDownload).toHaveBeenCalledTimes(1); + expect(mockHttpDownload.mock.calls[0]?.[0]).toBe('https://img1.doubanio.com/view/photo/l/public/p2913450215.jpg'); + expect(toPosixPath(mockHttpDownload.mock.calls[0]?.[1])).toBe('/tmp/douban-test/30382501/30382501_002_2913450215_Character_poster.jpg'); + expect(mockHttpDownload.mock.calls[0]?.[2]).toEqual(expect.objectContaining({ + headers: { Referer: 'https://movie.douban.com/photos/photo/2913450215/' }, + timeout: 60000, + })); expect(result).toEqual([ { diff --git a/src/clis/imdb/person.ts b/src/clis/imdb/person.ts new file mode 100644 index 00000000..defc5d00 --- /dev/null +++ b/src/clis/imdb/person.ts @@ -0,0 +1,232 @@ +import { CommandExecutionError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { + forceEnglishUrl, + getCurrentImdbId, + isChallengePage, + normalizeImdbId, + waitForImdbPath, +} from './utils.js'; + +/** + * Read IMDb person details from public profile pages. 
+ */ +cli({ + site: 'imdb', + name: 'person', + description: 'Get actor or director info', + domain: 'www.imdb.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'id', positional: true, required: true, help: 'IMDb person ID (nm0634240) or URL' }, + { name: 'limit', type: 'int', default: 10, help: 'Max filmography entries' }, + ], + columns: ['field', 'value'], + func: async (page, args) => { + const id = normalizeImdbId(String(args.id), 'nm'); + // Clamp to 30 to match the internal evaluate cap + const limit = Math.max(1, Math.min(Number(args.limit) || 10, 30)); + const url = forceEnglishUrl(`https://www.imdb.com/name/${id}/`); + + await page.goto(url); + const onPersonPage = await waitForImdbPath(page, `^/name/${id}/`); + + if (await isChallengePage(page)) { + throw new CommandExecutionError( + 'IMDb blocked this request', + 'Try again with a normal browser session or extension mode', + ); + } + if (!onPersonPage) { + throw new CommandExecutionError( + `Person page did not finish loading: ${id}`, + 'Retry the command; if it persists, IMDb may have changed their navigation flow', + ); + } + + const currentId = await getCurrentImdbId(page, 'nm'); + if (currentId && currentId !== id) { + throw new CommandExecutionError( + `IMDb redirected to a different person: ${currentId}`, + 'Retry the command; if it persists, the person page may have changed', + ); + } + + const data = await page.evaluate(` + (function() { + var result = { + nameId: '', + name: '', + description: '', + birthDate: '', + filmography: [] + }; + + var scripts = document.querySelectorAll('script[type="application/ld+json"]'); + for (var i = 0; i < scripts.length; i++) { + try { + var ld = JSON.parse(scripts[i].textContent || 'null'); + if (ld && ld['@type'] === 'Person') { + if (typeof ld.url === 'string') { + var ldMatch = ld.url.match(/(nm\\d{7,8})/); + if (ldMatch) { + result.nameId = ldMatch[1]; + } + } + result.name = result.name || ld.name || ''; + result.description = 
result.description || ld.description || ''; + break; + } + } catch (error) { + void error; + } + } + + var nextDataEl = document.getElementById('__NEXT_DATA__'); + if (!nextDataEl) { + return result; + } + + try { + var nextData = JSON.parse(nextDataEl.textContent || 'null'); + var pageProps = nextData && nextData.props && nextData.props.pageProps; + var above = pageProps && (pageProps.aboveTheFold || pageProps.aboveTheFoldData); + var main = pageProps && (pageProps.mainColumnData || pageProps.belowTheFold); + + if (above) { + if (!result.nameId && above.id) { + result.nameId = String(above.id); + } + if (!result.name && above.nameText && above.nameText.text) { + result.name = above.nameText.text; + } + + if (above.birthDate) { + if (above.birthDate.displayableProperty && above.birthDate.displayableProperty.value) { + result.birthDate = above.birthDate.displayableProperty.value.plainText || ''; + } + if (!result.birthDate && above.birthDate.dateComponents) { + var dc = above.birthDate.dateComponents; + result.birthDate = [dc.year, dc.month, dc.day].filter(Boolean).join('-'); + } + } + + if (above.bio && above.bio.text && above.bio.text.plainText) { + result.description = above.bio.text.plainText.substring(0, 300); + } + } + + var pushFilmography = function(title, year, role) { + if (!title) { + return; + } + result.filmography.push({ + title: title, + year: year || '', + role: role || '' + }); + }; + + var knownFor = main && main.knownForFeatureV2; + if (knownFor && Array.isArray(knownFor.credits)) { + for (var j = 0; j < knownFor.credits.length; j++) { + var knownNode = knownFor.credits[j]; + if (!knownNode || !knownNode.title) { + continue; + } + var knownRole = ''; + var knownRoleEdge = knownNode.creditedRoles && Array.isArray(knownNode.creditedRoles.edges) + ? knownNode.creditedRoles.edges[0] + : null; + if (knownRoleEdge && knownRoleEdge.node) { + knownRole = knownRoleEdge.node.text + || (knownRoleEdge.node.category ? 
knownRoleEdge.node.category.text || '' : ''); + } + pushFilmography( + knownNode.title.titleText ? knownNode.title.titleText.text : '', + knownNode.title.releaseYear ? String(knownNode.title.releaseYear.year || '') : '', + knownRole + ); + } + } + + if (result.filmography.length === 0) { + var creditSources = []; + if (main && main.released && Array.isArray(main.released.edges)) { + creditSources.push(main.released.edges); + } + if (main && main.groupings && Array.isArray(main.groupings.edges)) { + creditSources.push(main.groupings.edges); + } + + for (var k = 0; k < creditSources.length && result.filmography.length < 30; k++) { + var groups = creditSources[k]; + for (var m = 0; m < groups.length && result.filmography.length < 30; m++) { + var groupNode = groups[m] && groups[m].node; + if (!groupNode) { + continue; + } + + var roleName = groupNode.grouping ? groupNode.grouping.text || '' : ''; + var credits = groupNode.credits && Array.isArray(groupNode.credits.edges) + ? groupNode.credits.edges + : []; + for (var n = 0; n < credits.length && result.filmography.length < 30; n++) { + var creditNode = credits[n] && credits[n].node; + if (!creditNode || !creditNode.title) { + continue; + } + pushFilmography( + creditNode.title.titleText ? creditNode.title.titleText.text : (creditNode.title.originalTitleText ? creditNode.title.originalTitleText.text : ''), + creditNode.title.releaseYear ? 
String(creditNode.title.releaseYear.year || '') : '', + roleName + ); + } + } + } + } + } catch (error) { + void error; + } + + return result; + })() + `); + + if (!data || typeof data !== 'object' || !('name' in data) || !(data as Record<string, any>).name) { + throw new CommandExecutionError(`Person not found: ${id}`, 'Check the person ID and try again'); + } + + const result = data as Record<string, any>; + if (result.nameId && result.nameId !== id) { + throw new CommandExecutionError( + `IMDb returned a different person payload: ${result.nameId}`, + 'Retry the command; if it persists, the person parser may need updating', + ); + } + const filmography = Array.isArray(result.filmography) ? result.filmography : []; + + // Override url with a clean canonical URL (no query params like ?language=en-US) + result.url = `https://www.imdb.com/name/${id}/`; + + const rows = Object.entries(result) + .filter(([field, value]) => field !== 'filmography' && field !== 'nameId' && value !== '' && value != null) + .map(([field, value]) => ({ field, value: String(value) })); + + if (filmography.length > 0) { + rows.push({ field: 'filmography', value: '' }); + for (const entry of filmography.slice(0, limit)) { + const suffix = [entry.year ? `(${entry.year})` : '', entry.role ? `[${entry.role}]` : ''] + .filter(Boolean) + .join(' '); + rows.push({ + field: String(entry.title || ''), + value: suffix, + }); + } + } + + return rows; + }, +}); diff --git a/src/clis/imdb/reviews.ts b/src/clis/imdb/reviews.ts new file mode 100644 index 00000000..d529fd4c --- /dev/null +++ b/src/clis/imdb/reviews.ts @@ -0,0 +1,111 @@ +import { CommandExecutionError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { + forceEnglishUrl, + getCurrentImdbId, + isChallengePage, + normalizeImdbId, + waitForImdbPath, + waitForImdbReviewsReady, +} from './utils.js'; + +/** + * Read IMDb user reviews from the first review page.
+ */ +cli({ + site: 'imdb', + name: 'reviews', + description: 'Get user reviews for a movie or TV show', + domain: 'www.imdb.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'id', positional: true, required: true, help: 'IMDb title ID (tt1375666) or URL' }, + { name: 'limit', type: 'int', default: 10, help: 'Number of reviews' }, + ], + columns: ['rank', 'title', 'rating', 'author', 'date', 'text'], + func: async (page, args) => { + const id = normalizeImdbId(String(args.id), 'tt'); + const limit = Math.max(1, Math.min(Number(args.limit) || 10, 25)); + const url = forceEnglishUrl(`https://www.imdb.com/title/${id}/reviews/`); + + await page.goto(url); + const onReviewsPage = await waitForImdbPath(page, `^/title/${id}/reviews/?$`); + const reviewsReady = await waitForImdbReviewsReady(page, 15000); + + if (await isChallengePage(page)) { + throw new CommandExecutionError( + 'IMDb blocked this request', + 'Try again with a normal browser session or extension mode', + ); + } + if (!onReviewsPage || !reviewsReady) { + throw new CommandExecutionError( + 'IMDb reviews did not finish loading', + 'Retry the command; if it persists, the review page structure may have changed', + ); + } + + const currentId = await getCurrentImdbId(page, 'tt'); + if (currentId && currentId !== id) { + throw new CommandExecutionError( + `IMDb redirected to a different title: ${currentId}`, + 'Retry the command; if it persists, the review page may have changed', + ); + } + + const reviews = await page.evaluate(` + (function() { + var limit = ${limit}; + var items = []; + var containers = document.querySelectorAll('article.user-review-item, [data-testid="review-card-parent"], .imdb-user-review, [data-testid="review-card"], .review-container'); + + for (var i = 0; i < containers.length && items.length < limit; i++) { + var el = containers[i]; + var titleEl = el.querySelector('.title, [data-testid="review-summary"], a.title'); + var ratingEl = el.querySelector('.review-rating 
.ipc-rating-star--rating, .rating-other-user-rating span:first-child, [data-testid="review-rating"]'); + var authorEl = el.querySelector('.display-name-link a, [data-testid="author-link"], .author-text, a[href*="/user/"]'); + var dateEl = el.querySelector('.review-date, [data-testid="review-date"]'); + var textEl = el.querySelector('.content .text, .content .show-more__control, [data-testid="review-overflow"]'); + + var title = titleEl ? (titleEl.textContent || '').trim() : ''; + var text = textEl ? (textEl.textContent || '').replace(/\\s+/g, ' ').trim().slice(0, 200) : ''; + + if (!title && !text) { + continue; + } + + // Deduplicate: IMDb renders both preview and expanded versions of each review; key on title + author so distinct reviews that share a title (or have none) are kept + var isDupe = false, author = authorEl ? (authorEl.textContent || '').trim() : ''; + for (var d = 0; d < items.length; d++) { + if (items[d].title === title && items[d].author === author) { isDupe = true; break; } + } + if (isDupe) { continue; } + + items.push({ + title: title, + rating: ratingEl ? (ratingEl.textContent || '').trim() : '', + author: author, + date: dateEl ? (dateEl.textContent || '').trim() : '', + text: text + }); + } + + return items; + })() + `); + + if (!Array.isArray(reviews)) { + return []; + } + + return reviews.map((item: any, index: number) => ({ + rank: index + 1, + title: item.title || '', + rating: item.rating || '', + author: item.author || '', + date: item.date || '', + text: item.text || '', + })); + }, +}); diff --git a/src/clis/imdb/search.ts b/src/clis/imdb/search.ts new file mode 100644 index 00000000..c5af029e --- /dev/null +++ b/src/clis/imdb/search.ts @@ -0,0 +1,179 @@ +import { ArgumentError, CommandExecutionError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { + forceEnglishUrl, + isChallengePage, + normalizeImdbTitleType, + waitForImdbPath, + waitForImdbSearchReady, +} from './utils.js'; + +/** + * Search IMDb via the public search page and parse Next.js payload first.
+ */ +cli({ + site: 'imdb', + name: 'search', + description: 'Search IMDb for movies, TV shows, and people', + domain: 'www.imdb.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'query', positional: true, required: true, help: 'Search query' }, + { name: 'limit', type: 'int', default: 20, help: 'Number of results' }, + ], + columns: ['rank', 'id', 'title', 'year', 'type', 'url'], + func: async (page, args) => { + const query = String(args.query || '').trim(); + // Reject empty or whitespace-only queries early + if (!query) { + throw new ArgumentError('Search query cannot be empty'); + } + const limit = Math.max(1, Math.min(Number(args.limit) || 20, 50)); + const url = forceEnglishUrl(`https://www.imdb.com/find/?q=${encodeURIComponent(query)}&ref_=nv_sr_sm`); + + await page.goto(url); + const onSearchPage = await waitForImdbPath(page, '^/find/?$'); + const searchReady = await waitForImdbSearchReady(page, 15000); + + if (await isChallengePage(page)) { + throw new CommandExecutionError( + 'IMDb blocked this request', + 'Try again with a normal browser session or extension mode', + ); + } + if (!onSearchPage || !searchReady) { + throw new CommandExecutionError( + 'IMDb search results did not finish loading', + 'Retry the command; if it persists, the search page structure may have changed', + ); + } + + const results = await page.evaluate(` + (function() { + var results = []; + + function pushResult(item) { + if (!item || !item.id || !item.title) { + return; + } + results.push(item); + } + + var nextDataEl = document.getElementById('__NEXT_DATA__'); + if (nextDataEl) { + try { + var nextData = JSON.parse(nextDataEl.textContent || 'null'); + var pageProps = nextData && nextData.props && nextData.props.pageProps; + if (pageProps) { + // IMDb wraps results as {index: "tt...", listItem: {...}} + var titleResults = (pageProps.titleResults && pageProps.titleResults.results) || []; + for (var i = 0; i < titleResults.length; i++) { + var tr = 
titleResults[i] || {}; + var tItem = tr.listItem || {}; + var tId = tr.index || ''; + var tTitle = typeof tItem.originalTitleText === 'string' + ? tItem.originalTitleText + : (tItem.originalTitleText && tItem.originalTitleText.text) || ''; + if (!tTitle) { + tTitle = typeof tItem.titleText === 'string' + ? tItem.titleText + : (tItem.titleText && tItem.titleText.text) || ''; + } + var tYear = ''; + if (typeof tItem.releaseYear === 'number' || typeof tItem.releaseYear === 'string') { + tYear = String(tItem.releaseYear); + } else if (tItem.releaseYear && typeof tItem.releaseYear === 'object') { + tYear = String(tItem.releaseYear.year || ''); + } + pushResult({ + id: tId, + title: tTitle, + year: tYear, + type: tItem.titleType || (tItem.endYear != null ? 'tvSeries' : ''), + url: tId ? 'https://www.imdb.com/title/' + tId + '/' : '' + }); + } + + var nameResults = (pageProps.nameResults && pageProps.nameResults.results) || []; + for (var j = 0; j < nameResults.length; j++) { + var nr = nameResults[j] || {}; + var nItem = nr.listItem || {}; + var nId = nr.index || ''; + var nTitle = typeof nItem.nameText === 'string' + ? nItem.nameText + : (nItem.nameText && nItem.nameText.text) || ''; + if (!nTitle) { + nTitle = typeof nItem.originalNameText === 'string' + ? nItem.originalNameText + : (nItem.originalNameText && nItem.originalNameText.text) || ''; + } + var nType = ''; + if (typeof nItem.primaryProfession === 'string') { + nType = nItem.primaryProfession; + } else if (Array.isArray(nItem.primaryProfessions) && nItem.primaryProfessions.length > 0) { + nType = String(nItem.primaryProfessions[0] || ''); + } else if (Array.isArray(nItem.professions) && nItem.professions.length > 0) { + nType = String(nItem.professions[0] || ''); + } + pushResult({ + id: nId, + title: nTitle, + year: nItem.knownFor && nItem.knownFor.yearRange ? String(nItem.knownFor.yearRange.year || '') : (nItem.knownForTitleYear ? String(nItem.knownForTitleYear) : ''), + type: nType || 'Person', + url: nId ? 
'https://www.imdb.com/name/' + nId + '/' : '' + }); + } + } + } catch (error) { + void error; + } + } + + if (results.length === 0) { + var items = document.querySelectorAll('[class*="find-title-result"], [class*="find-name-result"], .ipc-metadata-list-summary-item'); + for (var k = 0; k < items.length; k++) { + var el = items[k]; + var linkEl = el.querySelector('a[href*="/title/"], a[href*="/name/"]'); + if (!linkEl) { + continue; + } + + var href = linkEl.getAttribute('href') || ''; + var idMatch = href.match(/(tt|nm)\\d{7,8}/); + if (!idMatch) { + continue; + } + + var titleEl = el.querySelector('.ipc-metadata-list-summary-item__t, h3, a'); + var metaEls = el.querySelectorAll('.ipc-metadata-list-summary-item__li, span'); + var absoluteUrl = href.startsWith('http') ? href : 'https://www.imdb.com' + href.split('?')[0]; + + pushResult({ + id: idMatch[0], + title: titleEl ? (titleEl.textContent || '').trim() : '', + year: metaEls.length > 0 ? (metaEls[0].textContent || '').trim() : '', + type: metaEls.length > 1 ? (metaEls[1].textContent || '').trim() : '', + url: absoluteUrl + }); + } + } + + return results; + })() + `); + + if (!Array.isArray(results)) { + return []; + } + + return results.slice(0, limit).map((item: any, index: number) => ({ + rank: index + 1, + id: item.id || '', + title: item.title || '', + year: item.year || '', + type: normalizeImdbTitleType(item.type), + url: item.url || '', + })); + }, +}); diff --git a/src/clis/imdb/title.ts b/src/clis/imdb/title.ts new file mode 100644 index 00000000..7003a9e3 --- /dev/null +++ b/src/clis/imdb/title.ts @@ -0,0 +1,121 @@ +import { CommandExecutionError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { + extractJsonLd, + forceEnglishUrl, + formatDuration, + getCurrentImdbId, + isChallengePage, + normalizeImdbId, + waitForImdbPath, +} from './utils.js'; + +/** + * Read IMDb title details from JSON-LD on the public page. 
+ */ +cli({ + site: 'imdb', + name: 'title', + description: 'Get movie or TV show details', + domain: 'www.imdb.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'id', positional: true, required: true, help: 'IMDb title ID (tt1375666) or URL' }, + ], + columns: ['field', 'value'], + func: async (page, args) => { + const id = normalizeImdbId(String(args.id), 'tt'); + const url = forceEnglishUrl(`https://www.imdb.com/title/${id}/`); + + await page.goto(url); + const onTitlePage = await waitForImdbPath(page, `^/title/${id}/`); + + if (await isChallengePage(page)) { + throw new CommandExecutionError( + 'IMDb blocked this request', + 'Try again with a normal browser session or extension mode', + ); + } + if (!onTitlePage) { + throw new CommandExecutionError( + `Title page did not finish loading: ${id}`, + 'Retry the command; if it persists, IMDb may have changed their navigation flow', + ); + } + + const currentId = await getCurrentImdbId(page, 'tt'); + if (currentId && currentId !== id) { + throw new CommandExecutionError( + `IMDb redirected to a different title: ${currentId}`, + 'Retry the command; if it persists, the title page may have changed', + ); + } + + // Single browser roundtrip: fetch title JSON-LD by type whitelist + const titleTypes = ['Movie', 'TVSeries', 'TVEpisode', 'TVMiniseries', 'TVMovie', 'TVSpecial', 'VideoGame', 'ShortFilm']; + const ld = await extractJsonLd(page, titleTypes); + if (!ld) { + throw new CommandExecutionError(`Title not found: ${id}`, 'Check the title ID and try again'); + } + + const data = ld as Record<string, any>; + const type = String(data['@type'] || ''); + const isTvSeries = type === 'TVSeries' || type === 'TVMiniseries'; + + // Handle both array and single-object JSON-LD person fields + const toPeople = (arr: any): string => { + if (!arr) return ''; + const list = Array.isArray(arr) ?
arr : [arr]; + return list + .slice(0, 5) + .map((p: any) => p.name || '') + .filter(Boolean) + .join(', '); + }; + + const year = (() => { + if (isTvSeries && typeof data.startDate === 'string') { + const startYear = data.startDate.split('-')[0] || ''; + const endYear = typeof data.endDate === 'string' ? data.endDate.split('-')[0] || '' : ''; + // Show "2024-" for ongoing series (no endDate) or "2010-2015" for ended ones + return endYear ? `${startYear}-${endYear}` : `${startYear}-`; + } + if (typeof data.datePublished === 'string') { + return data.datePublished.split('-')[0] || ''; + } + return ''; + })(); + + const directorField = isTvSeries ? 'creator' : 'director'; + const directorValue = isTvSeries ? toPeople(data.creator) : toPeople(data.director); + + const fields: Record<string, string> = { + title: String(data.name || ''), + type, + year, + rating: data.aggregateRating?.ratingValue != null ? String(data.aggregateRating.ratingValue) : '', + votes: data.aggregateRating?.ratingCount != null ? String(data.aggregateRating.ratingCount) : '', + genre: Array.isArray(data.genre) ?
data.genre.join(', ') : String(data.genre || ''), + [directorField]: directorValue, + cast: toPeople(data.actor), + duration: formatDuration(String(data.duration || '')), + contentRating: String(data.contentRating || ''), + plot: String(data.description || ''), + url: `https://www.imdb.com/title/${id}/`, + }; + + if (isTvSeries) { + if (data.numberOfSeasons != null) { + fields.seasons = String(data.numberOfSeasons); + } + if (data.numberOfEpisodes != null) { + fields.episodes = String(data.numberOfEpisodes); + } + } + + return Object.entries(fields) + .filter(([, value]) => value !== '') + .map(([field, value]) => ({ field, value })); + }, +}); diff --git a/src/clis/imdb/top.ts b/src/clis/imdb/top.ts new file mode 100644 index 00000000..60c89689 --- /dev/null +++ b/src/clis/imdb/top.ts @@ -0,0 +1,67 @@ +import { CommandExecutionError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { extractJsonLd, forceEnglishUrl, isChallengePage } from './utils.js'; + +/** + * Fetch the IMDb Top 250 Movies list from JSON-LD structured data on the chart page. 
+ */ +cli({ + site: 'imdb', + name: 'top', + description: 'IMDb Top 250 Movies', + domain: 'www.imdb.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'limit', type: 'int', default: 20, help: 'Number of results' }, + ], + columns: ['rank', 'title', 'rating', 'votes', 'genre', 'url'], + func: async (page, args) => { + const url = forceEnglishUrl('https://www.imdb.com/chart/top/'); + + await page.goto(url); + await page.wait(2); + + if (await isChallengePage(page)) { + throw new CommandExecutionError( + 'IMDb blocked this request', + 'Try again with a normal browser session or extension mode', + ); + } + + // Extract the ItemList JSON-LD block which contains all chart entries + const ld = await extractJsonLd(page, 'ItemList'); + if (!ld || !Array.isArray(ld.itemListElement)) { + throw new CommandExecutionError( + 'Could not find chart data on page', + 'IMDb may have changed their page structure', + ); + } + + const limit = Math.max(1, Math.min(Number(args.limit) || 20, 250)); + const items = (ld.itemListElement as any[]).slice(0, limit); + + return items.map((entry: any, index: number) => { + const item = entry.item || {}; + const rating = item.aggregateRating || {}; + const genre = Array.isArray(item.genre) + ? item.genre.join(', ') + : String(item.genre || ''); + + // Normalize relative URLs to absolute IMDb URLs + let itemUrl: string = item.url || ''; + if (itemUrl && !/^https?:\/\//.test(itemUrl)) { + itemUrl = 'https://www.imdb.com' + itemUrl; + } + + return { + rank: entry.position || index + 1, + title: String(item.name || ''), + rating: rating.ratingValue != null ? String(rating.ratingValue) : '', + votes: rating.ratingCount != null ? 
String(rating.ratingCount) : '', + genre, + url: itemUrl, + }; + }); + }, +}); diff --git a/src/clis/imdb/trending.ts b/src/clis/imdb/trending.ts new file mode 100644 index 00000000..32a31cdb --- /dev/null +++ b/src/clis/imdb/trending.ts @@ -0,0 +1,66 @@ +import { CommandExecutionError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { extractJsonLd, forceEnglishUrl, isChallengePage } from './utils.js'; + +/** + * Fetch the IMDb Most Popular Movies (MovieMeter) list from JSON-LD structured data. + */ +cli({ + site: 'imdb', + name: 'trending', + description: 'IMDb Most Popular Movies', + domain: 'www.imdb.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'limit', type: 'int', default: 20, help: 'Number of results' }, + ], + columns: ['rank', 'title', 'rating', 'genre', 'url'], + func: async (page, args) => { + const url = forceEnglishUrl('https://www.imdb.com/chart/moviemeter/'); + + await page.goto(url); + await page.wait(2); + + if (await isChallengePage(page)) { + throw new CommandExecutionError( + 'IMDb blocked this request', + 'Try again with a normal browser session or extension mode', + ); + } + + // Extract the ItemList JSON-LD block which contains all chart entries + const ld = await extractJsonLd(page, 'ItemList'); + if (!ld || !Array.isArray(ld.itemListElement)) { + throw new CommandExecutionError( + 'Could not find chart data on page', + 'IMDb may have changed their page structure', + ); + } + + const limit = Math.max(1, Math.min(Number(args.limit) || 20, 100)); + const items = (ld.itemListElement as any[]).slice(0, limit); + + return items.map((entry: any, index: number) => { + const item = entry.item || {}; + const rating = item.aggregateRating || {}; + const genre = Array.isArray(item.genre) + ? 
item.genre.join(', ') + : String(item.genre || ''); + + // Normalize relative URLs to absolute IMDb URLs + let itemUrl: string = item.url || ''; + if (itemUrl && !/^https?:\/\//.test(itemUrl)) { + itemUrl = 'https://www.imdb.com' + itemUrl; + } + + return { + rank: entry.position || index + 1, + title: String(item.name || ''), + rating: rating.ratingValue != null ? String(rating.ratingValue) : '', + genre, + url: itemUrl, + }; + }); + }, +}); diff --git a/src/clis/imdb/utils.test.ts b/src/clis/imdb/utils.test.ts new file mode 100644 index 00000000..cf2a9bda --- /dev/null +++ b/src/clis/imdb/utils.test.ts @@ -0,0 +1,117 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; +import { + extractJsonLd, + forceEnglishUrl, + formatDuration, + getCurrentImdbId, + isChallengePage, + normalizeImdbTitleType, + normalizeImdbId, + waitForImdbPath, + waitForImdbReviewsReady, + waitForImdbSearchReady, +} from './utils.js'; + +describe('normalizeImdbId', () => { + it('passes through bare ids', () => { + expect(normalizeImdbId('tt1375666', 'tt')).toBe('tt1375666'); + expect(normalizeImdbId('nm0634240', 'nm')).toBe('nm0634240'); + }); + + it('extracts ids from supported urls', () => { + expect(normalizeImdbId('https://www.imdb.com/title/tt1375666/', 'tt')).toBe('tt1375666'); + expect(normalizeImdbId('https://m.imdb.com/title/tt1375666/', 'tt')).toBe('tt1375666'); + expect(normalizeImdbId('https://www.imdb.com/de/title/tt1375666/?ref_=nv_sr_srsg_0', 'tt')).toBe('tt1375666'); + expect(normalizeImdbId('https://www.imdb.com/name/nm0634240/', 'nm')).toBe('nm0634240'); + }); + + it('throws on invalid or mismatched ids', () => { + expect(() => normalizeImdbId('invalid', 'tt')).toThrow('Invalid IMDb ID'); + expect(() => normalizeImdbId('tt1', 'tt')).toThrow('Invalid IMDb ID'); + expect(() => normalizeImdbId('nm0634240', 'tt')).toThrow('Invalid IMDb ID'); + }); +}); + +describe('formatDuration', () => { + it('converts ISO-8601 durations to a 
short human format', () => { + expect(formatDuration('PT2H28M')).toBe('2h 28m'); + expect(formatDuration('PT1H')).toBe('1h'); + expect(formatDuration('PT45M')).toBe('45m'); + expect(formatDuration('PT2H')).toBe('2h'); + }); + + it('returns an empty string for invalid input', () => { + expect(formatDuration('')).toBe(''); + expect(formatDuration('invalid')).toBe(''); + }); +}); + +describe('forceEnglishUrl', () => { + it('adds the English language parameter', () => { + expect(forceEnglishUrl('https://www.imdb.com/title/tt1375666/')).toContain('language=en-US'); + }); + + it('preserves existing query parameters', () => { + const result = forceEnglishUrl('https://www.imdb.com/title/tt1375666/?ref_=nv'); + expect(result).toContain('language=en-US'); + expect(result).toContain('ref_=nv'); + }); +}); + +describe('normalizeImdbTitleType', () => { + it('maps internal imdb ids to readable labels', () => { + expect(normalizeImdbTitleType({ id: 'movie', text: '' })).toBe('Movie'); + expect(normalizeImdbTitleType({ id: 'tvSeries', text: '' })).toBe('TV Series'); + expect(normalizeImdbTitleType('short')).toBe('Short'); + }); + + it('preserves explicit text labels', () => { + expect(normalizeImdbTitleType({ id: 'movie', text: 'Feature Film' })).toBe('Feature Film'); + }); +}); + +describe('extractJsonLd', () => { + it('returns the evaluated JSON-LD payload', async () => { + const page = { + evaluate: vi.fn().mockResolvedValue({ '@type': 'Movie', name: 'Inception' }), + } as unknown as IPage; + + await expect(extractJsonLd(page, 'Movie')).resolves.toEqual({ '@type': 'Movie', name: 'Inception' }); + expect(page.evaluate).toHaveBeenCalledTimes(1); + expect(page.evaluate).toHaveBeenCalledWith(expect.stringContaining('"Movie"')); + }); +}); + +describe('isChallengePage', () => { + it('returns true when the page evaluation matches a challenge', async () => { + const page = { + evaluate: vi.fn().mockResolvedValue(true), + } as unknown as IPage; + + await 
expect(isChallengePage(page)).resolves.toBe(true); + expect(page.evaluate).toHaveBeenCalledTimes(1); + }); +}); + +describe('imdb browser helpers', () => { + it('reads the current imdb id from page metadata', async () => { + const page = { + evaluate: vi.fn().mockResolvedValue('nm0634240'), + } as unknown as IPage; + + await expect(getCurrentImdbId(page, 'nm')).resolves.toBe('nm0634240'); + expect(page.evaluate).toHaveBeenCalledTimes(1); + }); + + it('wait helpers resolve mocked readiness booleans', async () => { + const page = { + evaluate: vi.fn().mockResolvedValue(true), + } as unknown as IPage; + + await expect(waitForImdbPath(page, '^/find/?$')).resolves.toBe(true); + await expect(waitForImdbSearchReady(page)).resolves.toBe(true); + await expect(waitForImdbReviewsReady(page)).resolves.toBe(true); + expect(page.evaluate).toHaveBeenCalledTimes(3); + }); +}); diff --git a/src/clis/imdb/utils.ts b/src/clis/imdb/utils.ts new file mode 100644 index 00000000..d1a23dc0 --- /dev/null +++ b/src/clis/imdb/utils.ts @@ -0,0 +1,305 @@ +import { ArgumentError } from '../../errors.js'; +import type { IPage } from '../../types.js'; + +/** + * Normalize an IMDb title or person input to a bare ID. + * Accepts bare IDs, desktop URLs, mobile URLs, and URLs with language prefixes or query params. + */ +export function normalizeImdbId(input: string, prefix: 'tt' | 'nm'): string { + const trimmed = input.trim(); + const barePattern = new RegExp(`^${prefix}\\d{7,8}$`); + if (barePattern.test(trimmed)) { + return trimmed; + } + + const pathPattern = new RegExp(`/(?:[a-z]{2}/)?(?:title|name)/(${prefix}\\d{7,8})(?:[/?#]|$)`, 'i'); + const pathMatch = trimmed.match(pathPattern); + if (pathMatch) { + return pathMatch[1]; + } + + throw new ArgumentError( + `Invalid IMDb ID: "${input}"`, + `Expected ${prefix === 'tt' ? 'title' : 'name'} ID like ${prefix === 'tt' ? 
'tt1375666' : 'nm0634240'} or an IMDb URL`, + ); +} + +/** + * Convert an ISO 8601 duration string to a short human-readable format for table display. + * Example: PT2H28M -> 2h 28m. + */ +export function formatDuration(iso: string): string { + if (!iso) { + return ''; + } + + const match = iso.match(/^PT(?:(\d+)H)?(?:(\d+)M)?$/); + if (!match) { + return ''; + } + + const parts: string[] = []; + if (match[1]) { + parts.push(`${match[1]}h`); + } + if (match[2]) { + parts.push(`${match[2]}m`); + } + return parts.join(' '); +} + +/** + * Force an IMDb page URL to use the English language parameter, + * reducing structural differences across localized pages. + */ +export function forceEnglishUrl(url: string): string { + const parsed = new URL(url); + parsed.searchParams.set('language', 'en-US'); + return parsed.toString(); +} + +/** + * Normalize IMDb title-type payloads that may be represented as an object, + * a raw string, or an empty text field with only an internal id. + */ +export function normalizeImdbTitleType(input: unknown): string { + const raw = (() => { + if (typeof input === 'string') return input; + if (!input || typeof input !== 'object') return ''; + const value = input as Record; + return typeof value.text === 'string' && value.text.trim() + ? value.text + : typeof value.id === 'string' + ? value.id + : ''; + })().trim(); + + if (!raw) return ''; + + const known: Record = { + movie: 'Movie', + short: 'Short', + video: 'Video', + tvEpisode: 'TV Episode', + tvMiniSeries: 'TV Mini Series', + tvMovie: 'TV Movie', + tvSeries: 'TV Series', + tvShort: 'TV Short', + tvSpecial: 'TV Special', + videoGame: 'Video Game', + }; + + return known[raw] ?? raw; +} + +/** + * Extract structured JSON-LD data from the page. + * Accepts a single type string or an array of types to match against @type. + */ +export async function extractJsonLd(page: IPage, type?: string | string[]): Promise | null> { + const filterTypes = type ? (Array.isArray(type) ? 
type : [type]) : []; + return page.evaluate(` + (function() { + var scripts = document.querySelectorAll('script[type="application/ld+json"]'); + var wantedTypes = ${JSON.stringify(filterTypes)}; + + function matchesType(data) { + if (wantedTypes.length === 0) { + return true; + } + if (!data || typeof data !== 'object') { + return false; + } + if (wantedTypes.indexOf(data['@type']) !== -1) { + return true; + } + if (Array.isArray(data['@type'])) { + for (var t = 0; t < data['@type'].length; t++) { + if (wantedTypes.indexOf(data['@type'][t]) !== -1) return true; + } + } + return false; + } + + function findMatch(data) { + if (Array.isArray(data)) { + for (var i = 0; i < data.length; i++) { + var itemMatch = findMatch(data[i]); + if (itemMatch) { + return itemMatch; + } + } + return null; + } + + if (!data || typeof data !== 'object') { + return null; + } + + if (matchesType(data)) { + return data; + } + + if (Array.isArray(data['@graph'])) { + return findMatch(data['@graph']); + } + + return null; + } + + for (var i = 0; i < scripts.length; i++) { + try { + var parsed = JSON.parse(scripts[i].textContent || 'null'); + var match = findMatch(parsed); + if (match) { + return match; + } + } catch (error) { + void error; + } + } + + return null; + })() + `); +} + +/** + * Poll until the current IMDb page path matches the expected entity/search path. + */ +export async function waitForImdbPath(page: IPage, pathPattern: string, timeoutMs: number = 15000): Promise { + const result = await page.evaluate(` + (async function() { + var deadline = Date.now() + ${timeoutMs}; + var pattern = new RegExp(${JSON.stringify(pathPattern)}, 'i'); + while (Date.now() < deadline) { + if (pattern.test(window.location.pathname)) { + return true; + } + await new Promise(function(resolve) { setTimeout(resolve, 250); }); + } + return pattern.test(window.location.pathname); + })() + `); + return Boolean(result); +} + +/** + * Wait until IMDb search results (or the search UI state) has rendered. 
+ */ +export async function waitForImdbSearchReady(page: IPage, timeoutMs: number = 15000): Promise { + const result = await page.evaluate(` + (async function() { + var deadline = Date.now() + ${timeoutMs}; + + function hasSearchResults() { + var nextDataEl = document.getElementById('__NEXT_DATA__'); + if (nextDataEl) { + try { + var nextData = JSON.parse(nextDataEl.textContent || 'null'); + var pageProps = nextData && nextData.props && nextData.props.pageProps; + var titleResults = (pageProps && pageProps.titleResults && pageProps.titleResults.results) || []; + var nameResults = (pageProps && pageProps.nameResults && pageProps.nameResults.results) || []; + if (titleResults.length > 0 || nameResults.length > 0) { + return true; + } + } catch (error) { + void error; + } + } + + if (document.querySelector('a[href*="/title/"], a[href*="/name/"]')) { + return true; + } + + var body = document.body ? (document.body.textContent || '') : ''; + return body.includes('No results found for') || body.includes('No exact matches'); + } + + while (Date.now() < deadline) { + if (hasSearchResults()) { + return true; + } + await new Promise(function(resolve) { setTimeout(resolve, 250); }); + } + + return hasSearchResults(); + })() + `); + return Boolean(result); +} + +/** + * Wait until IMDb review cards (or the page review summary) has rendered. + */ +export async function waitForImdbReviewsReady(page: IPage, timeoutMs: number = 15000): Promise { + const result = await page.evaluate(` + (async function() { + var deadline = Date.now() + ${timeoutMs}; + + function hasReviewContent() { + if (document.querySelector('article.user-review-item, [data-testid="review-card-parent"], [data-testid="tturv-total-reviews"]')) { + return true; + } + var body = document.body ? 
(document.body.textContent || '') : ''; + return body.includes('No user reviews') || body.includes('Review this title'); + } + + while (Date.now() < deadline) { + if (hasReviewContent()) { + return true; + } + await new Promise(function(resolve) { setTimeout(resolve, 250); }); + } + + return hasReviewContent(); + })() + `); + return Boolean(result); +} + +/** + * Read the current IMDb entity id from the page URL/canonical metadata. + */ +export async function getCurrentImdbId(page: IPage, prefix: 'tt' | 'nm'): Promise { + const result = await page.evaluate(` + (function() { + var pattern = new RegExp('(${prefix}\\\\d{7,8})', 'i'); + var candidates = [ + window.location.pathname || '', + document.querySelector('link[rel="canonical"]')?.getAttribute('href') || '', + document.querySelector('meta[property="og:url"]')?.getAttribute('content') || '' + ]; + + for (var i = 0; i < candidates.length; i++) { + var match = candidates[i].match(pattern); + if (match) { + return match[1]; + } + } + return ''; + })() + `); + + return typeof result === 'string' ? result : ''; +} + +/** + * Detect whether the current page is an IMDb bot-challenge or verification page. + */ +export async function isChallengePage(page: IPage): Promise { + const result = await page.evaluate(` + (function() { + var title = document.title || ''; + var body = document.body ? (document.body.textContent || '') : ''; + return title.includes('Robot Check') || + title.includes('Are you a robot') || + title.includes('JavaScript is disabled') || + body.includes('captcha') || + body.includes('verify that you are human') || + body.includes('not a robot'); + })() + `); + + return Boolean(result); +} diff --git a/tests/e2e/browser-public.test.ts b/tests/e2e/browser-public.test.ts index d185943f..c8922d14 100644 --- a/tests/e2e/browser-public.test.ts +++ b/tests/e2e/browser-public.test.ts @@ -1,5 +1,5 @@ /** - * E2E tests for core browser commands (bilibili, zhihu, v2ex). 
+ * E2E tests for core browser commands (bilibili, zhihu, v2ex, IMDb). * These use OPENCLI_HEADLESS=1 to launch a headless Chromium. * * NOTE: Some sites may block headless browsers with bot detection. @@ -7,7 +7,7 @@ */ import { describe, it, expect } from 'vitest'; -import { runCli, parseJsonOutput } from './helpers.js'; +import { runCli, parseJsonOutput, type CliResult } from './helpers.js'; async function tryBrowserCommand(args: string[]): Promise { const { stdout, code } = await runCli(args, { timeout: 60_000 }); @@ -28,13 +28,47 @@ function expectDataOrSkip(data: any[] | null, label: string) { expect(data.length).toBeGreaterThanOrEqual(1); } +function isImdbChallenge(result: CliResult): boolean { + const text = `${result.stderr}\n${result.stdout}`; + return /IMDb blocked this request|Robot Check|Are you a robot|verify that you are human|captcha/i.test(text); +} + +function isBrowserBridgeUnavailable(result: CliResult): boolean { + const text = `${result.stderr}\n${result.stdout}`; + return /Browser Extension is not connected|Browser Bridge extension.*not connected|Daemon is running but the Browser Extension is not connected/i.test(text); +} + +async function expectImdbDataOrChallengeSkip(args: string[], label: string): Promise { + const result = await runCli(args, { timeout: 60_000 }); + if (result.code !== 0) { + if (isImdbChallenge(result)) { + console.warn(`${label}: skipped — IMDb challenge page detected`); + return null; + } + if (isBrowserBridgeUnavailable(result)) { + console.warn(`${label}: skipped — Browser Bridge extension is unavailable in this environment`); + return null; + } + throw new Error(`${label} failed:\n${result.stderr || result.stdout}`); + } + + const data = parseJsonOutput(result.stdout); + if (!Array.isArray(data)) { + throw new Error(`${label} returned non-array JSON:\n${result.stdout.slice(0, 500)}`); + } + if (data.length === 0) { + throw new Error(`${label} returned an empty result`); + } + return data; +} + describe('browser 
public-data commands E2E', () => { // ── bilibili ── it('bilibili hot returns trending videos', async () => { const data = await tryBrowserCommand(['bilibili', 'hot', '--limit', '5', '-f', 'json']); expectDataOrSkip(data, 'bilibili hot'); - if (data) { + if (data?.length) { expect(data[0]).toHaveProperty('title'); } }, 60_000); @@ -53,7 +87,7 @@ describe('browser public-data commands E2E', () => { it('zhihu hot returns trending questions', async () => { const data = await tryBrowserCommand(['zhihu', 'hot', '--limit', '5', '-f', 'json']); expectDataOrSkip(data, 'zhihu hot'); - if (data) { + if (data?.length) { expect(data[0]).toHaveProperty('title'); } }, 60_000); @@ -68,4 +102,28 @@ describe('browser public-data commands E2E', () => { const data = await tryBrowserCommand(['v2ex', 'daily', '--limit', '3', '-f', 'json']); expectDataOrSkip(data, 'v2ex daily'); }, 60_000); + + // ── imdb ── + it('imdb top returns chart data', async () => { + const data = await expectImdbDataOrChallengeSkip(['imdb', 'top', '--limit', '3', '-f', 'json'], 'imdb top'); + if (data?.length) { + expect(data[0]).toHaveProperty('title'); + } + }, 60_000); + + it('imdb search returns results', async () => { + const data = await expectImdbDataOrChallengeSkip(['imdb', 'search', 'inception', '--limit', '3', '-f', 'json'], 'imdb search'); + if (data?.length) { + expect(data[0]).toHaveProperty('id'); + expect(data[0]).toHaveProperty('title'); + } + }, 60_000); + + it('imdb title returns movie details', async () => { + const data = await expectImdbDataOrChallengeSkip(['imdb', 'title', 'tt1375666', '-f', 'json'], 'imdb title'); + if (data?.length) { + expect(data[0]).toHaveProperty('field'); + expect(data[0]).toHaveProperty('value'); + } + }, 60_000); }); diff --git a/vitest.config.ts b/vitest.config.ts index 92c6318d..023b6543 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -18,6 +18,7 @@ export default defineConfig({ name: 'adapter', include: [ 'src/clis/bilibili/**/*.test.ts', + 
'src/clis/imdb/**/*.test.ts', 'src/clis/jd/**/*.test.ts', 'src/clis/linux-do/**/*.test.ts', 'src/clis/xiaohongshu/**/*.test.ts',