diff --git a/cli-manifest.json b/cli-manifest.json
index fea0100f5..fd8826ad4 100644
--- a/cli-manifest.json
+++ b/cli-manifest.json
@@ -7825,6 +7825,202 @@
     "modulePath": "jimeng/workspaces.js",
     "sourceFile": "jimeng/workspaces.js"
   },
+  {
+    "site": "ke",
+    "name": "chengjiao",
+    "description": "贝壳找房成交记录",
+    "domain": "ke.com",
+    "strategy": "cookie",
+    "browser": true,
+    "args": [
+      {
+        "name": "city",
+        "type": "str",
+        "default": "bj",
+        "required": false,
+        "help": "城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)"
+      },
+      {
+        "name": "district",
+        "type": "str",
+        "required": false,
+        "help": "区域拼音,如 chaoyang, haidian"
+      },
+      {
+        "name": "limit",
+        "type": "int",
+        "default": 20,
+        "required": false,
+        "help": "返回数量"
+      }
+    ],
+    "columns": [
+      "title",
+      "community",
+      "layout",
+      "area",
+      "deal_price",
+      "unit_price",
+      "deal_date"
+    ],
+    "type": "js",
+    "modulePath": "ke/chengjiao.js",
+    "sourceFile": "ke/chengjiao.js"
+  },
+  {
+    "site": "ke",
+    "name": "ershoufang",
+    "description": "贝壳找房二手房列表",
+    "domain": "ke.com",
+    "strategy": "cookie",
+    "browser": true,
+    "args": [
+      {
+        "name": "city",
+        "type": "str",
+        "default": "bj",
+        "required": false,
+        "help": "城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)"
+      },
+      {
+        "name": "district",
+        "type": "str",
+        "required": false,
+        "help": "区域拼音,如 chaoyang, haidian, tianhe"
+      },
+      {
+        "name": "min-price",
+        "type": "int",
+        "required": false,
+        "help": "最低总价(万元)"
+      },
+      {
+        "name": "max-price",
+        "type": "int",
+        "required": false,
+        "help": "最高总价(万元)"
+      },
+      {
+        "name": "rooms",
+        "type": "int",
+        "required": false,
+        "help": "几居室 (1-5)"
+      },
+      {
+        "name": "limit",
+        "type": "int",
+        "default": 20,
+        "required": false,
+        "help": "返回数量"
+      }
+    ],
+    "columns": [
+      "title",
+      "community",
+      "layout",
+      "area",
+      "direction",
+      "total_price",
+      "unit_price",
+      "url"
+    ],
+    "type": "js",
+    "modulePath": "ke/ershoufang.js",
+    "sourceFile": "ke/ershoufang.js"
+  },
+  {
+    "site": "ke",
+    "name": "xiaoqu",
+    "description": "贝壳找房小区列表",
+    "domain": "ke.com",
+    "strategy": "cookie",
+    "browser": true,
+    "args": [
+      {
+        "name": "city",
+        "type": "str",
+        "default": "bj",
+        "required": false,
+        "help": "城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)"
+      },
+      {
+        "name": "district",
+        "type": "str",
+        "required": false,
+        "help": "区域拼音,如 chaoyang, haidian"
+      },
+      {
+        "name": "limit",
+        "type": "int",
+        "default": 20,
+        "required": false,
+        "help": "返回数量"
+      }
+    ],
+    "columns": [
+      "name",
+      "district",
+      "avg_price",
+      "year",
+      "on_sale"
+    ],
+    "type": "js",
+    "modulePath": "ke/xiaoqu.js",
+    "sourceFile": "ke/xiaoqu.js"
+  },
+  {
+    "site": "ke",
+    "name": "zufang",
+    "description": "贝壳找房租房列表",
+    "domain": "ke.com",
+    "strategy": "cookie",
+    "browser": true,
+    "args": [
+      {
+        "name": "city",
+        "type": "str",
+        "default": "bj",
+        "required": false,
+        "help": "城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)"
+      },
+      {
+        "name": "district",
+        "type": "str",
+        "required": false,
+        "help": "区域拼音,如 chaoyang, haidian"
+      },
+      {
+        "name": "min-price",
+        "type": "int",
+        "required": false,
+        "help": "最低月租(元)"
+      },
+      {
+        "name": "max-price",
+        "type": "int",
+        "required": false,
+        "help": "最高月租(元)"
+      },
+      {
+        "name": "limit",
+        "type": "int",
+        "default": 20,
+        "required": false,
+        "help": "返回数量"
+      }
+    ],
+    "columns": [
+      "title",
+      "community",
+      "area",
+      "layout",
+      "price",
+      "url"
+    ],
+    "type": "js",
+    "modulePath": "ke/zufang.js",
+    "sourceFile": "ke/zufang.js"
+  },
   {
     "site": "lesswrong",
     "name": "comments",
diff --git a/clis/ke/chengjiao.js b/clis/ke/chengjiao.js
new file mode 100644
index 000000000..1415fa673
--- /dev/null
+++ b/clis/ke/chengjiao.js
@@ -0,0 +1,65 @@
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { cityUrl, gotoKe, listPath, normalizeLimit } from './utils.js';
+
+cli({
+  site: 'ke',
+  name: 'chengjiao',
+  description: '贝壳找房成交记录',
+  domain: 'ke.com',
+  strategy: Strategy.COOKIE,
+  browser: true,
+  args: [
+    { name: 'city', default: 'bj', help: '城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)' },
+    { name: 'district', help: '区域拼音,如 chaoyang, haidian' },
+    { name: 'limit', type: 'int', default: 20, help: '返回数量' },
+  ],
+  columns: ['title', 'community', 'layout', 'area', 'deal_price', 'unit_price', 'deal_date'],
+  func: async (page, kwargs) => {
+    const limit = normalizeLimit(kwargs.limit);
+    const url = cityUrl(kwargs.city || 'bj') + listPath('chengjiao', kwargs.district);
+
+    await gotoKe(page, url);
+
+    const items = await page.evaluate(`(async () => {
+  // The chengjiao page uses .listContent li or a similar structure;
+  // take the first selector that matches anything.
+  const selectors = [
+    '.listContent li',
+    '.sellListContent li.clear',
+    'li.clear',
+  ];
+  let cards = [];
+  for (const sel of selectors) {
+    cards = document.querySelectorAll(sel);
+    if (cards.length > 0) break;
+  }
+
+  const text = (el) => (el ? el.textContent : '').replace(/\\s+/g, ' ').trim();
+
+  const results = [];
+  for (const card of cards) {
+    const titleEl = card.querySelector('.title a, a.VIEWDATA');
+    if (!titleEl) continue;
+
+    const houseParts = text(card.querySelector('.houseInfo')).split('|').map(s => s.trim());
+    const layoutMatch = (houseParts[0] || '').match(/(\\d室\\d厅)/);
+    const dealPrice = text(card.querySelector('.totalPrice span'));
+
+    results.push({
+      title: text(titleEl),
+      url: titleEl.href || '',
+      community: text(card.querySelector('.positionInfo a')),
+      layout: layoutMatch ? layoutMatch[1] : (houseParts[0] || ''),
+      area: houseParts[1] || '',
+      // Append the unit only when a price was actually found.
+      deal_price: dealPrice ? dealPrice + '万' : '',
+      unit_price: text(card.querySelector('.unitPrice span')),
+      deal_date: text(card.querySelector('.dealDate')),
+    });
+  }
+  return results;
+})()`);
+
+    return (items || []).slice(0, limit);
+  },
+});
diff --git a/clis/ke/ershoufang.js b/clis/ke/ershoufang.js
new file mode 100644
index 000000000..785479abc
--- /dev/null
+++ b/clis/ke/ershoufang.js
@@ -0,0 +1,85 @@
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { cityUrl, gotoKe, listPath, normalizeLimit } from './utils.js';
+
+cli({
+  site: 'ke',
+  name: 'ershoufang',
+  description: '贝壳找房二手房列表',
+  domain: 'ke.com',
+  strategy: Strategy.COOKIE,
+  browser: true,
+  args: [
+    { name: 'city', default: 'bj', help: '城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)' },
+    { name: 'district', help: '区域拼音,如 chaoyang, haidian, tianhe' },
+    { name: 'min-price', type: 'int', help: '最低总价(万元)' },
+    { name: 'max-price', type: 'int', help: '最高总价(万元)' },
+    { name: 'rooms', type: 'int', help: '几居室 (1-5)' },
+    { name: 'limit', type: 'int', default: 20, help: '返回数量' },
+  ],
+  columns: ['title', 'community', 'layout', 'area', 'direction', 'total_price', 'unit_price', 'url'],
+  func: async (page, kwargs) => {
+    const limit = normalizeLimit(kwargs.limit);
+
+    // Filters form a single path segment: price range first, then room count.
+    let filters = '';
+    if (kwargs['min-price'] || kwargs['max-price']) {
+      filters += `p${kwargs['min-price'] || ''}t${kwargs['max-price'] || ''}`;
+    }
+    if (kwargs.rooms) {
+      filters += `l${kwargs.rooms}`;
+    }
+
+    const url = cityUrl(kwargs.city || 'bj')
+      + listPath('ershoufang', kwargs.district)
+      + (filters ? `${filters}/` : '');
+
+    await gotoKe(page, url);
+
+    const items = await page.evaluate(`(async () => {
+  const text = (el) => (el ? el.textContent : '').replace(/\\s+/g, ' ').trim();
+
+  const results = [];
+  for (const card of document.querySelectorAll('.sellListContent li.clear')) {
+    const titleEl = card.querySelector('.title a');
+    if (!titleEl) continue;
+
+    // houseInfo text varies:
+    //   "中楼层 (共24层) 4室2厅 | 133.99平米 | 东南"
+    //   "高楼层 (共32层) | 2022年 | 4室2厅 | 110平米"
+    const houseParts = text(card.querySelector('.houseInfo')).split('|').map(s => s.trim());
+
+    // Classify each part; a layout can share its part with the floor text.
+    let layout = '';
+    let area = '';
+    let direction = '';
+    for (const part of houseParts) {
+      const layoutMatch = part.match(/(\\d室\\d厅)/);
+      if (layoutMatch) {
+        layout = layoutMatch[1];
+      } else if (/平米|㎡/.test(part)) {
+        area = part;
+      } else if (/^[东南西北]+$/.test(part.replace(/\\s/g, ''))) {
+        direction = part;
+      }
+    }
+
+    const totalPrice = text(card.querySelector('.totalPrice span'));
+
+    results.push({
+      title: text(titleEl),
+      url: titleEl.href || '',
+      community: text(card.querySelector('.positionInfo a')),
+      layout,
+      area,
+      direction,
+      // Append the unit only when a price was actually found.
+      total_price: totalPrice ? totalPrice + '万' : '',
+      unit_price: text(card.querySelector('.unitPrice span')),
+    });
+  }
+  return results;
+})()`);
+
+    return (items || []).slice(0, limit);
+  },
+});
diff --git a/clis/ke/utils.js b/clis/ke/utils.js
new file mode 100644
index 000000000..a0958c37d
--- /dev/null
+++ b/clis/ke/utils.js
@@ -0,0 +1,147 @@
+import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
+
+const CAPTCHA_TEXT_PATTERNS = [
+  '请拖动下方滑块完成验证',
+  '请按住滑块',
+  '验证码',
+  '安全验证',
+  '访问验证',
+  '滑动验证',
+];
+
+const LOGIN_TEXT_PATTERNS = [
+  '请登录',
+  '登录后',
+  '账号登录',
+  '手机登录',
+  '立即登录',
+  '扫码登录',
+];
+
+/** Collapse whitespace (including NBSP) and trim; non-strings become ''. */
+function cleanText(value) {
+  return typeof value === 'string'
+    ? value.replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim()
+    : '';
+}
+
+/**
+ * Read the current URL, title and the first 2000 characters of body text.
+ * Falls back to empty strings when the in-page script throws or returns nothing.
+ */
+export async function readPageState(page) {
+  const result = await page.evaluate(`
+    (() => {
+      try {
+        return {
+          href: window.location.href || '',
+          title: document.title || '',
+          body_text: document.body ? (document.body.innerText || '').substring(0, 2000) : '',
+        };
+      } catch(e) {
+        return { href: '', title: '', body_text: '' };
+      }
+    })()
+  `);
+  if (!result) {
+    return { href: '', title: '', body_text: '' };
+  }
+  return {
+    href: cleanText(result.href),
+    title: cleanText(result.title),
+    body_text: cleanText(result.body_text),
+  };
+}
+
+/**
+ * Throw AuthRequiredError when the page state looks like a captcha or login wall.
+ */
+export function assertNotBlocked(state) {
+  const { href, title, body_text } = state;
+  // '/captcha' also matches the 'hip.ke.com/captcha' URLs.
+  if (href.includes('/captcha')) {
+    throw new AuthRequiredError('ke.com', '触发了验证码,请先在浏览器中完成验证');
+  }
+  if (CAPTCHA_TEXT_PATTERNS.some(p => title.includes(p) || body_text.includes(p))) {
+    throw new AuthRequiredError('ke.com', '触发了验证码,请先在浏览器中完成滑块验证');
+  }
+  if (LOGIN_TEXT_PATTERNS.some(p => title.includes(p))) {
+    throw new AuthRequiredError('ke.com', '未登录,请先在浏览器中登录贝壳找房');
+  }
+}
+
+/**
+ * Navigate to a ke.com URL, wait for it to settle, and throw if the page is blocked.
+ * Returns the page state that was inspected.
+ */
+export async function gotoKe(page, url) {
+  await page.goto(url, { settleMs: 2500 });
+  await page.wait(2);
+  const state = await readPageState(page);
+  assertNotBlocked(state);
+  return state;
+}
+
+/**
+ * Fetch a ke.com JSON API from inside the browser context (credentials included).
+ */
+export async function fetchKeJson(page, url) {
+  const result = await page.evaluate(`(async () => {
+    let res;
+    try {
+      res = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
+    } catch {
+      return { __keErr: 'network' };
+    }
+    if (!res.ok) return { __keErr: res.status };
+    try {
+      return await res.json();
+    } catch {
+      return { __keErr: 'parse' };
+    }
+  })()`);
+  if (result?.__keErr !== undefined) {
+    const code = result.__keErr;
+    if (code === 401 || code === 403) {
+      throw new AuthRequiredError('ke.com', '未登录或登录已过期,请先在浏览器中登录贝壳找房');
+    }
+    if (code === 'parse') {
+      throw new CommandExecutionError('响应不是有效 JSON', '可能触发了风控,请检查登录状态或稍后重试');
+    }
+    if (code === 'network') {
+      throw new CommandExecutionError('网络请求失败', '请检查网络连接后重试');
+    }
+    throw new CommandExecutionError(`HTTP ${code}`, '请检查网络连接或登录状态');
+  }
+  return result;
+}
+
+/**
+ * Build a ke.com city URL prefix, e.g. cityUrl('bj') or cityUrl('bj', 'zu').
+ * The city code is validated because it becomes part of the hostname.
+ */
+export function cityUrl(city, subdomain = '') {
+  const code = String(city ?? '').trim().toLowerCase();
+  if (!/^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(code)) {
+    throw new CommandExecutionError(`无效的城市代码: ${city}`, '城市代码应为字母缩写,如 bj, sh, gz');
+  }
+  const host = subdomain ? `${code}.${subdomain}.ke.com` : `${code}.ke.com`;
+  return `https://${host}`;
+}
+
+/**
+ * Build a listing path such as '/ershoufang/' or '/ershoufang/chaoyang/'.
+ * The district is URL-encoded so it stays a single path segment.
+ */
+export function listPath(section, district) {
+  const segment = district == null ? '' : String(district).trim();
+  return segment ? `/${section}/${encodeURIComponent(segment)}/` : `/${section}/`;
+}
+
+/**
+ * Coerce a limit argument to a positive integer, using the fallback when invalid.
+ */
+export function normalizeLimit(value, fallback = 20) {
+  const n = Math.floor(Number(value));
+  return Number.isFinite(n) && n > 0 ? n : fallback;
+}
diff --git a/clis/ke/xiaoqu.js b/clis/ke/xiaoqu.js
new file mode 100644
index 000000000..54cd2694b
--- /dev/null
+++ b/clis/ke/xiaoqu.js
@@ -0,0 +1,64 @@
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { cityUrl, gotoKe, listPath, normalizeLimit } from './utils.js';
+
+cli({
+  site: 'ke',
+  name: 'xiaoqu',
+  description: '贝壳找房小区列表',
+  domain: 'ke.com',
+  strategy: Strategy.COOKIE,
+  browser: true,
+  args: [
+    { name: 'city', default: 'bj', help: '城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)' },
+    { name: 'district', help: '区域拼音,如 chaoyang, haidian' },
+    { name: 'limit', type: 'int', default: 20, help: '返回数量' },
+  ],
+  columns: ['name', 'district', 'avg_price', 'year', 'on_sale'],
+  func: async (page, kwargs) => {
+    const limit = normalizeLimit(kwargs.limit);
+    const url = cityUrl(kwargs.city || 'bj') + listPath('xiaoqu', kwargs.district);
+
+    await gotoKe(page, url);
+
+    const items = await page.evaluate(`(async () => {
+  // Take the first selector that matches anything.
+  const selectors = [
+    '.xiaoquListItem',
+    '.listContent li',
+  ];
+  let cards = [];
+  for (const sel of selectors) {
+    cards = document.querySelectorAll(sel);
+    if (cards.length > 0) break;
+  }
+
+  const text = (el) => (el ? el.textContent : '').replace(/\\s+/g, ' ').trim();
+
+  const results = [];
+  for (const card of cards) {
+    // Name is in .title a, or in the title attribute of a.img[title].
+    const titleLink = card.querySelector('.title a');
+    const imgLink = card.querySelector('a.img[title], a[title]');
+    const nameEl = titleLink || imgLink;
+    if (!nameEl) continue;
+
+    const name = (titleLink ? titleLink.textContent : imgLink.getAttribute('title')) || '';
+    const infoEl = card.querySelector('.positionInfo, .houseInfo, .xiaoquListItemInfo');
+    const yearMatch = (infoEl ? infoEl.textContent : '').match(/(\\d{4})年/);
+    const avgPrice = text(card.querySelector('.totalPrice span'));
+
+    results.push({
+      name: name.trim(),
+      url: nameEl.href || '',
+      district: text(card.querySelector('.positionInfo a, .district a')),
+      avg_price: avgPrice ? avgPrice + '元/平' : '暂无',
+      year: yearMatch ? yearMatch[1] : '',
+      on_sale: text(card.querySelector('.xiaoquListItemSellCount a, .houseInfo a')),
+    });
+  }
+  return results;
+})()`);
+
+    return (items || []).slice(0, limit);
+  },
+});
diff --git a/clis/ke/zufang.js b/clis/ke/zufang.js
new file mode 100644
index 000000000..0efd11711
--- /dev/null
+++ b/clis/ke/zufang.js
@@ -0,0 +1,89 @@
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { cityUrl, gotoKe, listPath, normalizeLimit } from './utils.js';
+
+cli({
+  site: 'ke',
+  name: 'zufang',
+  description: '贝壳找房租房列表',
+  domain: 'ke.com',
+  strategy: Strategy.COOKIE,
+  browser: true,
+  args: [
+    { name: 'city', default: 'bj', help: '城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)' },
+    { name: 'district', help: '区域拼音,如 chaoyang, haidian' },
+    { name: 'min-price', type: 'int', help: '最低月租(元)' },
+    { name: 'max-price', type: 'int', help: '最高月租(元)' },
+    { name: 'limit', type: 'int', default: 20, help: '返回数量' },
+  ],
+  columns: ['title', 'community', 'area', 'layout', 'price', 'url'],
+  func: async (page, kwargs) => {
+    const limit = normalizeLimit(kwargs.limit);
+
+    let filters = '';
+    if (kwargs['min-price'] || kwargs['max-price']) {
+      filters = `rp${kwargs['min-price'] || ''}t${kwargs['max-price'] || ''}`;
+    }
+
+    // Rentals are served from the city's "zu" subdomain, e.g. https://bj.zu.ke.com.
+    const url = cityUrl(kwargs.city || 'bj', 'zu')
+      + listPath('zufang', kwargs.district)
+      + (filters ? `${filters}/` : '');
+
+    await gotoKe(page, url);
+
+    const items = await page.evaluate(`(async () => {
+  const text = (el) => (el ? el.textContent : '').replace(/\\s+/g, ' ').trim();
+
+  const results = [];
+  for (const titleEl of document.querySelectorAll('a.twoline')) {
+    // Climb from the title link to the ancestor that is a direct child of .content__list.
+    const nearest = titleEl.closest('div');
+    if (!nearest) continue;
+    let card = nearest;
+    while (card.parentElement && !card.parentElement.classList.contains('content__list')) {
+      card = card.parentElement;
+    }
+    // Without a .content__list ancestor the climb ends at the root element,
+    // which would mix every listing together; use the nearest div instead.
+    if (!card.parentElement) card = nearest;
+
+    let community = '';
+    let area = '';
+    let layout = '';
+    // Details come from the first p element whose class name contains "des".
+    const desEl = Array.from(card.querySelectorAll('p'))
+      .find(p => (p.className || '').includes('des'));
+    if (desEl) {
+      const links = desEl.querySelectorAll('a[title]');
+      if (links.length > 0) {
+        community = (links[links.length - 1].getAttribute('title') || '').trim();
+      }
+      for (const part of text(desEl).split('/')) {
+        const t = part.trim();
+        if (/\\u33A1|\\u5E73\\u7C73/.test(t)) area = t;
+        else if (/\\u5BA4.*\\u5385/.test(t)) layout = t;
+      }
+    }
+
+    // The price is the first em element holding a number or a "min-max" range.
+    let priceText = '';
+    for (const em of card.querySelectorAll('em')) {
+      const t = em.textContent.trim();
+      if (/^\\d+(?:\\s*-\\s*\\d+)?$/.test(t)) { priceText = t; break; }
+    }
+
+    results.push({
+      title: text(titleEl),
+      url: titleEl.href || '',
+      community,
+      area,
+      layout,
+      price: priceText ? priceText + '\\u5143/\\u6708' : '',
+    });
+  }
+  return results;
+})()`);
+
+    return (items || []).slice(0, limit);
+  },
+});