diff --git a/AGENTS.md b/AGENTS.md
index 3eafcee68..48559bc32 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -246,9 +246,9 @@ Follow existing patterns in `mg-fs-utils` or `mg-tinynews`.
## HTML & XML Parsing
-**Use `@tryghost/mg-utils` for all HTML and XML parsing. Do not use `cheerio`.**
+**Use `@tryghost/mg-utils` for all HTML and XML parsing. Do not use `cheerio` or `jsdom`.**
-See the [`mg-utils` README](packages/mg-utils/README.md) for full API documentation.
+`@tryghost/mg-utils` is powered by [linkedom](https://github.com/WebReflection/linkedom), which is lightweight and memory-efficient. See the [`mg-utils` README](packages/mg-utils/README.md) for full API documentation.
```javascript
import {xmlUtils, domUtils} from '@tryghost/mg-utils';
@@ -258,10 +258,17 @@ const parsed = await xmlUtils.parseXml(xmlString);
const channel = parsed.rss.channel;
const items = [].concat(channel.item || []); // normalize single/array
-// HTML: parse, manipulate, serialize
-const frag = domUtils.parseFragment(html);
-frag.$('.unwanted').forEach(el => el.remove());
-const output = frag.html();
+// HTML: use processFragment for automatic cleanup
+const output = domUtils.processFragment(html, (frag) => {
+ frag.$('.unwanted').forEach(el => el.remove());
+ return frag.html();
+});
+
+// Async version when the callback needs to await
+const asyncOutput = await domUtils.processFragmentAsync(html, async (frag) => {
+ // ... async operations ...
+ return frag.html();
+});
```
## Error Handling
diff --git a/packages/mg-blogger/lib/process.js b/packages/mg-blogger/lib/process.js
index af3d39aaa..af7411285 100644
--- a/packages/mg-blogger/lib/process.js
+++ b/packages/mg-blogger/lib/process.js
@@ -360,9 +360,13 @@ const processPosts = async (posts, options) => {
// Filter out falsy items in the post list
posts = posts.filter(i => i);
- return Promise.all(posts.map((post) => {
- return processPost(post, options);
- }));
+ const results = [];
+
+ for (let i = 0; i < posts.length; i++) {
+ results.push(await processPost(posts[i], options));
+ }
+
+ return results;
};
const all = async (input, {options}) => {
diff --git a/packages/mg-chorus/lib/processor.js b/packages/mg-chorus/lib/processor.js
index 7f99e42be..8006e93b9 100644
--- a/packages/mg-chorus/lib/processor.js
+++ b/packages/mg-chorus/lib/processor.js
@@ -120,7 +120,14 @@ const processPost = (data, options) => {
};
const processPosts = (posts, options) => {
- return posts.map(post => processPost(post, options));
+ const results = [];
+ for (let i = 0; i < posts.length; i++) {
+ const post = posts[i];
+ if (post) {
+ results.push(processPost(post, options));
+ }
+ }
+ return results;
};
const all = ({result, options}) => {
diff --git a/packages/mg-curated-export/lib/process.js b/packages/mg-curated-export/lib/process.js
index 914d53d26..b72927418 100644
--- a/packages/mg-curated-export/lib/process.js
+++ b/packages/mg-curated-export/lib/process.js
@@ -32,7 +32,13 @@ export default (input, ctx) => {
});
if (input.posts && input.posts.length > 0) {
- output.posts = input.posts.map(post => processPost(post.json, globalUser, tags, ctx));
+ output.posts = [];
+ for (let i = 0; i < input.posts.length; i++) {
+ const post = input.posts[i];
+ if (post) {
+ output.posts.push(processPost(post.json, globalUser, tags, ctx));
+ }
+ }
}
return output;
diff --git a/packages/mg-ghost-api/lib/processor.js b/packages/mg-ghost-api/lib/processor.js
index 161df21a7..231cc3085 100644
--- a/packages/mg-ghost-api/lib/processor.js
+++ b/packages/mg-ghost-api/lib/processor.js
@@ -47,7 +47,14 @@ const processPost = (ghPost) => {
};
const processPosts = (posts) => {
- return posts.map(post => processPost(post));
+ const results = [];
+ for (let i = 0; i < posts.length; i++) {
+ const post = posts[i];
+ if (post) {
+ results.push(processPost(post));
+ }
+ }
+ return results;
};
const processAuthor = (ghAuthor) => {
diff --git a/packages/mg-jekyll-export/lib/process.js b/packages/mg-jekyll-export/lib/process.js
index 1e4c3e096..37ab33dae 100644
--- a/packages/mg-jekyll-export/lib/process.js
+++ b/packages/mg-jekyll-export/lib/process.js
@@ -16,7 +16,13 @@ export default (input, options = {}) => {
};
if (input.posts && input.posts.length > 0) {
- output.posts = input.posts.map(post => processPost(post.fileName, post.fileContents, globalUser, options));
+ output.posts = [];
+ for (let i = 0; i < input.posts.length; i++) {
+ const post = input.posts[i];
+ if (post) {
+ output.posts.push(processPost(post.fileName, post.fileContents, globalUser, options));
+ }
+ }
}
return output;
diff --git a/packages/mg-letterdrop/lib/processor.js b/packages/mg-letterdrop/lib/processor.js
index ea7c21031..ebc7584f5 100644
--- a/packages/mg-letterdrop/lib/processor.js
+++ b/packages/mg-letterdrop/lib/processor.js
@@ -168,7 +168,14 @@ const processPost = (data, options) => {
};
const processPosts = (posts, options) => {
- return posts.map(post => processPost(post, options));
+ const results = [];
+ for (let i = 0; i < posts.length; i++) {
+ const post = posts[i];
+ if (post) {
+ results.push(processPost(post, options));
+ }
+ }
+ return results;
};
const all = ({result, options}) => {
diff --git a/packages/mg-letterdrop/test/processor.test.js b/packages/mg-letterdrop/test/processor.test.js
index 3f7274faf..b627c9865 100644
--- a/packages/mg-letterdrop/test/processor.test.js
+++ b/packages/mg-letterdrop/test/processor.test.js
@@ -1,15 +1,18 @@
import assert from 'node:assert/strict';
+import {readFileSync} from 'node:fs';
+import {dirname, join} from 'node:path';
import {describe, it} from 'node:test';
-import {createRequire} from 'node:module';
+import {fileURLToPath} from 'node:url';
import processor from '../lib/processor.js';
-const require = createRequire(import.meta.url);
-const fixture = require('./fixtures/api-response.json');
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const fixtureData = JSON.parse(readFileSync(join(__dirname, 'fixtures/api-response.json'), 'utf8'));
+const fixture = () => structuredClone(fixtureData);
describe('Process', function () {
it('Can convert a single post', function () {
const ctx = {
- result: fixture,
+ result: fixture(),
options: {
url: 'https://example.com',
addPrimaryTag: 'Newsletter',
@@ -68,7 +71,7 @@ describe('Process', function () {
it('Converts signup iframes to Portal links', function () {
const ctx = {
- result: fixture,
+ result: fixture(),
options: {
url: 'https://example.com',
addPrimaryTag: 'Newsletter',
diff --git a/packages/mg-libsyn/lib/processor.js b/packages/mg-libsyn/lib/processor.js
index b92e1d14c..ea8fbcba5 100644
--- a/packages/mg-libsyn/lib/processor.js
+++ b/packages/mg-libsyn/lib/processor.js
@@ -167,7 +167,14 @@ const processPost = (libsynPost, author, tags, options, errors) => { // eslint-d
return post;
};
const processPosts = (posts, author, tags, options, errors) => { // eslint-disable-line no-shadow
- return posts.map(post => processPost(post, author, tags, options, errors));
+ const results = [];
+ for (let i = 0; i < posts.length; i++) {
+ const post = posts[i];
+ if (post) {
+ results.push(processPost(post, author, tags, options, errors));
+ }
+ }
+ return results;
};
const all = ({result, errors, options}) => { // eslint-disable-line no-shadow
diff --git a/packages/mg-libsyn/test/processor.test.js b/packages/mg-libsyn/test/processor.test.js
index eaefc8a18..b9585367e 100644
--- a/packages/mg-libsyn/test/processor.test.js
+++ b/packages/mg-libsyn/test/processor.test.js
@@ -1,10 +1,13 @@
import assert from 'node:assert/strict';
+import {readFileSync} from 'node:fs';
+import {dirname, join} from 'node:path';
import {describe, it} from 'node:test';
-import {createRequire} from 'node:module';
+import {fileURLToPath} from 'node:url';
import processor from '../lib/processor.js';
-const require = createRequire(import.meta.url);
-const fixture = require('./fixtures/feed.json');
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const fixtureData = JSON.parse(readFileSync(join(__dirname, 'fixtures/feed.json'), 'utf8'));
+const fixture = () => structuredClone(fixtureData);
describe('durationToSeconds', function () {
it('Minutes with no seconds', function () {
@@ -41,7 +44,7 @@ describe('durationToSeconds', function () {
describe('Process posts', function () {
it('Can process posts', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
@@ -61,7 +64,7 @@ describe('Process posts', function () {
it('Post has required fields', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
@@ -118,7 +121,7 @@ describe('Process posts', function () {
it('Can add a tag', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
@@ -152,7 +155,7 @@ describe('Process posts', function () {
it('Can use feed categories', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
tags: ['Lorem', 'Ipsum', 'dolor'],
author: {
name: 'Test Author',
@@ -200,7 +203,7 @@ describe('Process posts', function () {
it('Can use item categories', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
@@ -245,7 +248,7 @@ describe('Process posts', function () {
describe('Process content', function () {
it('Remove empty p tags', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
@@ -268,7 +271,7 @@ describe('Process content', function () {
it('Use Libsyn embeds', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
@@ -293,7 +296,7 @@ describe('Process content', function () {
it('Use Audio cards', function () {
const data = {
- posts: fixture.rss.channel.item,
+ posts: fixture().rss.channel.item,
author: {
name: 'Test Author',
slug: 'test-author',
diff --git a/packages/mg-linkfixer/lib/LinkFixer.js b/packages/mg-linkfixer/lib/LinkFixer.js
index ef4fe2a1e..9b7a4043d 100644
--- a/packages/mg-linkfixer/lib/LinkFixer.js
+++ b/packages/mg-linkfixer/lib/LinkFixer.js
@@ -2,7 +2,7 @@ import {join} from 'node:path';
import _ from 'lodash';
import {domUtils} from '@tryghost/mg-utils';
-const {parseFragment} = domUtils;
+const {processFragment} = domUtils;
// @TODO: expand this list
const htmlFields = ['html'];
@@ -130,24 +130,24 @@ export default class LinkFixer {
}
async processHTML(html) {
- const parsed = parseFragment(html);
+ return processFragment(html, (parsed) => {
+ for (const el of parsed.$('a')) {
+ let href = el.getAttribute('href');
- for (const el of parsed.$('a')) {
- let href = el.getAttribute('href');
+ if (!href) {
+ continue;
+ }
- if (!href) {
- continue;
- }
-
- // Clean the URL, matching the links stored in the linkMap
- let updatedURL = this.cleanURL(href);
+ // Clean the URL, matching the links stored in the linkMap
+ let updatedURL = this.cleanURL(href);
- if (this.linkMap[updatedURL]) {
- el.setAttribute('href', this.linkMap[updatedURL]);
+ if (this.linkMap[updatedURL]) {
+ el.setAttribute('href', this.linkMap[updatedURL]);
+ }
}
- }
- return parsed.html();
+ return parsed.html();
+ });
}
async processLexical(lexical) {
diff --git a/packages/mg-medium-export/lib/process-post.js b/packages/mg-medium-export/lib/process-post.js
index 53126482a..d861486c3 100644
--- a/packages/mg-medium-export/lib/process-post.js
+++ b/packages/mg-medium-export/lib/process-post.js
@@ -83,102 +83,102 @@ const processTags = ({tagLinks}) => {
};
const processFeatureImage = ({html, post, options}) => {
- const parsed = domUtils.parseFragment(html);
-
- // Look for data-is-featured
- let featured = parsed.$('[data-is-featured]')[0] || null;
+ return domUtils.processFragment(html, (parsed) => {
+ // Look for data-is-featured
+ let featured = parsed.$('[data-is-featured]')[0] || null;
+
+ // Look for an image that appears before content
+ let allSections = parsed.$(sectionTags.join(','));
+ let foundImg = false;
+ let preImageTags = [];
+
+ allSections.forEach((el) => {
+ if (!foundImg) {
+ preImageTags.push(el.tagName.toLowerCase());
+ }
- // Look for an image that appears before content
- let allSections = parsed.$(sectionTags.join(','));
- let foundImg = false;
- let preImageTags = [];
+ if (!foundImg && el.tagName.toLowerCase() === 'img') {
+ foundImg = el;
+ }
+ });
- allSections.forEach((el) => {
- if (!foundImg) {
- preImageTags.push(el.tagName.toLowerCase());
+ // We don't have a designated feature image, but there's an image above the content so use that image instead
+ if (!featured && !preImageTags.includes('p')) {
+ featured = foundImg;
+
+ if (options?.addPlatformTag) {
+ // tag it with #auto-feature-image so we can tell the difference
+ post.data.tags.push({
+ data: {
+ name: '#auto-feature-image'
+ }
+ });
+ }
}
- if (!foundImg && el.tagName.toLowerCase() === 'img') {
- foundImg = el;
+ if (featured) {
+ post.data.feature_image = featured.getAttribute('src');
+ post.data.feature_image_alt = featured.getAttribute('alt') || null;
+ const figure = featured.closest('figure');
+ const figcaption = figure ? figure.querySelector('figcaption') : null;
+ post.data.feature_image_caption = figcaption ? domUtils.serializeChildren(figcaption).trim() : null;
+
+ if (figure) {
+ figure.remove();
+ }
}
+
+ return parsed.html().trim();
});
+};
+
+export default ({name, html, globalUser, options}) => {
+ return domUtils.processFragment(html, (parsed) => {
+ let post = processMeta({name, parsed, options});
+
+ // Process author
+ const pAuthor = parsed.$('.p-author')[0];
+ if (pAuthor) {
+ post.data.author = processAuthor({pAuthor});
+ } else if (globalUser) {
+ post.data.author = globalUser;
+ }
- // We don't have a designated feature image, but there's an image above the content so use that image instead
- if (!featured && !preImageTags.includes('p')) {
- featured = foundImg;
+ post.data.tags = [];
- if (options?.addPlatformTag) {
- // tag it with #auto-feature-image so we can tell the difference
+ if (options?.addTag) {
post.data.tags.push({
+ url: 'migrator-added-tag',
data: {
- name: '#auto-feature-image'
+ name: options.addTag
}
});
}
- }
- if (featured) {
- post.data.feature_image = featured.getAttribute('src');
- post.data.feature_image_alt = featured.getAttribute('alt') || null;
- const figure = featured.closest('figure');
- const figcaption = figure ? figure.querySelector('figcaption') : null;
- post.data.feature_image_caption = figcaption ? domUtils.serializeChildren(figcaption).trim() : null;
-
- if (figure) {
- figure.remove();
+ // Process tags
+ const tagLinks = parsed.$('.p-tags a');
+ if (tagLinks.length) {
+ post.data.tags = [...post.data.tags, ...processTags({tagLinks})];
}
- }
- return parsed.html().trim();
-};
-
-export default ({name, html, globalUser, options}) => {
- const parsed = domUtils.parseFragment(html);
-
- let post = processMeta({name, parsed, options});
-
- // Process author
- const pAuthor = parsed.$('.p-author')[0];
- if (pAuthor) {
- post.data.author = processAuthor({pAuthor});
- } else if (globalUser) {
- post.data.author = globalUser;
- }
-
- post.data.tags = [];
-
- if (options?.addTag) {
- post.data.tags.push({
- url: 'migrator-added-tag',
- data: {
- name: options.addTag
- }
- });
- }
-
- // Process tags
- const tagLinks = parsed.$('.p-tags a');
- if (tagLinks.length) {
- post.data.tags = [...post.data.tags, ...processTags({tagLinks})];
- }
-
- if (options?.addPlatformTag) {
- post.data.tags.push({
- url: 'migrator-added-platform-tag',
- data: {
- name: '#medium'
- }
- });
- }
+ if (options?.addPlatformTag) {
+ post.data.tags.push({
+ url: 'migrator-added-platform-tag',
+ data: {
+ name: '#medium'
+ }
+ });
+ }
- // Process content
- const eContent = parsed.$('.e-content')[0];
- const contentHtml = eContent ? domUtils.serializeChildren(eContent) : '';
- post = processContent({html: contentHtml, post});
+ // Process content
+ const eContent = parsed.$('.e-content')[0];
+ const contentHtml = eContent ? domUtils.serializeChildren(eContent) : '';
+ post = processContent({html: contentHtml, post});
- // Grab the featured image
- // Do this last so that we can add tags to indicate feature image style
- post.data.html = processFeatureImage({html: post.data.html, post, options});
+ // Grab the featured image
+ // Do this last so that we can add tags to indicate feature image style
+ post.data.html = processFeatureImage({html: post.data.html, post, options});
- return post;
+ return post;
+ });
};
diff --git a/packages/mg-medium-export/lib/process-profile.js b/packages/mg-medium-export/lib/process-profile.js
index 1efb95bb9..24428f27c 100644
--- a/packages/mg-medium-export/lib/process-profile.js
+++ b/packages/mg-medium-export/lib/process-profile.js
@@ -1,4 +1,5 @@
import {domUtils} from '@tryghost/mg-utils';
+const {processFragment} = domUtils;
// Keys we've seen so far
// Profile
@@ -22,26 +23,27 @@ const mediumToGhost = {
};
export default ({html}) => {
- const parsed = domUtils.parseFragment(html);
- let profile = {
- url: parsed.$('.u-url')[0]?.getAttribute('href'),
- data: {
- name: parsed.$('.p-name')[0]?.textContent || '',
- profile_image: parsed.$('.u-photo')[0]?.getAttribute('src'),
- roles: [
- 'Contributor'
- ]
- }
- };
+ return processFragment(html, (parsed) => {
+ let profile = {
+ url: parsed.$('.u-url')[0]?.getAttribute('href'),
+ data: {
+ name: parsed.$('.p-name')[0]?.textContent || '',
+ profile_image: parsed.$('.u-photo')[0]?.getAttribute('src'),
+ roles: [
+ 'Contributor'
+ ]
+ }
+ };
- parsed.$('ul li').forEach((el) => {
- let [item, value] = el.textContent.split(': ');
- let key = mediumToGhost[item.toLowerCase()] || null;
+ parsed.$('ul li').forEach((el) => {
+ let [item, value] = el.textContent.split(': ');
+ let key = mediumToGhost[item.toLowerCase()] || null;
- if (key) {
- profile.data[key] = value;
- }
- });
+ if (key) {
+ profile.data[key] = value;
+ }
+ });
- return profile;
+ return profile;
+ });
};
diff --git a/packages/mg-medium-export/lib/process.js b/packages/mg-medium-export/lib/process.js
index 4bc16956a..902657f4e 100644
--- a/packages/mg-medium-export/lib/process.js
+++ b/packages/mg-medium-export/lib/process.js
@@ -11,7 +11,13 @@ export default (input, options) => {
let globalUser = output.users && output.users.length === 1 ? output.users[0] : null;
if (input.posts && input.posts.length > 0) {
- output.posts = input.posts.map(post => processPost({name: post.name, html: post.html, globalUser, options}));
+ output.posts = [];
+ for (let i = 0; i < input.posts.length; i++) {
+ const post = input.posts[i];
+ if (post) {
+ output.posts.push(processPost({name: post.name, html: post.html, globalUser, options}));
+ }
+ }
}
return output;
diff --git a/packages/mg-squarespace-xml/lib/process.js b/packages/mg-squarespace-xml/lib/process.js
index fcf0a18f1..73d7c7734 100644
--- a/packages/mg-squarespace-xml/lib/process.js
+++ b/packages/mg-squarespace-xml/lib/process.js
@@ -33,97 +33,91 @@ const processContent = (html, options) => {
return '';
}
- const parsed = domUtils.parseFragment(html);
-
- if (options?.removeSelectors) {
- parsed.$(options.removeSelectors).forEach((el) => {
- el.remove();
- });
- }
+ return domUtils.processFragment(html, (parsed) => {
+ if (options?.removeSelectors) {
+ parsed.$(options.removeSelectors).forEach((el) => {
+ el.remove();
+ });
+ }
- parsed.$('.sqs-audio-embed').forEach((el) => {
- let audioSrc = el.getAttribute('data-url');
- let audioTitle = el.getAttribute('data-title');
+ parsed.$('.sqs-audio-embed').forEach((el) => {
+ let audioSrc = el.getAttribute('data-url');
+ let audioTitle = el.getAttribute('data-title');
- let cardOpts = {
- env: {dom: new SimpleDom.Document()},
- payload: {
- src: audioSrc,
- title: audioTitle
- }
- };
+ let cardOpts = {
+ env: {dom: new SimpleDom.Document()},
+ payload: {
+ src: audioSrc,
+ title: audioTitle
+ }
+ };
- const buildCard = audioCard.render(cardOpts);
- const cardHTML = buildCard.nodeValue;
+ const buildCard = audioCard.render(cardOpts);
+ const cardHTML = buildCard.nodeValue;
- domUtils.replaceWith(el, cardHTML);
- });
+ domUtils.replaceWith(el, cardHTML);
+ });
- parsed.$('.newsletter-form-wrapper').forEach((el) => {
- el.remove();
- });
+ parsed.$('.newsletter-form-wrapper').forEach((el) => {
+ el.remove();
+ });
- // squarespace images without src
- parsed.$('img[data-src]').forEach((img) => {
- const src = img.getAttribute('data-src');
- if (img.classList.contains('thumb-image')) {
- // images with the `thumb-image` class might be a duplicate
- // to prevent migrating two images, we have to remove the false node
- // Walk backwards to find the noscript sibling
- let sibling = img.previousElementSibling;
- while (sibling) {
- if (sibling.tagName === 'NOSCRIPT') {
- const noscriptImg = sibling.querySelector('img');
- if (noscriptImg && noscriptImg.getAttribute('src') === src) {
- img.remove();
+ // squarespace images without src
+ parsed.$('img[data-src]').forEach((img) => {
+ const src = img.getAttribute('data-src');
+ if (img.classList.contains('thumb-image')) {
+ // images with the `thumb-image` class might be a duplicate
+ // to prevent migrating two images, we have to remove the false node
+ // Walk backwards to find the noscript sibling
+ let sibling = img.previousElementSibling;
+ while (sibling) {
+ if (sibling.tagName === 'NOSCRIPT') {
+ const noscriptImg = sibling.querySelector('img');
+ if (noscriptImg && noscriptImg.getAttribute('src') === src) {
+ img.remove();
+ }
+ break;
}
- break;
+ sibling = sibling.previousElementSibling;
}
- sibling = sibling.previousElementSibling;
+ } else {
+ img.setAttribute('src', img.getAttribute('data-src'));
}
- } else {
- img.setAttribute('src', img.getAttribute('data-src'));
- }
- });
-
- parsed.$('figure blockquote').forEach((el) => {
- const nextSibling = el.nextElementSibling;
- let captionText = '';
- if (nextSibling && nextSibling.tagName === 'FIGCAPTION') {
- captionText = `
${domUtils.serializeChildren(nextSibling)}`;
- nextSibling.remove();
- }
- el.innerHTML = `
${domUtils.serializeChildren(el)}${captionText}
`; - }); - - parsed.$('.sqs-video-wrapper').forEach((el) => { - const theHtml = decode(el.getAttribute('data-html')); - const embedWrapper = el.closest('.embed-block-wrapper'); - const parent = embedWrapper ? embedWrapper.parentElement : null; + }); - if (parent) { - domUtils.replaceWith(parent, `${domUtils.serializeChildren(el)}${captionText}
`; + }); - // TODO: this should be a parser plugin - // Wrap nested lists in HTML card - parsed.$('ul li ul, ol li ol, ol li ul, ul li ol').forEach((nestedList) => { - // Walk up to the nearest parent ul/ol (equivalent to parentsUntil('ul, ol').parent()) - let topList = nestedList.parentElement?.closest('ul, ol'); - if (topList) { - domUtils.insertBefore(topList, ''); - domUtils.insertAfter(topList, ''); - } - }); + parsed.$('.sqs-video-wrapper').forEach((el) => { + const theHtml = decode(el.getAttribute('data-html')); + const embedWrapper = el.closest('.embed-block-wrapper'); + const parent = embedWrapper ? embedWrapper.parentElement : null; - // Convert HTML back to a string - html = parsed.html(); + if (parent) { + domUtils.replaceWith(parent, `Hello World
'); - -// Query elements (returns array) -const paragraphs = parsed.$('p'); - -// Get serialized HTML -const html = parsed.html(); - -// Get text content -const text = parsed.text(); +const {processFragment, processFragmentAsync} = domUtils; + +// Parse, manipulate, and get the result in one step +const html = processFragment('Hello
World
', (parsed) => { + for (const el of parsed.$('.remove')) { + el.remove(); + } + return parsed.html(); +}); +// => 'Hello
' + +// Extract data from HTML +const title = processFragment(rawHtml, parsed => parsed.$('h1')[0]?.textContent || ''); + +// Async version for callbacks that need to await +const result = await processFragmentAsync(html, async (parsed) => { + for (const img of parsed.$('img')) { + const newSrc = await processImage(img.getAttribute('src')); + img.setAttribute('src', newSrc); + } + return parsed.html(); +}); ``` +The `parsed` fragment provides: +- **`parsed.$(selector, context?)`** — query elements (returns `Element[]`) +- **`parsed.html()`** — serialize the fragment back to an HTML string +- **`parsed.text()`** — get text content +- **`parsed.document`** — access the underlying `Document` +- **`parsed.body`** — access the `` element + +For long-lived or complex processing where a callback doesn't fit, use `parseFragment` directly: + +```js +const {parseFragment} = domUtils; + +const parsed = parseFragment(html); +// ... extensive manipulation ... +const result = parsed.html(); +``` + +### DOM Manipulation Helpers + +```js +const {replaceWith, insertBefore, insertAfter, wrap, createElement, attr} = domUtils; + +const parsed = parseFragment('Old
tags by MarkdownIt html = await MgWpAPI.process.processShortcodes({html, options}); @@ -382,7 +381,7 @@ const processPost = async (post, users, options, fileCache) => { // Check for audio enclosure in post metadata and prepend audio card const metaData = await processWPMeta(post); const audioCardHTML = processEnclosureAudio(metaData); - const hasLibsynEmbed = parseFragment(postObj.data.html).$('iframe[src*="libsyn.com"]').length > 0; + const hasLibsynEmbed = processFragment(postObj.data.html, p => p.$('iframe[src*="libsyn.com"]').length > 0); if (audioCardHTML && !hasLibsynEmbed) { postObj.data.html = `${audioCardHTML}${postObj.data.html}`; } diff --git a/yarn.lock b/yarn.lock index 69b51c7aa..8c5afe098 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4631,16 +4631,6 @@ resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz#7739c232a1fee9b4d3ce8985f314c0c6d33549d7" integrity sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w== -"@types/jsdom@28.0.1": - version "28.0.1" - resolved "https://registry.yarnpkg.com/@types/jsdom/-/jsdom-28.0.1.tgz#2c014d8c0eca6135233519bff8c49f7aadfeda63" - integrity sha512-GJq2QE4TAZ5ajSoCasn5DOFm8u1mI3tIFvM5tIq3W5U/RTB6gsHwc6Yhpl91X9VSDOUVblgXmG+2+sSvFQrdlw== - dependencies: - "@types/node" "*" - "@types/tough-cookie" "*" - parse5 "^7.0.0" - undici-types "^7.21.0" - "@types/keyv@^3.1.4": version "3.1.4" resolved "https://registry.yarnpkg.com/@types/keyv/-/keyv-3.1.4.tgz#3ccdb1c6751b0c7e52300bcdacd5bcbf8faa75b6" @@ -4698,11 +4688,6 @@ dependencies: "@types/node" "*" -"@types/tough-cookie@*": - version "4.0.5" - resolved "https://registry.yarnpkg.com/@types/tough-cookie/-/tough-cookie-4.0.5.tgz#cb6e2a691b70cb177c6e3ae9c1d2e8b2ea8cd304" - integrity sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA== - "@types/unist@^2.0.0", "@types/unist@^2.0.2": version "2.0.11" resolved 
"https://registry.yarnpkg.com/@types/unist/-/unist-2.0.11.tgz#11af57b127e32487774841f7a4e54eab166d03c4" @@ -6239,6 +6224,11 @@ css-what@^6.1.0: resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.2.2.tgz#cdcc8f9b6977719fdfbd1de7aec24abf756b9dea" integrity sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA== +cssom@^0.5.0: + version "0.5.0" + resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.5.0.tgz#d254fa92cd8b6fbd83811b9fbaed34663cc17c36" + integrity sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw== + csv-parse@5.4.0: version "5.4.0" resolved "https://registry.yarnpkg.com/csv-parse/-/csv-parse-5.4.0.tgz#6793210a4a49a9a74b3fde3f9d00f3f52044fd89" @@ -7850,6 +7840,11 @@ html-escaper@^2.0.0: resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453" integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg== +html-escaper@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-3.0.3.tgz#4d336674652beb1dcbc29ef6b6ba7f6be6fdfed6" + integrity sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ== + html-minifier@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/html-minifier/-/html-minifier-4.0.0.tgz#cca9aad8bce1175e02e17a8c33e46d8988889f56" @@ -7879,7 +7874,7 @@ html-to-text@9.0.5: htmlparser2 "^8.0.2" selderee "^0.11.0" -htmlparser2@^10.1.0: +htmlparser2@^10.0.0, htmlparser2@^10.1.0: version "10.1.0" resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-10.1.0.tgz#fe3f2e12c73b6e462d4e10395db9c1119e4d6ae4" integrity sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ== @@ -8453,7 +8448,7 @@ jsbn@~0.1.0: resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513" integrity 
sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg== -jsdom@29.0.1, jsdom@^29.0.0: +jsdom@^29.0.0: version "29.0.1" resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-29.0.1.tgz#b2db17191533dd5ba1e0d4c61fe9fa2289e87be9" integrity sha512-z6JOK5gRO7aMybVq/y/MlIpKh8JIi68FBKMUtKkK2KH/wMSRlCxQ682d08LB9fYXplyY/UXG8P4XXTScmdjApg== @@ -8669,6 +8664,17 @@ lines-and-columns@^1.1.6: resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== +linkedom@0.18.12: + version "0.18.12" + resolved "https://registry.yarnpkg.com/linkedom/-/linkedom-0.18.12.tgz#a8b1a1942b567dcb1888093df311055da1349a14" + integrity sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q== + dependencies: + css-select "^5.1.0" + cssom "^0.5.0" + html-escaper "^3.0.3" + htmlparser2 "^10.0.0" + uhyphen "^0.2.0" + linkify-it@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-5.0.0.tgz#9ef238bfa6dc70bd8e7f9572b52d369af569b421" @@ -11472,16 +11478,16 @@ uglify-js@^3.1.4, uglify-js@^3.5.1: resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.19.3.tgz#82315e9bbc6f2b25888858acd1fff8441035b77f" integrity sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ== +uhyphen@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/uhyphen/-/uhyphen-0.2.0.tgz#8fdf0623314486e020a3c00ee5cc7a12fe722b81" + integrity sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA== + uint8array-extras@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/uint8array-extras/-/uint8array-extras-1.5.0.tgz#10d2a85213de3ada304fea1c454f635c73839e86" integrity 
sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A== -undici-types@^7.21.0: - version "7.24.6" - resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-7.24.6.tgz#61275b485d7fd4e9d269c7cf04ec2873c9cc0f91" - integrity sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg== - undici-types@~7.16.0: version "7.16.0" resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-7.16.0.tgz#ffccdff36aea4884cbfce9a750a0580224f58a46"