From 01d9236c8b98b9901cba1250a93c1be1288b26e4 Mon Sep 17 00:00:00 2001
From: rabuma abraham
Date: Sun, 5 Oct 2025 16:14:34 +0200
Subject: [PATCH 1/2] Fix #986: Preserve Wikipedia h2 headings

- Ensure h2 headings with meaningful content are not removed even if the parent div contains edit/talk links
- Updated _cleanHeaders, _cleanConditionally, and _prepArticle to handle this scenario
- Added test case in test/wikipedia-h2.js to verify fix
- Existing functionality and other tests remain unaffected
Fixes #986
---
Readability.js | 88 +++++++++++++++++++++++++++++---
test/wikipedia-h2.js | 116 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 196 insertions(+), 8 deletions(-)
create mode 100644 test/wikipedia-h2.js
diff --git a/Readability.js b/Readability.js
index 5cff4540..642a3c76 100644
--- a/Readability.js
+++ b/Readability.js
@@ -815,10 +815,29 @@ Readability.prototype = {
this._forEachNode(articleContent.children, function (topCandidate) {
this._cleanMatchedNodes(topCandidate, function (node, matchString) {
- return (
+ var shouldRemove =
this.REGEXPS.shareElements.test(matchString) &&
- node.textContent.length < shareElementThreshold
- );
+ node.textContent.length < shareElementThreshold;
+
+ // Fix for issue #986: Don't remove divs that contain h2 headings with meaningful content,
+ // even if they match share elements criteria. This prevents Wikipedia h2 headings from
+ // being removed along with their parent divs.
+ if (shouldRemove && node.tagName === "DIV") {
+ var h2Elements = this._getAllNodesWithTag(node, ["h2"]);
+ for (var i = 0; i < h2Elements.length; i++) {
+ var h2Text = this._getInnerText(h2Elements[i], false).trim();
+ if (h2Text.length) {
+ this.log(
+ "Preserving div with h2 heading despite share elements criteria:",
+ node,
+ h2Text
+ );
+ return false;
+ }
+ }
+ }
+
+ return shouldRemove;
});
});
@@ -2513,6 +2532,23 @@ Readability.prototype = {
var contentScore = 0;
if (weight + contentScore < 0) {
+ // Fix for issue #986: Don't remove divs that contain h2 headings with meaningful content,
+ // even if the div has negative class weight (e.g., contains "share" in class name).
+ // This prevents Wikipedia h2 headings from being removed along with their parent divs.
+ if (tag === "DIV") {
+ var h2Elements = this._getAllNodesWithTag(node, ["h2"]);
+ for (var i = 0; i < h2Elements.length; i++) {
+ var h2Text = this._getInnerText(h2Elements[i], false).trim();
+ if (h2Text.length) {
+ this.log(
+ "Preserving div with h2 heading despite negative class weight:",
+ node,
+ h2Text
+ );
+ return false;
+ }
+ }
+ }
return true;
}
@@ -2540,18 +2576,18 @@ Readability.prototype = {
"iframe",
]);
- for (var i = 0; i < embeds.length; i++) {
+ for (var k = 0; k < embeds.length; k++) {
// If this embed has attribute that matches video regex, don't delete it.
- for (var j = 0; j < embeds[i].attributes.length; j++) {
- if (this._allowedVideoRegex.test(embeds[i].attributes[j].value)) {
+ for (var j = 0; j < embeds[k].attributes.length; j++) {
+ if (this._allowedVideoRegex.test(embeds[k].attributes[j].value)) {
return false;
}
}
// For embed with <object> tag, check inner HTML as well.
Hermitian matrices are named after Charles Hermite, who demonstrated in 1855 that matrices of this form share a property with real symmetric matrices of always having real eigenvalues. Other, equivalent notations in common use are $A^{\mathsf{H}} = A^{\dagger} = A^{\ast}$, although note that in quantum mechanics, $A^{\ast}$ typically means the complex conjugate only, and not the conjugate transpose.
- Alternative characterizations[edit]
+ Alternative characterizations
Hermitian matrices can be characterized in a number of equivalent ways, some of which are listed below:
- Equality with the adjoint[edit]
+ Equality with the adjoint
A square matrix is Hermitian if and only if it is equal to its adjoint, that is, it satisfies
-
-
+
for any pair of vectors , where denotes the inner product operation.
This is also the way that the more general concept of self-adjoint operator is defined.
- Reality of quadratic forms[edit]
+ Reality of quadratic forms
A square matrix is Hermitian if and only if it is such that
Here, we offer another useful Hermitian matrix using an abstract example. If a square matrix equals the multiplication of a matrix and its conjugate transpose, that is, , then is a Hermitian positive semi-definite matrix. Furthermore, if is row full-rank, then is positive definite.
In mathematics, for a given complex Hermitian matrix M and nonzero vector x, the Rayleigh quotient[4], is defined as:[3]:p. 234[5]
@@ -233,7 +229,7 @@
The Rayleigh quotient is used in the min-max theorem to get exact values of all eigenvalues. It is also used in eigenvalue algorithms to obtain an eigenvalue approximation from an eigenvector approximation. Specifically, this is the basis for Rayleigh quotient iteration.
The range of the Rayleigh quotient (for matrix that is not necessarily Hermitian) is called a numerical range (or spectrum in functional analysis). When the matrix is Hermitian, the numerical range is equal to the spectral norm. Still in functional analysis, is known as the spectral radius. In the context of C*-algebras or algebraic quantum mechanics, the function that to M associates the Rayleigh quotient R(M, x) for a fixed x and M varying through the algebra would be referred to as "vector state" of the algebra.
On January 23, 1998, Netscape made two announcements: first, that Netscape Communicator will be free; second, that the source code will also be free.[3] One day later, Jamie Zawinski from Netscape registered mozilla.org.[4] The project was named Mozilla after the original code name of the Netscape Navigator browser which is a blending of "Mosaic and Godzilla"[5] and used to co-ordinate the development of the Mozilla Application Suite, the open source version of Netscape's internet software, Netscape Communicator.[6][7] Jamie Zawinski says he came up with the name "Mozilla" at a Netscape staff meeting.[8][9] A small group of Netscape employees were tasked with coordination of the new community.
Originally, Mozilla aimed to be a technology provider for companies, such as Netscape, who would commercialize their open source code.[10] When AOL (Netscape's parent company) greatly reduced its involvement with Mozilla in July 2003, the Mozilla Foundation was designated the legal steward of the project.[11] Soon after, Mozilla deprecated the Mozilla Suite in favor of creating independent applications for each function, primarily the Firefox web browser and the Thunderbird email client, and moved to supply them directly to the public.[12]
At the end of 2013, Mozilla announced a deal with Cisco Systems whereby Firefox would download and use a Cisco-provided binary build of an open source[17] codec to play the proprietary H.264 video format.[18][19] As part of the deal, Cisco would pay any patent licensing fees associated with the binaries that it distributes. Mozilla's CTO, Brendan Eich, acknowledged that this is "not a complete solution" and isn't "perfect".[20] An employee in Mozilla's video formats team, writing in an unofficial capacity, justified[21] it by the need to maintain their large user base, which would be necessary in future battles for truly free video formats.
In December 2013, Mozilla announced funding for the development of non-free games[22] through its Game Creator Challenge. However, even those games that may be released under a non-free software or open source license must be made with open web technologies and Javascript as per the work criteria outlined in the announcement.
On March 24, 2014, Mozilla promoted Brendan Eich to the role of CEO. This led to boycotts and protests from the LGBT community and its supporters, as Eich previously donated US$1,000[23] in 2008 in support of California's Proposition 8, a California ballot proposition and state constitutional amendment in opposition to same-sex marriage.[24] Eich's donation first became public knowledge in 2012, while he was Mozilla’s chief technical officer, leading to angry responses on Twitter—including the use of the hashtag "#wontworkwithbigots".[25]
Protests also emerged in 2014 following the announcement of Eich's appointment as CEO of Mozilla. U.S. companies OkCupid and CREDO Mobile received media coverage for their objections, with the former asking its users to boycott the browser,[26] while Credo amassed 50,000 signatures for a petition that called for Eich's resignation.
Due to the controversy, Eich voluntarily stepped down on April 3, 2014[27] and Mitchell Baker, executive chairwoman of Mozilla Corporation, posted a statement on the Mozilla blog: "We didn’t move fast enough to engage with people once the controversy started. Mozilla believes both in equality and freedom of speech. Equality is necessary for meaningful speech. And you need free speech to fight for equality."[28] Eich's resignation promoted a larger backlash from conservatives who felt he had been forced out of the company internally.[citation needed]
OkCupid co-founder and CEO Sam Yagan had also donated $500[29] to Republican candidate Chris Cannon who proceeded to vote for multiple measures viewed as "anti-gay", including the banning of same-sex marriage.[30][31][32][33] Yagan claims he did not know about Cannon's stance on gay rights and that his contribution was due to the candidate being the ranking Republican participating in the House subcommittee that oversaw Internet and Intellectual Property matters.[34][35][36][37][38]
Reader comments on articles that were published close to the events were divided between support for OkCupid's actions and opposition to them. Supporters claimed the boycott was justified and saw OkCupid's actions as a firm statement of opposition to intolerance towards the gay community. Opponents saw OkCupid's actions as hypocritical, since Eich is also the inventor of JavaScript, which is still required to browse OkCupid's website, and felt that users should not be punished for the actions of Mozilla and suspected that OkCupid's actions were a publicity stunt.[36][39]
According to Mozilla's manifesto,[40] which outlines goals, principles, and a pledge, "The Mozilla project uses a community-based approach to create world-class open source software and to develop new types of collaborative activities". Mozilla's manifesto mentions only its beliefs in regards to the Internet and Internet privacy, and has no mention of any political or social viewpoints.
Firefox began as an experimental branch of the Mozilla codebase by Dave Hyatt, Joe Hewitt and Blake Ross. They believed the commercial requirements of Netscape's sponsorship and developer-driven feature creep compromised the utility of the Mozilla browser.[46] To combat what they saw as the Mozilla Suite's software bloat, they created a stand-alone browser, with which they intended to replace the Mozilla Suite.
Firefox was originally named Phoenix but the name was changed so as to avoid trademark conflicts with Phoenix Technologies. The initially-announced replacement, Firebird, provoked objections from the Firebird project community.[47][48] The current name, Firefox, was chosen on February 9, 2004.[49]
Firefox Mobile is currently available for Android 2.2 and above devices with an ARMv7 or ARMv6 CPU.[51] The x86 architecture is not officially supported.[52] Tristan Nitot, president of Mozilla Europe, has said that it's unlikely that an iPhone or a BlackBerry version will be released, citing Apple's iTunes Store application approval policies (which forbid applications competing with Apple's own, and forbid engines which run downloaded code) and BlackBerry's limited operating system as the reasons.[53]
Firefox OS (project name: Boot to Gecko also known as B2G) is an open source operating system in development by Mozilla that aims to support HTML5 apps written using "open Web" technologies rather than platform-specific native APIs. The concept behind Firefox OS is that all user-accessible software will be HTML5 applications, that use Open Web APIs to access the phone's hardware directly via JavaScript.[54]
Some devices using this OS include[55] Alcatel One Touch Fire, ZTE Open, LG Fireweb.
Thunderbird is a free, open source, cross-platform email and news client developed by the volunteers of the Mozilla Community.
On July 16, 2012, Mitchell Baker announced that Mozilla's leadership had come to the conclusion that on-going stability was the most important thing for Thunderbird and that innovation in Thunderbird was no longer a priority for Mozilla. In that update Baker also suggested that Mozilla had provided a pathway for community to innovate around Thunderbird if the community chooses.[56]
SeaMonkey (formerly the Mozilla Application Suite) is a free and open source cross platform suite of Internet software components including a web browser component, a client for sending and receiving email and USENET newsgroup messages, an HTML editor (Mozilla Composer) and the ChatZilla IRC client.
On March 10, 2005, the Mozilla Foundation announced that it would not release any official versions of Mozilla Application Suite beyond 1.7.x, since it had now focused on the standalone applications Firefox and Thunderbird.[57] SeaMonkey is now maintained by the SeaMonkey Council, which has trademarked the SeaMonkey name with help from the Mozilla Foundation.[58] The Mozilla Foundation provides project hosting for the SeaMonkey developers.
SpiderMonkey is the original JavaScript engine developed by Brendan Eich when he invented JavaScript in 1995 as a developer at Netscape. It became part of the Mozilla product family when Mozilla inherited Netscape's code-base in 1998. In 2011, Eich transferred the nominal ownership of the SpiderMonkey code and project to Dave Mandelin.[60]
Firefox uses Gecko both for rendering web pages and for rendering its user interface. Gecko is also used by Thunderbird, SeaMonkey, and many non-Mozilla applications.
Rust is a compiled programming language being developed by Mozilla Research. It is designed for safety, concurrency, and performance. Rust is intended for creating large and complex software which needs to be both safe against exploits and fast.
Rust is being used in an experimental layout engine, Servo, which is developed by Mozilla and Samsung. Servo is not used in any consumer-oriented browsers yet. However, the Servo project developers plan for parts of the Servo source code to be merged into Gecko, and Firefox, incrementally.[64][65]
XULRunner is a software platform and technology experiment by Mozilla, that allows applications built with the same technologies used by Firefox extensions (XPCOM, Javascript, HTML, CSS, XUL) to be run natively as desktop applications, without requiring Firefox to be installed on the user's machine. XULRunner binaries are available for the Windows, GNU/Linux and OS X operating systems, allowing such applications to be effectively cross platform.
Pdf.js is a library developed by Mozilla that allows in-browser rendering of pdf documents using the HTML5 Canvas and Javascript. It is included by default in recent versions of Firefox, allowing the browser to render pdf documents without requiring an external plugin; and it is available separately as an extension named "PDF Viewer" for Firefox for Android, SeaMonkey, and the Firefox versions which don't include it built-in. It can also be included as part of a website's scripts, to allow pdf rendering for any browser that implements the required HTML5 features and can run Javascript.
Shumway is an open source replacement for the Adobe Flash Player, developed by Mozilla since 2012, using open web technologies as a replacement for Flash technologies. It uses Javascript and HTML5 Canvas elements to render Flash and execute Actionscript. It is included by default in Firefox Nightly and can be installed as an extension for any recent version of Firefox. The current implementation is limited in its capabilities to render Flash content outside simple projects.
Mozilla VR is a team focused on bringing Virtual reality tools, specifications, and standards to the open Web.[66] Mozilla VR maintains A-Frame (VR), a web framework for building VR experiences, and works on advancing WebVR support within web browsers.
Mozilla Persona is a secure, cross-browser website authentication mechanism which allows a user to use a single username and password (or other authentication method) to log in to multiple sites.[67] Mozilla Persona will be shutting down on November 30, 2016.[68]
Mozilla Webmaker is Mozilla's educational initiative, Webmaker's goal is to "help millions of people move from using the web to making the web." As part of Mozilla’s non-profit mission, Webmaker aims "to help the world increase their understanding of the web, take greater control of their online lives, and create a more web literate planet."[69][70][70]
Mozilla maintains a comprehensive developer documentation website called the Mozilla Developer Network which contains information about web technologies including HTML, CSS, SVG, JavaScript, as well Mozilla-specific information. In addition, Mozilla publishes a large number of videos about web technologies and the development of Mozilla projects on the Air Mozilla website.[71][72]
The Mozilla Community consists of over 40,000 active contributors from across the globe[citation needed]. It includes both paid employees and volunteers who work towards the goals set forth[40] in the Mozilla Manifesto. Many of the sub-communities in Mozilla have formed around localization efforts for Mozilla Firefox, and the Mozilla web properties.
There are a number of sub-communities that exist based on their geographical locations, where contributors near each other work together on particular activities, such as localization, marketing, PR and user support.
MozCamps are the critical part of the Grow Mozilla initiative which aims to grow the Mozilla Community. These camps aim to bring core contributors from around the world together. They are intensive multi-day summits that include keynote speeches by Mozilla leadership, workshops and breakout sessions (led by paid and unpaid staff), and fun social outings. All of these activities combine to reward contributors for their hard work, engage them with new products and initiatives, and align all attendees on Mozilla's mission.
Mozilla Summits are global events at which active contributors and Mozilla employees develop a shared understanding of Mozilla's mission together. Over 2,000 people representing 90 countries and 114 languages gathered in Santa Clara, Toronto and Brussels in 2013. Since its last summit in 2013, Mozilla has replaced summits with all-hands events, where both employees and volunteers come together to collaborate; these events are a scaled-down version of the Mozilla Summit.
diff --git a/test/wikipedia-h2.js b/test/wikipedia-h2.js
deleted file mode 100644
index 37b5bc43..00000000
--- a/test/wikipedia-h2.js
+++ /dev/null
@@ -1,116 +0,0 @@
-/* eslint-env node, mocha */
-
-/**
- * Test case for issue #986: Wikipedia h2 headings being skipped when parent div contains extra links
- *
- * This test verifies that h2 headings are preserved even when their parent div contains
- * links like "edit" or "talk" that might trigger negative class weight filtering.
- */
-
-const { JSDOM } = require("jsdom");
-const chai = require("chai");
-const expect = chai.expect;
-const Readability = require("../index").Readability;
-
-// Sample HTML that mimics Wikipedia structure with h2 headings in divs containing edit/talk links
-const testHTML = `
-
-
-
- Test Wikipedia Article
-
-
-