Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,23 @@ Words should be separated by a comma.
def wrod(wrods):
pass

Ignoring misspellings marked with "[sic]"
-----------------------------------------

The ``--ignore-sic`` option tells codespell to skip a misspelling that is
immediately followed by the editorial ``[sic]`` marker (case-insensitive). Only
the single occurrence preceding the marker is ignored, so other misspellings on
the same line are still reported. Whitespace, a closing quote, or a closing
parenthesis may sit between the word and the marker; the quoted form is the
common case when documenting a corrected typo (for example in a changelog):

.. code-block:: text

   correct the "wrod" [sic] typo in a changelog entry

Unlike ``codespell:ignore``, the marker is part of the prose itself and does not
require naming the word in a tooling comment.

Using a config file
-------------------

Expand Down
15 changes: 15 additions & 0 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@
# Matches the "ignore next line" directive: one character that is neither a
# word character nor whitespace (presumably a comment leader), optional
# whitespace, the tag, a word boundary, and optionally whitespace followed by a
# run of word characters and commas captured in the ``words`` group.
ignore_next_line_regex = re.compile(
rf"[^\w\s]\s*{codespell_ignore_next_line_tag}\b(\s+(?P<words>[\w,]*))?"
)
# Editorial "[sic]" marker following a word. Any run of straight or curly
# closing quotes, closing parentheses, and whitespace may sit between the word
# and the marker, so a quoted typo like `"wrod" [sic]` is matched. The marker
# itself is matched case-insensitively. Intended to be anchored at the end of a
# word with ``sic_regex.match(line, pos)``.
sic_regex = re.compile(r"[\"'’”)\s]*\[sic\]", re.IGNORECASE) # noqa: RUF001
# Usage text; ``%prog`` is presumably a program-name placeholder substituted
# where this string is consumed -- confirm at the use site.
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
"""
Expand Down Expand Up @@ -658,6 +661,13 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]:
metavar="LINES",
help="print LINES of surrounding context",
)
parser.add_argument(
"--ignore-sic",
action="store_true",
default=False,
help='ignore a misspelling immediately followed by the editorial "[sic]" '
"marker (optionally preceded by a closing quote).",
)
parser.add_argument(
"--stdin-single-line",
action="store_true",
Expand Down Expand Up @@ -1049,6 +1059,11 @@ def parse_lines(
):
continue

# A "[sic]" marker right after the word flags it as an
# intentional/quoted spelling, so leave it alone.
if options.ignore_sic and sic_regex.match(line, match.end()):
continue

context_shown = False
fix = misspellings[lword].fix
fixword = fix_case(word, misspellings[lword].data)
Expand Down
40 changes: 40 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,46 @@ def test_inline_ignores(
assert cs.main(d) == expected_error_count


@pytest.mark.parametrize(
    ("content", "expected_error_count"),
    [
        # marker right after the word (optional whitespace) excuses it:
        # one space, no space, and several spaces
        ("they wrod [sic] it\n", 0),
        ("they wrod[sic] it\n", 0),
        ("they wrod   [sic] it\n", 0),
        # case-insensitive marker
        ("they wrod [SIC] it\n", 0),
        # quoted typo followed by the marker (changelog use case)
        ('correct "wrod" [sic] typo\n', 0),
        ('correct "wrod"[sic] typo\n', 0),
        # only the immediately preceding occurrence is excused
        ("wrod wrod [sic]\n", 1),
        # a marker elsewhere on the line does not excuse the word
        ("wrod it [sic] anyway abilty\n", 2),
        # an intervening word breaks the association
        ('wrod" abilty [sic]\n', 1),
        # without a marker the misspelling is still reported
        ("they wrod it\n", 1),
        # not a real marker
        ("they wrod (sic) it\n", 1),
        ("they wrod [sick] it\n", 1),
    ],
)
def test_ignore_sic(
    tmpdir: pytest.TempPathFactory,
    capsys: pytest.CaptureFixture[str],
    content: str,
    expected_error_count: int,
) -> None:
    """Check that ``--ignore-sic`` excuses only words followed by "[sic]".

    Each case writes ``content`` to a file and runs codespell twice: once
    without the option, where every "wrod"/"abilty" occurrence must be
    reported, and once with ``--ignore-sic``, where the number of reported
    misspellings must equal ``expected_error_count``.
    """
    d = str(tmpdir)
    with open(op.join(d, "bad.txt"), "w", encoding="utf-8") as f:
        f.write(content)
    # off by default: the marker has no effect, so every known typo counts
    assert cs.main(d) == content.count("wrod") + content.count("abilty")
    # opt-in: only typos not directly followed by the marker remain
    assert cs.main("--ignore-sic", d) == expected_error_count


@pytest.mark.parametrize(
("content", "expected_error_count"),
[
Expand Down