diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0093303c..be87d66e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -9,12 +9,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.12'] + python-version: ['3.13'] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/tabulate.yml b/.github/workflows/tabulate.yml index c4594846..81069dc2 100644 --- a/.github/workflows/tabulate.yml +++ b/.github/workflows/tabulate.yml @@ -3,26 +3,35 @@ name: pytest on: - push - pull_request + - workflow_dispatch jobs: build: strategy: matrix: - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] os: ["ubuntu-latest", "windows-latest", "macos-latest"] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} allow-prereleases: true - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install pytest numpy pandas + python -m pip install pytest pytest-cov numpy pandas "wcwidth>=0.6.0" - name: Run tests run: | - pytest -v --doctest-modules --ignore benchmark/benchmark.py + pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" --cov=tabulate --cov-branch --cov-report=xml + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true diff --git a/.gitignore b/.gitignore index 0495ac79..61d0c83e 100644 --- a/.gitignore 
+++ b/.gitignore @@ -1,4 +1,4 @@ -/tabulate/version.py +/tabulate/_version.py build dist @@ -19,3 +19,4 @@ website-build/ ## Unit test / coverage reports .coverage .tox +/tabulate/_version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7349858e..9d940f05 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,13 @@ repos: -- repo: https://github.com/python/black - rev: 22.3.0 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.4 hooks: - - id: black - args: [--safe] - language_version: python3 + - id: ruff-check + args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.2.3 + rev: v6.0.0 hooks: + - id: end-of-file-fixer - id: trailing-whitespace - id: check-yaml - - id: debug-statements - - id: flake8 - language_version: python3 diff --git a/CHANGELOG b/CHANGELOG index 27374413..ce390bb2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,12 @@ -- 0.10.0: Add support for Python 3.11, 3.12, 3.13. - Drop support for Python 3.7, 3.8. +- 0.11.0: + Drop support of the legacy `youtrack` format. + Add support of the `Decimal` data type. + Improve output of `github` and `asciidoc` formats. + Add support of CSV and JSONL input data in CLI utility. + Always add `text-align: left` in HTML output. + Various bug fixes. +- 0.10.0: Add support for Python 3.11, 3.12, 3.13, 3.14. + Drop support for Python 3.7, 3.8, 3.9. PRESERVE_STERILITY global is replaced with preserve_sterility function argument. New formatting options: headersglobalalign, headersalign, colglobalalign. 
New output format: ``colon_grid`` (Pandoc grid_tables with alignment) diff --git a/HOWTOPUBLISH b/HOWTOPUBLISH index 29c4545c..24203ded 100644 --- a/HOWTOPUBLISH +++ b/HOWTOPUBLISH @@ -1,15 +1,17 @@ # update contributors and CHANGELOG in README python -m pre_commit run -a # and then commit changes -tox -e py39-extra,py310-extra,py311-extra,py312-extra,py313-extra -# tag version release -python -m build -s # this will update tabulate/version.py +tox -e py310-extra,py311-extra,py312-extra,py313-extra,py314-extra +# tag version release (vX.Y.Z) +python -m pip install build twine +python -m build -s # this will update tabulate/_version.py python -m pip install . # install tabulate in the current venv python -m pip install -r benchmark/requirements.txt python benchmark/benchmark.py # then update README # move tag to the last commit -python -m build -s # update tabulate/version.py +python -m build -s # update tabulate/_version.py python -m build -nswx . git push # wait for all CI builds to succeed git push --tags # if CI builds succeed twine upload --repository-url https://test.pypi.org/legacy/ dist/* twine upload dist/* +# use __token__ as username and a PyPI API token as password (generate at pypi.org → Account settings → API tokens) diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 90c057b7..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,6 +0,0 @@ -include LICENSE -include README -include README.md -include CHANGELOG -include test/common.py -include benchmark.py diff --git a/README b/README deleted file mode 120000 index 42061c01..00000000 --- a/README +++ /dev/null @@ -1 +0,0 @@ -README.md \ No newline at end of file diff --git a/README.md b/README.md index f3d0fa92..0283a0c3 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ pip install tabulate Build status ------------ 
-[![python-tabulate](https://github.com/astanin/python-tabulate/actions/workflows/tabulate.yml/badge.svg)](https://github.com/astanin/python-tabulate/actions/workflows/tabulate.yml) +[![python-tabulate](https://github.com/astanin/python-tabulate/actions/workflows/tabulate.yml/badge.svg)](https://github.com/astanin/python-tabulate/actions/workflows/tabulate.yml) [![codecov](https://codecov.io/github/astanin/python-tabulate/graph/badge.svg?token=Aa6wexP5wq)](https://codecov.io/github/astanin/python-tabulate) Library usage ------------- @@ -196,7 +196,6 @@ Supported table formats are: - "rst" - "mediawiki" - "moinmoin" -- "youtrack" - "html" - "unsafehtml" - "latex" @@ -234,12 +233,12 @@ bacon 0 ``` `github` follows the conventions of GitHub flavored Markdown. It -corresponds to the `pipe` format without alignment colons: +corresponds to the `pipe` format with the same alignment colons: ```pycon >>> print(tabulate(table, headers, tablefmt="github")) | item | qty | -|--------|-------| +|:-------|------:| | spam | 42 | | eggs | 451 | | bacon | 0 | @@ -501,12 +500,12 @@ format: ```pycon >>> print(tabulate(table, headers, tablefmt="asciidoc")) -[cols="8<,7>",options="header"] +[cols="<8,>7",options="header"] |==== -| item | qty -| spam | 42 -| eggs | 451 -| bacon | 0 +| item | qty +| spam | 42 +| eggs | 451 +| bacon | 0 |==== ``` @@ -577,21 +576,10 @@ MediaWiki-based sites: ```pycon >>> print(tabulate(table, headers, tablefmt="moinmoin")) -|| ''' item ''' || ''' qty ''' || -|| spam || 42 || -|| eggs || 451 || -|| bacon || 0 || - -``` - -`youtrack` format produces a table markup used in Youtrack tickets: - -```pycon ->>> print(tabulate(table, headers, tablefmt="youtrack")) -|| item || qty || -| spam | 42 | -| eggs | 451 | -| bacon | 0 | +|| ''' item ''' || ''' qty ''' || +|| spam || 42 || +|| eggs || 451 || +|| bacon || 0 || ``` @@ -616,12 +604,12 @@ and a .str property so that the raw HTML remains accessible. 
>>> print(tabulate(table, headers, tablefmt="html")) - + - - - + + +
item qty
item qty
spam 42
eggs 451
bacon 0
spam 42
eggs 451
bacon 0
@@ -819,7 +807,7 @@ methods `__str__` and `__float__` defined (and hence is convertible to a `float` and also has a `str` representation), the appropriate representation is selected for the column's deduced type. In order to not lose precision accidentally, types having both an `__int__` and -`__float__` represention will be considered a `float`. +`__float__` representation will be considered a `float`. Therefore, if your table contains types convertible to int/float but you'd *prefer* they be represented as strings, or your strings *might* all look @@ -1063,6 +1051,36 @@ the lines being wrapped would probably be significantly longer than this. ``` +Text is preferably wrapped on whitespaces and right after the hyphens in hyphenated words. + +break_long_words (default: True) If true, then words longer than width will be broken in order to ensure that no lines are longer than width. +If it is false, long words will not be broken, and some lines may be longer than width. +(Long words will be put on a line by themselves, in order to minimize the amount by which width is exceeded.) + +break_on_hyphens (default: True) If true, wrapping will occur preferably on whitespaces and right after hyphens in compound words, as it is customary in English. +If false, only whitespaces will be considered as potentially good places for line breaks. 
+ +```pycon +>>> print(tabulate([["John Smith", "Middle-Manager"]], headers=["Name", "Title"], tablefmt="grid", maxcolwidths=[None, 5], break_long_words=False)) ++------------+---------+ +| Name | Title | ++============+=========+ +| John Smith | Middle- | +| | Manager | ++------------+---------+ + +``` + +```pycon +>>> print(tabulate([["John Smith", "Middle-Manager"]], headers=["Name", "Title"], tablefmt="grid", maxcolwidths=[None, 5], break_long_words=False, break_on_hyphens=False)) ++------------+----------------+ +| Name | Title | ++============+================+ +| John Smith | Middle-Manager | ++------------+----------------+ + +``` + ### Adding Separating lines One might want to add one or more separating lines to highlight different sections in a table. @@ -1098,7 +1116,7 @@ table, however, ANSI escape sequences are not removed so the original styling is Some terminals support a special grouping of ANSI escape sequences that are intended to display hyperlinks much in the same way they are shown in browsers. These are handled just as mentioned before: non-printable -ANSI escape sequences are removed prior to string length calculation. The only diifference with escaped +ANSI escape sequences are removed prior to string length calculation. The only difference with escaped hyperlinks is that column width will be based on the length of the URL _text_ rather than the URL itself (terminals would show this text). For example: @@ -1109,24 +1127,46 @@ itself (terminals would show this text). For example: Usage of the command line utility --------------------------------- - Usage: tabulate [options] [FILE ...] - - FILE a filename of the file with tabular data; - if "-" or missing, read data from stdin. 
- - Options: - - -h, --help show this message - -1, --header use the first row of data as a table header - -o FILE, --output FILE print table to FILE (default: stdout) - -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace) - -F FPFMT, --float FPFMT floating point number format (default: g) - -I INTFMT, --int INTFMT integer point number format (default: "") - -f FMT, --format FMT set output table format; supported formats: - plain, simple, github, grid, fancy_grid, pipe, - orgtbl, rst, mediawiki, html, latex, latex_raw, - latex_booktabs, latex_longtable, tsv - (default: simple) +``` +Usage: tabulate [options] [FILE ...] + +Pretty-print tabular data. Use Python module for more features. + +FILE a filename of the file with tabular data; + if "-" or missing, read data from stdin. + +Options: + +-h, --help show this message + +INPUT: +-r, --read FILEFORMAT parse input FILEs as: + rsv (REGEXP-separated values, default), + csv (comma-separated values, Excel dialect), + jsonl (one JSON object per line) +-s REGEXP, --sep REGEXP column separator for rsv data (default: whitespace) + +FORMAT: +--headers HEADERS HEADERS can be one of: + "firstrow" (for csv and rsv data), + "keys" (for jsonl data), + "HEADER1,HEADER2,..." (for csv and rsv data), + "KEY1:HEADER1,KEY2:HEADER2,..." 
(for jsonl data) +-1 use the first row of input data as a table header + (the same as --headers firstrow) +-F FPFMT, --float FPFMT floating point number format (default: g) +-I INTFMT, --int INTFMT integer number format (default: "") +-f FMT, --format FMT set output table format (default: simple) + +Supported output formats: asciidoc, colon_grid, double_grid, double_outline, +fancy_grid, fancy_outline, github, grid, heavy_grid, heavy_outline, html, jira, +latex, latex_booktabs, latex_longtable, latex_raw, mediawiki, mixed_grid, mixed_outline, +moinmoin, orgtbl, outline, pipe, plain, presto, pretty, psql, rounded_grid, +rounded_outline, rst, simple, simple_grid, simple_outline, textile, tsv, unsafehtml. + +OUTPUT: +-o FILE, --output FILE print table to FILE (default: stdout) +``` Performance considerations -------------------------- @@ -1147,17 +1187,17 @@ simply joining lists of values with a tab, comma, or other separator. At the same time, `tabulate` is comparable to other table pretty-printers. Given a 10x10 table (a list of lists) of mixed text and numeric data, `tabulate` appears to be faster than `PrettyTable` and `texttable`. -The following mini-benchmark was run in Python 3.11.9 on Windows 11 (x64): +The following mini-benchmark was run in Python 3.13.7 on Windows 11 (x64): ================================== ========== =========== Table formatter time, μs rel. 
time ================================== ========== =========== - join with tabs and newlines 6.3 1.0 - csv to StringIO 6.6 1.0 - tabulate (0.10.0) 249.2 39.3 - tabulate (0.10.0, WIDE_CHARS_MODE) 325.6 51.4 - texttable (1.7.0) 579.3 91.5 - PrettyTable (3.11.0) 605.5 95.6 + csv to StringIO 11.9 1.0 + join with tabs and newlines 12.1 1.0 + PrettyTable (3.17.0) 468.0 39.3 + tabulate (0.10.0) 553.4 46.5 + tabulate (0.10.0, WIDE_CHARS_MODE) 612.2 51.4 + texttable (1.7.0) 1071.4 90.0 ================================== ========== =========== @@ -1254,4 +1294,6 @@ Vijaya Krishna Kasula, Furcy Pin, Christian Fibich, Shaun Duncan, Dimitri Papadopoulos, Élie Goudout, Racerroar888, Phill Zarfos, Keyacom, Andrew Coffey, Arpit Jain, Israel Roldan, ilya112358, Dan Nicholson, Frederik Scheerer, cdar07 (cdar), Racerroar888, -Perry Kundert. +Perry Kundert, Hnasar, Jun Koo, Jo2234, Bjorn Olsen, George Schizas, +Kadir Can Ozden, Jeff Quast, Mayukha Vadari, Rebecca Jean Herman, +Ján Jančár (J08nY). diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index a89b709e..f632a04b 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,8 +1,10 @@ +import sys from timeit import timeit -import tabulate + import prettytable import texttable -import sys + +import tabulate setup_code = r""" from csv import writer @@ -53,13 +55,13 @@ def run_tabulate(table, widechars=False): methods = [ ("join with tabs and newlines", "join_table(table)"), ("csv to StringIO", "csv_table(table)"), - ("tabulate (%s)" % tabulate.__version__, "run_tabulate(table)"), + (f"tabulate ({tabulate.__version__})", "run_tabulate(table)"), ( - "tabulate (%s, WIDE_CHARS_MODE)" % tabulate.__version__, + f"tabulate ({tabulate.__version__}, WIDE_CHARS_MODE)", "run_tabulate(table, widechars=True)", ), - ("PrettyTable (%s)" % prettytable.__version__, "run_prettytable(table)"), - ("texttable (%s)" % texttable.__version__, "run_texttable(table)"), + (f"PrettyTable ({prettytable.__version__})", 
"run_prettytable(table)"), + (f"texttable ({texttable.__version__})", "run_texttable(table)"), ] @@ -72,13 +74,9 @@ def benchmark(n): if "--onlyself" in sys.argv[1:]: methods = [m for m in methods if m[0].startswith("tabulate")] - results = [ - (desc, timeit(code, setup_code, number=n) / n * 1e6) for desc, code in methods - ] - mintime = min(map(lambda x: x[1], results)) - results = [ - (desc, t, t / mintime) for desc, t in sorted(results, key=lambda x: x[1]) - ] + results = [(desc, timeit(code, setup_code, number=n) / n * 1e6) for desc, code in methods] + mintime = min(x[1] for x in results) + results = [(desc, t, t / mintime) for desc, t in sorted(results, key=lambda x: x[1])] table = tabulate.tabulate( results, ["Table formatter", "time, μs", "rel. time"], "rst", floatfmt=".1f" ) diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt index 81086efe..861b7eea 100644 --- a/benchmark/requirements.txt +++ b/benchmark/requirements.txt @@ -1,2 +1,2 @@ prettytable -texttable \ No newline at end of file +texttable diff --git a/examples/people.csv b/examples/people.csv new file mode 100644 index 00000000..dfbf6dc5 --- /dev/null +++ b/examples/people.csv @@ -0,0 +1,6 @@ +id,name,email,"""favorite"" fruit" +1,Alice,alice@example.com,"apple, kiwi" +2,Bob,bob@example.com,"banana, +orange, +lychee" +3,Carol,,pear diff --git a/examples/people.jsonl b/examples/people.jsonl new file mode 100644 index 00000000..8dd35b43 --- /dev/null +++ b/examples/people.jsonl @@ -0,0 +1,2 @@ +{"id": 1, "name": "Alice", "email": "alice@example.com"} +{"id": 2, "name": "Bob", "email": "bob@example.com"} diff --git a/pyproject.toml b/pyproject.toml index cdd62d56..006e021a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,39 +1,68 @@ -# [build-system] -# requires = ["setuptools>=61.2.0", "setuptools_scm[toml]>=3.4.3"] -# build-backend = "setuptools.build_meta" +[build-system] +requires = ["flit_core>=3.12", "flit_scm"] +build-backend = "flit_scm:buildapi" [project] name = 
"tabulate" authors = [{name = "Sergey Astanin", email = "s.astanin@gmail.com"}] -license = {text = "MIT"} +license = "MIT" +license-files = ["LICENSE"] description = "Pretty-print tabular data" readme = "README.md" classifiers = [ "Development Status :: 4 - Beta", - "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Topic :: Software Development :: Libraries", ] -requires-python = ">=3.9" +requires-python = ">=3.10" dynamic = ["version"] [project.urls] Homepage = "https://github.com/astanin/python-tabulate" [project.optional-dependencies] -widechars = ["wcwidth"] +widechars = ["wcwidth>=0.6.0"] [project.scripts] -tabulate = "tabulate:_main" +tabulate = "tabulate.cli:_main" -# [tool.setuptools] -# packages = ["tabulate"] +[tool.flit.sdist] +include = ["CHANGELOG", "test/", "tox.ini"] -# [tool.setuptools_scm] -# write_to = "tabulate/version.py" +[tool.setuptools_scm] +write_to = "tabulate/_version.py" + +[dependency-groups] +dev = [ + "build>=1.4.0", + "ruff>=0.15.4", + "pre_commit>=4.5.1", + "tox>=4.47.3", + "tox-uv>=1.0", + "twine>=6.2.0", +] + +[tool.pytest.ini_options] +addopts = "-v --doctest-modules --ignore=benchmark --doctest-glob=README.md" + +[tool.ruff] +line-length = 99 +exclude = ["tabulate/_version.py"] + +[tool.ruff.lint] +extend-select = ["W", "B", "C4", "ISC", "I", "C90", "UP"] +ignore = ["B905", "E721", "C901"] + +[tool.ruff.lint.mccabe] +max-complexity = 22 + +[tool.ruff.lint.isort] +combine-as-imports = true +force-sort-within-sections = true +known-local-folder = ["common"] diff --git a/tabulate/__init__.py b/tabulate/__init__.py index a7826832..12a29507 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1,38 +1,48 
@@ """Pretty-print tabular data.""" -import warnings from collections import namedtuple -from collections.abc import Iterable, Sized +from collections.abc import Callable, Iterable, Sized +import dataclasses +from dataclasses import dataclass +from decimal import Decimal +from functools import partial, reduce from html import escape as htmlescape -from itertools import chain, zip_longest as izip_longest -from functools import reduce, partial +from importlib.metadata import PackageNotFoundError, version import io -import re +from itertools import chain, zip_longest as izip_longest import math +import re import textwrap -import dataclasses -import sys +import warnings try: import wcwidth # optional wide-character (CJK) support except ImportError: wcwidth = None - -def _is_file(f): - return isinstance(f, io.IOBase) +try: + __version__ = version("tabulate") # installed package +except PackageNotFoundError: + try: + from ._version import version as __version__ # editable / source checkout + except ImportError: + __version__ = "unknown" __all__ = ["tabulate", "tabulate_formats", "simple_separated_format"] -try: - from .version import version as __version__ # noqa: F401 -except ImportError: - pass # running __init__.py as a script, AppVeyor pytests - # minimum extra space in headers MIN_PADDING = 2 +# Whether or not to preserve leading/trailing whitespace in data. +PRESERVE_WHITESPACE = False + +# TextWrapper breaks words longer than 'width'. +_BREAK_LONG_WORDS = True +# TextWrapper is breaking hyphenated words. 
+_BREAK_ON_HYPHENS = True + + _DEFAULT_FLOATFMT = "g" _DEFAULT_INTFMT = "" _DEFAULT_MISSINGVAL = "" @@ -51,7 +61,12 @@ def _is_file(f): Line = namedtuple("Line", ["begin", "hline", "sep", "end"]) -DataRow = namedtuple("DataRow", ["begin", "sep", "end"]) +@dataclass +class DataRow: + begin: str + sep: str + end: str + escape_map: dict = None # A table structure is supposed to be: @@ -100,19 +115,21 @@ def _is_file(f): ) +def _is_file(f): + return isinstance(f, io.IOBase) + + def _is_separating_line_value(value): return type(value) is str and value.strip() == SEPARATING_LINE def _is_separating_line(row): row_type = type(row) - is_sl = (row_type == list or row_type == str) and ( + return (row_type is list or row_type is str) and ( (len(row) >= 1 and _is_separating_line_value(row[0])) or (len(row) >= 2 and _is_separating_line_value(row[1])) ) - return is_sl - def _pipe_segment_with_colons(align, colwidth): """Return a segment of a horizontal line with optional colons which @@ -133,8 +150,8 @@ def _pipe_line_with_colons(colwidths, colaligns): alignment (as in `pipe` output format).""" if not colaligns: # e.g. 
printing an empty data frame (github issue #15) colaligns = [""] * len(colwidths) - segments = [_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)] - return "|" + "|".join(segments) + "|" + segments = "|".join(_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)) + return f"|{segments}|" def _grid_segment_with_colons(colwidth, align): @@ -156,8 +173,8 @@ def _grid_line_with_colons(colwidths, colaligns): in a grid table.""" if not colaligns: colaligns = [""] * len(colwidths) - segments = [_grid_segment_with_colons(w, a) for a, w in zip(colaligns, colwidths)] - return "+" + "+".join(segments) + "+" + segments = "+".join(_grid_segment_with_colons(w, a) for a, w in zip(colaligns, colwidths)) + return f"+{segments}+" def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns): @@ -179,8 +196,8 @@ def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns): def _textile_row_with_attrs(cell_values, colwidths, colaligns): cell_values[0] += " " alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."} - values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values)) - return "|" + "|".join(values) + "|" + values = "|".join(alignment.get(a, "") + v for a, v in zip(colaligns, cell_values)) + return f"|{values}|" def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore): @@ -190,7 +207,7 @@ def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore): def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns): alignment = { - "left": "", + "left": ' style="text-align: left;"', "right": ' style="text-align: right;"', "center": ' style="text-align: center;"', "decimal": ' style="text-align: right;"', @@ -213,7 +230,7 @@ def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns): def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""): alignment = { - "left": "", + "left": '', "right": '', "center": 
'', "decimal": '', @@ -246,12 +263,8 @@ def make_header_line(is_header, colwidths, colaligns): alignment = {"left": "<", "right": ">", "center": "^", "decimal": ">"} # use the column widths generated by tabulate for the asciidoc column width specifiers - asciidoc_alignments = zip( - colwidths, [alignment[colalign] for colalign in colaligns] - ) - asciidoc_column_specifiers = [ - f"{width:d}{align}" for width, align in asciidoc_alignments - ] + asciidoc_alignments = zip(colwidths, [alignment[colalign] for colalign in colaligns]) + asciidoc_column_specifiers = [f"{align}{width:d}" for width, align in asciidoc_alignments] header_list = ['cols="' + (",".join(asciidoc_column_specifiers)) + '"'] # generate the list of options (currently only "header") @@ -261,16 +274,19 @@ def make_header_line(is_header, colwidths, colaligns): options_list.append("header") if options_list: - header_list += ['options="' + ",".join(options_list) + '"'] + options_list = ",".join(options_list) + header_list.append(f'options="{options_list}"') # generate the list of entries in the table header field - return "[{}]\n|====".format(",".join(header_list)) + line = "[{}]\n|====".format(",".join(header_list)) + return line.rstrip() if len(args) == 2: # two arguments are passed if called in the context of aboveline # print the table header with column widths and optional header tag - return make_header_line(False, *args) + line = make_header_line(False, *args) + return line.rstrip() elif len(args) == 3: # three arguments are passed if called in the context of dataline or headerline @@ -280,14 +296,15 @@ def make_header_line(is_header, colwidths, colaligns): data_line = "|" + "|".join(cell_values) if is_header: - return make_header_line(True, colwidths, colaligns) + "\n" + data_line + line = make_header_line(True, colwidths, colaligns) + "\n" + data_line + return line.rstrip() else: - return data_line + return data_line.rstrip() else: raise ValueError( - " _asciidoc_row() requires two (colwidths, 
colaligns) " - + "or three (cell_values, colwidths, colaligns) arguments) " + "_asciidoc_row() requires two (colwidths, colaligns) " + "or three (cell_values, colwidths, colaligns) arguments) " ) @@ -307,13 +324,10 @@ def make_header_line(is_header, colwidths, colaligns): } -def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES): - def escape_char(c): - return escrules.get(c, c) +_latex_row = DataRow("", "&", "\\\\", LATEX_ESCAPE_RULES) - escaped_values = ["".join(map(escape_char, cell)) for cell in cell_values] - rowfmt = DataRow("", "&", "\\\\") - return _build_simple_row(escaped_values, rowfmt) + +GITHUB_ESCAPE_RULES = {r"|": r"\|"} def _rst_escape_first_column(rows, headers): @@ -506,23 +520,13 @@ def escape_empty(val): padding=1, with_header_hide=None, ), - "github": TableFormat( - lineabove=Line("|", "-", "|", "|"), - linebelowheader=Line("|", "-", "|", "|"), - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, - with_header_hide=["lineabove"], - ), "pipe": TableFormat( lineabove=_pipe_line_with_colons, linebelowheader=_pipe_line_with_colons, linebetweenrows=None, linebelow=None, - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), + headerrow=DataRow("|", "|", "|", GITHUB_ESCAPE_RULES), + datarow=DataRow("|", "|", "|", GITHUB_ESCAPE_RULES), padding=1, with_header_hide=["lineabove"], ), @@ -611,16 +615,6 @@ def escape_empty(val): padding=1, with_header_hide=None, ), - "youtrack": TableFormat( - lineabove=None, - linebelowheader=None, - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("|| ", " || ", " || "), - datarow=DataRow("| ", " | ", " |"), - padding=1, - with_header_hide=None, - ), "html": TableFormat( lineabove=_html_begin_table_without_header, linebelowheader="", @@ -656,8 +650,8 @@ def escape_empty(val): linebelowheader=Line("\\hline", "", "", ""), linebetweenrows=None, linebelow=Line("\\hline\n\\end{tabular}", "", "", ""), - 
headerrow=partial(_latex_row, escrules={}), - datarow=partial(_latex_row, escrules={}), + headerrow=DataRow("", "&", "\\\\", {}), + datarow=DataRow("", "&", "\\\\", {}), padding=1, with_header_hide=None, ), @@ -713,8 +707,12 @@ def escape_empty(val): ), } +# "github" is an alias for "pipe": both produce GitHub-flavored Markdown with +# alignment colons in the separator row. +_table_formats["github"] = _table_formats["pipe"] + -tabulate_formats = list(sorted(_table_formats.keys())) +tabulate_formats = sorted(_table_formats.keys()) # The table formats for which multiline cells will be folded into subsequent # table rows. The key is the original format specified at the API. The value is @@ -737,6 +735,7 @@ def escape_empty(val): "pretty": "pretty", "psql": "psql", "rst": "rst", + "github": "github", "outline": "outline", "simple_outline": "simple_outline", "rounded_outline": "rounded_outline", @@ -749,7 +748,6 @@ def escape_empty(val): # TODO: Add multiline support for the remaining table formats: # - mediawiki: Replace \n with
# - moinmoin: TBD -# - youtrack: TBD # - html: Replace \n with
# - latex*: Use "makecell" package: In header, replace X\nY with # \thead{X\\Y} and in data row, replace X\nY with \makecell{X\\Y} @@ -1046,7 +1044,7 @@ def _padleft(width, s): True """ - fmt = "{0:>%ds}" % width + fmt = f"{{0:>{width}s}}" return fmt.format(s) @@ -1057,7 +1055,7 @@ def _padright(width, s): True """ - fmt = "{0:<%ds}" % width + fmt = f"{{0:<{width}s}}" return fmt.format(s) @@ -1068,7 +1066,7 @@ def _padboth(width, s): True """ - fmt = "{0:^%ds}" % width + fmt = f"{{0:^{width}s}}" return fmt.format(s) @@ -1105,13 +1103,21 @@ def _visible_width(s): """ # optional wide-character support if wcwidth is not None and WIDE_CHARS_MODE: - len_fn = wcwidth.wcswidth - else: - len_fn = len + # when already a string, it could contain terminal sequences, + # wcwidth >= 0.3.0 handles ANSI codes internally, + if hasattr(wcwidth, "width"): + return wcwidth.width(str(s)) + # while previous versions need them stripped first. + if isinstance(s, (str, bytes)): + return wcwidth.wcswidth(_strip_ansi(str(s))) + + # Otherwise, coerce to string, guaranteed to be without any control codes, + # we can use wcswidth() directly. 
+ return wcwidth.wcswidth(str(s)) if isinstance(s, (str, bytes)): - return len_fn(_strip_ansi(s)) + return len(_strip_ansi(s)) else: - return len_fn(str(s)) + return len(str(s)) def _is_multiline(s): @@ -1135,7 +1141,7 @@ def _choose_width_fn(has_invisible, enable_widechars, is_multiline): else: line_width_fn = len if is_multiline: - width_fn = lambda s: _multiline_width(s, line_width_fn) # noqa + width_fn = lambda s: _multiline_width(s, line_width_fn) # noqa: E731 else: width_fn = line_width_fn return width_fn @@ -1175,7 +1181,9 @@ def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline) else: line_width_fn = len if is_multiline: - width_fn = lambda s: _align_column_multiline_width(s, line_width_fn) # noqa + width_fn = lambda s: _align_column_multiline_width( # noqa: E731 + s, line_width_fn + ) else: width_fn = line_width_fn return width_fn @@ -1209,9 +1217,7 @@ def _align_column( strings, padfn = _align_column_choose_padfn( strings, alignment, has_invisible, preserve_whitespace ) - width_fn = _align_column_choose_width_fn( - has_invisible, enable_widechars, is_multiline - ) + width_fn = _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline) s_widths = list(map(width_fn, strings)) maxwidth = max(max(_flat_list(s_widths)), minwidth) @@ -1219,15 +1225,13 @@ def _align_column( if is_multiline: if not enable_widechars and not has_invisible: padded_strings = [ - "\n".join([padfn(maxwidth, s) for s in ms.splitlines()]) - for ms in strings + "\n".join([padfn(maxwidth, s) for s in ms.splitlines()]) for ms in strings ] else: # enable wide-character width corrections s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings] visible_widths = [ - [maxwidth - (w - l) for w, l in zip(mw, ml)] - for mw, ml in zip(s_widths, s_lens) + [maxwidth - (w - ln) for w, ln in zip(mw, ml)] for mw, ml in zip(s_widths, s_lens) ] # wcswidth and _visible_width don't count invisible characters; # padfn doesn't need to apply another 
correction @@ -1241,7 +1245,7 @@ def _align_column( else: # enable wide-character width corrections s_lens = list(map(len, strings)) - visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] + visible_widths = [maxwidth - (w - ln) for w, ln in zip(s_widths, s_lens)] # wcswidth and _visible_width don't count invisible characters; # padfn doesn't need to apply another correction padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)] @@ -1306,7 +1310,7 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): tabulate(tbl, headers=hrow) == good_result True - """ # noqa + """ if val is None: return missingval if isinstance(val, (bytes, str)) and not val: @@ -1317,19 +1321,13 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): elif valtype is int: if isinstance(val, str): val_striped = val.encode("unicode_escape").decode("utf-8") - colored = re.search( - r"(\\[xX]+[0-9a-fA-F]+\[\d+[mM]+)([0-9.]+)(\\.*)$", val_striped - ) + colored = re.search(r"(\\[xX]+[0-9a-fA-F]+\[\d+[mM]+)([0-9.]+)(\\.*)$", val_striped) if colored: total_groups = len(colored.groups()) if total_groups == 3: digits = colored.group(2) if digits.isdigit(): - val_new = ( - colored.group(1) - + format(int(digits), intfmt) - + colored.group(3) - ) + val_new = colored.group(1) + format(int(digits), intfmt) + colored.group(3) val = val_new.encode("utf-8").decode("unicode_escape") intfmt = "" return format(val, intfmt) @@ -1342,25 +1340,29 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): is_a_colored_number = has_invisible and isinstance(val, (str, bytes)) if is_a_colored_number: raw_val = _strip_ansi(val) - formatted_val = format(float(raw_val), floatfmt) + try: + formatted_val = format(float(raw_val), floatfmt) + except (ValueError, TypeError): + return f"{val}" return val.replace(raw_val, formatted_val) else: if isinstance(val, str) and "," in val: val = val.replace(",", "") # handle 
thousands-separators - return format(float(val), floatfmt) + if isinstance(val, Decimal): + return format(val, floatfmt) + try: + return format(float(val), floatfmt) + except (ValueError, TypeError): + return f"{val}" else: return f"{val}" -def _align_header( - header, alignment, width, visible_width, is_multiline=False, width_fn=None -): +def _align_header(header, alignment, width, visible_width, is_multiline=False, width_fn=None): "Pad string header to width chars given known visible_width of the header." if is_multiline: header_lines = re.split(_multiline_codes, header) - padded_lines = [ - _align_header(h, alignment, width, width_fn(h)) for h in header_lines - ] + padded_lines = [_align_header(h, alignment, width, width_fn(h)) for h in header_lines] return "\n".join(padded_lines) # else: not multiline ninvisible = len(header) - visible_width @@ -1402,7 +1404,7 @@ def _prepend_row_index(rows, index): if isinstance(index, Sized) and len(index) != len(rows): raise ValueError( "index must be as long as the number of data rows: " - + f"len(index)={len(index)} len(rows)={len(rows)}" + f"len(index)={len(index)} len(rows)={len(rows)}" ) sans_rows, separating_lines = _remove_separating_lines(rows) new_rows = [] @@ -1470,23 +1472,18 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): index = None if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"): # dict-like and pandas.DataFrame? 
- if hasattr(tabular_data.values, "__call__"): + if callable(tabular_data.values): # likely a conventional dict keys = tabular_data.keys() try: - rows = list( - izip_longest(*tabular_data.values()) - ) # columns have to be transposed - except TypeError: # not iterable - raise TypeError(err_msg) + rows = list(izip_longest(*tabular_data.values())) # columns have to be transposed + except TypeError as e: # not iterable + raise TypeError(err_msg) from e elif hasattr(tabular_data, "index"): # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0) keys = list(tabular_data) - if ( - showindex in ["default", "always", True] - and tabular_data.index.name is not None - ): + if showindex in ["default", "always", True] and tabular_data.index.name is not None: if isinstance(tabular_data.index.name, list): keys[:0] = tabular_data.index.name else: @@ -1504,17 +1501,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): else: # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses try: rows = list(tabular_data) - except TypeError: # not iterable - raise TypeError(err_msg) + except TypeError as e: # not iterable + raise TypeError(err_msg) from e if headers == "keys" and not rows: # an empty table (issue #81) headers = [] - elif ( - headers == "keys" - and hasattr(tabular_data, "dtype") - and getattr(tabular_data.dtype, "names") - ): + elif headers == "keys" and hasattr(tabular_data, "dtype") and tabular_data.dtype.names: # numpy record array headers = tabular_data.dtype.names elif ( @@ -1553,9 +1546,7 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): else: headers = [] elif headers: - raise ValueError( - "headers for a list of dicts is not a dict or a keyword" - ) + raise ValueError("headers for a list of dicts is not a dict or a keyword") rows = [[row.get(k) for k in keys] for row in rows] elif ( @@ -1568,16 +1559,15 @@ def _normalize_tabular_data(tabular_data, headers, 
showindex="default"): # print tabulate(cursor, headers='keys') headers = [column[0] for column in tabular_data.description] - elif ( - dataclasses is not None - and len(rows) > 0 - and dataclasses.is_dataclass(rows[0]) - ): + elif dataclasses is not None and len(rows) > 0 and dataclasses.is_dataclass(rows[0]): # Python's dataclass field_names = [field.name for field in dataclasses.fields(rows[0])] if headers == "keys": headers = field_names - rows = [[getattr(row, f) for f in field_names] for row in rows] + rows = [ + ([getattr(row, f) for f in field_names] if not _is_separating_line(row) else row) + for row in rows + ] elif headers == "keys" and len(rows) > 0: # keys are column indices @@ -1597,11 +1587,11 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): headers = list(map(str, headers)) # rows = list(map(list, rows)) - rows = list(map(lambda r: r if _is_separating_line(r) else list(r), rows)) + rows = [r if _is_separating_line(r) else list(r) for r in rows] # add or remove an index column showindex_is_a_str = type(showindex) in [str, bytes] - if showindex == "default" and index is not None: + if showindex_is_a_str and showindex == "default" and index is not None: rows = _prepend_row_index(rows, index) elif isinstance(showindex, Sized) and not showindex_is_a_str: rows = _prepend_row_index(rows, list(showindex)) @@ -1623,7 +1613,14 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers, headers_pad -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, missingval=_DEFAULT_MISSINGVAL): +def _wrap_text_to_colwidths( + list_of_lists, + colwidths, + numparses=True, + missingval=_DEFAULT_MISSINGVAL, + break_long_words=_BREAK_LONG_WORDS, + break_on_hyphens=_BREAK_ON_HYPHENS, +): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1640,13 +1637,23 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, missingval continue if width is not None: - wrapper = 
_CustomTextWrap(width=width) + wrapper = _CustomTextWrap( + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) # Cast based on our internal type handling. Any future custom # formatting of types (such as datetimes) may need to be more # explicit than just `str` of the object. Also doesn't work for # custom floatfmt/intfmt, nor with any missing/blank cells. casted_cell = ( - missingval if cell is None else str(cell) if _isnumber(cell) else _type(cell, numparse)(cell) + missingval + if cell is None + else ( + str(cell) + if cell == "" or _isnumber(cell) + else str(_type(cell, numparse)(cell)) + ) ) wrapped = [ "\n".join(wrapper.wrap(line)) @@ -1705,6 +1712,8 @@ def tabulate( headersalign=None, rowalign=None, maxheadercolwidths=None, + break_long_words=_BREAK_LONG_WORDS, + break_on_hyphens=_BREAK_ON_HYPHENS, ): """Format a fixed width table for pretty printing. @@ -2131,11 +2140,11 @@ def tabulate( ... headers="firstrow", tablefmt="html")) - + - - + +
strings numbers
strings numbers
spam 41.9999
eggs 451
spam 41.9999
eggs 451
@@ -2203,8 +2212,8 @@ def tabulate( Tabulate will, by default, set the width of each column to the length of the longest element in that column. However, in situations where fields are expected to reasonably be too long to look good as a single line, tabulate can help automate - word wrapping long fields for you. Use the parameter `maxcolwidth` to provide a - list of maximal column widths + word wrapping long fields for you. Use the parameter `maxcolwidths` to provide a + list of maximal column widths: >>> print(tabulate( \ [('1', 'John Smith', \ @@ -2221,7 +2230,7 @@ def tabulate( | | | better if it is wrapped a bit | +------------+------------+-------------------------------+ - Header column width can be specified in a similar way using `maxheadercolwidth` + Header column width can be specified in a similar way using `maxheadercolwidths`. """ @@ -2247,21 +2256,29 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, maxcolwidths, numparses=numparses, missingval=missingval + list_of_lists, + maxcolwidths, + numparses=numparses, + missingval=missingval, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) if maxheadercolwidths is not None: - num_cols = len(list_of_lists[0]) + num_cols = len(list_of_lists[0]) if list_of_lists else len(headers) if isinstance(maxheadercolwidths, int): # Expand scalar for all columns - maxheadercolwidths = _expand_iterable( - maxheadercolwidths, num_cols, maxheadercolwidths - ) + maxheadercolwidths = _expand_iterable(maxheadercolwidths, num_cols, maxheadercolwidths) else: # Ignore col width for any 'trailing' columns maxheadercolwidths = _expand_iterable(maxheadercolwidths, num_cols, None) numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], maxheadercolwidths, numparses=numparses, missingval=missingval + [headers], + maxheadercolwidths, + numparses=numparses, + missingval=missingval, 
+ break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, )[0] # empty values in the first column of RST tables should be escaped (issue #82) @@ -2324,17 +2341,13 @@ def tabulate( numparses = _expand_numparse(disable_numparse, len(cols)) coltypes = [_column_type(col, numparse=np) for col, np in zip(cols, numparses)] if isinstance(floatfmt, str): # old version - float_formats = len(cols) * [ - floatfmt - ] # just duplicate the string to use in each column + float_formats = len(cols) * [floatfmt] # just duplicate the string to use in each column else: # if floatfmt is list, tuple etc we have one per column float_formats = list(floatfmt) if len(float_formats) < len(cols): float_formats.extend((len(cols) - len(float_formats)) * [_DEFAULT_FLOATFMT]) if isinstance(intfmt, str): # old version - int_formats = len(cols) * [ - intfmt - ] # just duplicate the string to use in each column + int_formats = len(cols) * [intfmt] # just duplicate the string to use in each column else: # if intfmt is list, tuple etc we have one per column int_formats = list(intfmt) if len(int_formats) < len(cols): @@ -2363,7 +2376,7 @@ def tabulate( assert isinstance(colalign, Iterable) if isinstance(colalign, str): warnings.warn( - f"As a string, `colalign` is interpreted as {[c for c in colalign]}. " + f"As a string, `colalign` is interpreted as {list(colalign)}. " f'Did you mean `colglobalalign = "{colalign}"` or `colalign = ("{colalign}",)`?', stacklevel=2, ) @@ -2372,9 +2385,7 @@ def tabulate( break elif align != "global": aligns[idx] = align - minwidths = ( - [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols) - ) + minwidths = [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols) aligns_copy = aligns.copy() # Reset alignments in copy of alignments list to "left" for 'colon_grid' format, # which enforces left alignment in the text output of the data. 
@@ -2407,7 +2418,7 @@ def tabulate( assert isinstance(headersalign, Iterable) if isinstance(headersalign, str): warnings.warn( - f"As a string, `headersalign` is interpreted as {[c for c in headersalign]}. " + f"As a string, `headersalign` is interpreted as {list(headersalign)}. " f'Did you mean `headersglobalalign = "{headersalign}"` ' f'or `headersalign = ("{headersalign}",)`?', stacklevel=2, @@ -2421,8 +2432,7 @@ def tabulate( elif align != "global": aligns_headers[hidx] = align minwidths = [ - max(minw, max(width_fn(cl) for cl in c)) - for minw, c in zip(minwidths, t_cols) + max(minw, max(width_fn(cl) for cl in c)) for minw, c in zip(minwidths, t_cols) ] headers = [ _align_header(h, a, minw, width_fn(h), is_multiline, width_fn) @@ -2494,17 +2504,35 @@ def _pad_row(cells, padding): return cells -def _build_simple_row(padded_cells, rowfmt): +def _build_simple_row(padded_cells: list[list], rowfmt: DataRow) -> str: "Format row according to DataRow format without padding." - begin, sep, end = rowfmt - return (begin + sep.join(padded_cells) + end).rstrip() + begin = rowfmt.begin + sep = rowfmt.sep + end = rowfmt.end + escape_map: dict = rowfmt.escape_map + + if escape_map: + + def escape_char(c): + return escape_map.get(c, c) + + escaped_cells = ["".join(map(escape_char, cell)) for cell in padded_cells] + else: + escaped_cells = padded_cells + + return (begin + sep.join(escaped_cells) + end).rstrip() -def _build_row(padded_cells, colwidths, colaligns, rowfmt): +def _build_row( + padded_cells: list[list], + colwidths: list[int], + colaligns: list[str], + rowfmt: DataRow | Callable, +) -> str: "Return a string which represents a row of data cells." 
if not rowfmt: return None - if hasattr(rowfmt, "__call__"): + if callable(rowfmt): return rowfmt(padded_cells, colwidths, colaligns) else: return _build_simple_row(padded_cells, rowfmt) @@ -2541,8 +2569,7 @@ def _append_multiline_row( # ] cells_lines = [ - _align_cell_veritically(cl, nlines, w, rowalign) - for cl, w in zip(cells_lines, colwidths) + _align_cell_veritically(cl, nlines, w, rowalign) for cl, w in zip(cells_lines, colwidths) ] lines_cells = [[cl[i] for cl in cells_lines] for i in range(nlines)] for ln in lines_cells: @@ -2555,12 +2582,13 @@ def _build_line(colwidths, colaligns, linefmt): "Return a string which represents a horizontal line." if not linefmt: return None - if hasattr(linefmt, "__call__"): + if callable(linefmt): return linefmt(colwidths, colaligns) else: begin, fill, sep, end = linefmt cells = [fill * w for w in colwidths] - return _build_simple_row(cells, (begin, sep, end)) + rowfmt = DataRow(begin, sep, end) + return _build_simple_row(cells, rowfmt) def _append_line(lines, colwidths, colaligns, linefmt): @@ -2592,7 +2620,7 @@ def _format_table( padded_widths = [(w + 2 * pad) for w in colwidths] if is_multiline: - pad_row = lambda row, _: row # noqa do it later, in _append_multiline_row + pad_row = lambda row, _: row # noqa: E731 # do it later, in _append_multiline_row append_row = partial(_append_multiline_row, pad=pad) else: pad_row = _pad_row @@ -2644,9 +2672,7 @@ def _format_table( if _is_separating_line(row): _append_line(lines, padded_widths, colaligns, separating_line) else: - append_row( - lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow - ) + append_row(lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow) if fmt.linebelow and "linebelow" not in hidden: _append_line(lines, padded_widths, colaligns, fmt.linebelow) @@ -2690,10 +2716,8 @@ def _update_lines(self, lines, new_line): as add any colors from previous lines order to preserve the same formatting as a single unwrapped string. 
""" - code_matches = [x for x in _ansi_codes.finditer(new_line)] - color_codes = [ - code.string[code.span()[0] : code.span()[1]] for code in code_matches - ] + code_matches = list(_ansi_codes.finditer(new_line)) + color_codes = [code.string[code.span()[0] : code.span()[1]] for code in code_matches] # Add color codes from earlier in the unwrapped line, and then track any new ones we add. new_line = "".join(self._active_codes) + new_line @@ -2727,24 +2751,26 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. - if self.break_long_words: + if self.break_long_words and space_left > 0: # Tabulate Custom: Build the string up piece-by-piece in order to # take each charcter's width into account chunk = reversed_chunks[-1] i = 1 # Only count printable characters, so strip_ansi first, index later. - while len(_strip_ansi(chunk)[:i]) <= space_left: + stripped_chunk = _strip_ansi(chunk) + while i <= len(stripped_chunk) and self._len(stripped_chunk[:i]) <= space_left: i = i + 1 + # Always consume at least one character so _wrap_chunks makes + # progress even when the first character is wider than space_left + # (e.g. a 2-column CJK char in a 1-column-wide slot). 
+ i = max(i, 2) # Consider escape codes when breaking words up total_escape_len = 0 last_group = 0 if _ansi_codes.search(chunk) is not None: for group, _, _, _ in _ansi_codes.findall(chunk): escape_len = len(group) - if ( - group - in chunk[last_group : i + total_escape_len + escape_len - 1] - ): + if group in chunk[last_group : i + total_escape_len + escape_len - 1]: total_escape_len += escape_len found = _ansi_codes.search(chunk[last_group:]) last_group += found.end() @@ -2777,7 +2803,7 @@ def _wrap_chunks(self, chunks): """ lines = [] if self.width <= 0: - raise ValueError("invalid width %r (must be > 0)" % self.width) + raise ValueError(f"invalid width {self.width!r} (must be > 0)") if self.max_lines is not None: if self.max_lines > 1: indent = self.subsequent_indent @@ -2791,7 +2817,6 @@ def _wrap_chunks(self, chunks): chunks.reverse() while chunks: - # Start the list of chunks that will make up the current line. # cur_len is just the length of all the chunks in cur_line. cur_line = [] @@ -2851,10 +2876,7 @@ def _wrap_chunks(self, chunks): self._update_lines(lines, indent + "".join(cur_line)) else: while cur_line: - if ( - cur_line[-1].strip() - and cur_len + self._len(self.placeholder) <= width - ): + if cur_line[-1].strip() and cur_len + self._len(self.placeholder) <= width: cur_line.append(self.placeholder) self._update_lines(lines, indent + "".join(cur_line)) break @@ -2863,10 +2885,7 @@ def _wrap_chunks(self, chunks): else: if lines: prev_line = lines[-1].rstrip() - if ( - self._len(prev_line) + self._len(self.placeholder) - <= self.width - ): + if self._len(prev_line) + self._len(self.placeholder) <= self.width: lines[-1] = prev_line + self.placeholder break self._update_lines(lines, indent + self.placeholder.lstrip()) @@ -2875,126 +2894,7 @@ def _wrap_chunks(self, chunks): return lines -def _main(): - """\ - Usage: tabulate [options] [FILE ...] - - Pretty-print tabular data. 
- See also https://github.com/astanin/python-tabulate - - FILE a filename of the file with tabular data; - if "-" or missing, read data from stdin. - - Options: - - -h, --help show this message - -1, --header use the first row of data as a table header - -o FILE, --output FILE print table to FILE (default: stdout) - -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace) - -F FPFMT, --float FPFMT floating point number format (default: g) - -I INTFMT, --int INTFMT integer point number format (default: "") - -f FMT, --format FMT set output table format; supported formats: - plain, simple, grid, fancy_grid, pipe, orgtbl, - rst, mediawiki, html, latex, latex_raw, - latex_booktabs, latex_longtable, tsv - (default: simple) - """ - import getopt - - usage = textwrap.dedent(_main.__doc__) - try: - opts, args = getopt.getopt( - sys.argv[1:], - "h1o:s:F:I:f:", - [ - "help", - "header", - "output=", - "sep=", - "float=", - "int=", - "colalign=", - "format=", - ], - ) - except getopt.GetoptError as e: - print(e) - print(usage) - sys.exit(2) - headers = [] - floatfmt = _DEFAULT_FLOATFMT - intfmt = _DEFAULT_INTFMT - colalign = None - tablefmt = "simple" - sep = r"\s+" - outfile = "-" - for opt, value in opts: - if opt in ["-1", "--header"]: - headers = "firstrow" - elif opt in ["-o", "--output"]: - outfile = value - elif opt in ["-F", "--float"]: - floatfmt = value - elif opt in ["-I", "--int"]: - intfmt = value - elif opt in ["-C", "--colalign"]: - colalign = value.split() - elif opt in ["-f", "--format"]: - if value not in tabulate_formats: - print("%s is not a supported table format" % value) - print(usage) - sys.exit(3) - tablefmt = value - elif opt in ["-s", "--sep"]: - sep = value - elif opt in ["-h", "--help"]: - print(usage) - sys.exit(0) - files = [sys.stdin] if not args else args - with sys.stdout if outfile == "-" else open(outfile, "w") as out: - for f in files: - if f == "-": - f = sys.stdin - if _is_file(f): - _pprint_file( - f, - 
headers=headers, - tablefmt=tablefmt, - sep=sep, - floatfmt=floatfmt, - intfmt=intfmt, - file=out, - colalign=colalign, - ) - else: - with open(f) as fobj: - _pprint_file( - fobj, - headers=headers, - tablefmt=tablefmt, - sep=sep, - floatfmt=floatfmt, - intfmt=intfmt, - file=out, - colalign=colalign, - ) - - -def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, intfmt, file, colalign): - rows = fobject.readlines() - table = [re.split(sep, r.rstrip()) for r in rows if r.strip()] - print( - tabulate( - table, - headers, - tablefmt, - floatfmt=floatfmt, - intfmt=intfmt, - colalign=colalign, - ), - file=file, - ) - - if __name__ == "__main__": + from .cli import _main + _main() diff --git a/tabulate/__main__.py b/tabulate/__main__.py new file mode 100644 index 00000000..c6efd79c --- /dev/null +++ b/tabulate/__main__.py @@ -0,0 +1,3 @@ +from tabulate.cli import _main + +_main() diff --git a/tabulate/cli.py b/tabulate/cli.py new file mode 100644 index 00000000..cfae6fea --- /dev/null +++ b/tabulate/cli.py @@ -0,0 +1,227 @@ +"""Command-line interface for tabulate.""" + +from functools import partial +import re +import sys +import textwrap + +try: + from . import ( + _DEFAULT_FLOATFMT, + _DEFAULT_INTFMT, + _is_file, + tabulate, + tabulate_formats, + ) +except ImportError: # pragma: no cover + # running as a script: python tabulate/cli.py + import os as _os + import sys as _sys + + _sys.path.insert(0, _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))) + from tabulate import ( + _DEFAULT_FLOATFMT, + _DEFAULT_INTFMT, + _is_file, + tabulate, + tabulate_formats, + ) + + +def _main(): + """\ + Usage: tabulate [options] [FILE ...] + + Pretty-print tabular data. Use Python module for more features. + + FILE a filename of the file with tabular data; + if "-" or missing, read data from stdin. 
+ + Options: + + -h, --help show this message + + INPUT: + -r, --read FILEFORMAT parse input FILEs as: + rsv (REGEXP-separated values, default), + csv (comma-separated valued, Excel dialect), + jsonl (one JSON object per line) + -s REGEXP, --sep REGEXP column separator for rsv data (default: whitespace) + + FORMAT: + --headers HEADERS HEADERS can be one of: + "firstrow" (for csv and rsv data), + "keys" (for jsonl data), + "HEADER1,HEADER2,..." (for csv and rsv data), + "KEY1:HEADER1,KEY2:HEADER2,..." (for jsonl data) + -1 use the first row of input data as a table header + (the same as --headers firstrow) + -F FPFMT, --float FPFMT floating point number format (default: g) + -I INTFMT, --int INTFMT integer point number format (default: "") + -f FMT, --format FMT set output table format (default: simple) + + Supported output formats: asciidoc, colon_grid, double_grid, double_outline, + fancy_grid, fancy_outline, github, grid, heavy_grid, heavy_outline, html, jira, + latex, latex_booktabs, latex_longtable, latex_raw, mediawiki, mixed_grid, mixed_outline, + moinmoin, orgtbl, outline, pipe, plain, presto, pretty, psql, rounded_grid, + rounded_outline, rst, simple, simple_grid, simple_outline, textile, tsv, unsafehtml. 
+ + OUTPUT: + -o FILE, --output FILE print table to FILE (default: stdout) + + """ + import getopt + + usage = textwrap.dedent(_main.__doc__) + try: + opts, args = getopt.getopt( + sys.argv[1:], + "h1H:r:o:s:F:I:f:", + [ + "help", + "header", # deprecated in CLI > 0.10 + "headers=", # CLI > 0.10 + "read=", # CLI > 0.10 + "output=", + "sep=", + "float=", + "int=", + "colalign=", + "format=", + ], + ) + except getopt.GetoptError as e: + print(e, file=sys.stderr) + print(usage) + sys.exit(2) + headers = [] + floatfmt = _DEFAULT_FLOATFMT + intfmt = _DEFAULT_INTFMT + colalign = None + tablefmt = "simple" + fileformat = "rsv" + sep = r"\s+" + outfile = "-" + special_headers_values = ["firstrow", "keys"] + for opt, value in opts: + if opt in ["-1", "--header"]: + # "header" option is for backwards compatibility with CLI <= 0.10 + # CLI >= 0.11 should user --headers + headers = "firstrow" + if opt in ["-H", "--headers"]: + if value in special_headers_values: + headers = value + else: + headers = value # may need to be processed + elif opt in ["-o", "--output"]: + outfile = value + elif opt in ["-F", "--float"]: + floatfmt = value + elif opt in ["-I", "--int"]: + intfmt = value + elif opt in ["-C", "--colalign"]: + colalign = value.split() + elif opt in ["-r", "--read"]: + fileformat = value.lower() + elif opt in ["-f", "--format"]: + if value not in tabulate_formats: + print(f"{value} is not a supported output format", file=sys.stderr) + print(usage) + sys.exit(3) + tablefmt = value + elif opt in ["-s", "--sep"]: + sep = value + elif opt in ["-h", "--help"]: + print(usage) + sys.exit(0) + # choose a reader and parse headers option + if fileformat == "rsv": + reader = partial(_read_rsv_file, sep=sep) + if type(headers) is str and headers not in special_headers_values: + # parse as CSV values + headers = headers.split(",") + elif fileformat == "csv": + reader = _read_csv_file + if type(headers) is str and headers not in special_headers_values: + # parse as CSV values + 
headers = headers.split(",") + elif fileformat == "jsonl": + reader = _read_jsonl_file + if not headers: + headers = "keys" # reasonable default + if type(headers) is str and headers not in special_headers_values: + # "," and ":" in header titles are not supported in CLI + try: + headers2 = dict(tuple(hh.split(":", 2)) for hh in headers.split(",")) + except Exception: + print(f"cannot parse headers parameter: {headers}", file=sys.stderr) + headers2 = [] + headers = headers2 + else: + print(f"{fileformat} is not a supported file format") + sys.exit(3) + # format all input files + files = [sys.stdin] if not args else args + with sys.stdout if outfile == "-" else open(outfile, "w") as out: + for f in files: + if f == "-": + f = sys.stdin + _open_and_pprint_file( + reader, + f, + headers=headers, + tablefmt=tablefmt, + floatfmt=floatfmt, + intfmt=intfmt, + file=out, + colalign=colalign, + ) + + +def _read_rsv_file(fobject, sep): + rows = fobject.readlines() + table = [re.split(sep, r.rstrip()) for r in rows if r.strip()] + return table + + +def _read_jsonl_file(fobject): + import json + + rows: list[str] = fobject.readlines() + table = [json.loads(row) for row in rows] + return table + + +def _read_csv_file(fobject): + import csv + + reader = csv.reader(fobject, dialect="excel") + table = [list(row) for row in reader] + return table + + +def _open_and_pprint_file(reader, f, *args, **kwargs): + if _is_file(f): + _pprint_file(reader, f, *args, **kwargs) + else: + with open(f) as fobj: + _pprint_file(reader, fobj, *args, **kwargs) + + +def _pprint_file(reader, fobject, headers, tablefmt, floatfmt, intfmt, file, colalign): + table = reader(fobject) + print( + tabulate( + table, + headers, + tablefmt, + floatfmt=floatfmt, + intfmt=intfmt, + colalign=colalign, + ), + file=file, + ) + + +if __name__ == "__main__": # pragma: no cover + _main() diff --git a/test/common.py b/test/common.py index ec2fb351..fc994003 100644 --- a/test/common.py +++ b/test/common.py @@ -1,19 +1,19 
@@ -import pytest # noqa -from pytest import skip, raises # noqa import warnings +import pytest # noqa: F401 +from pytest import raises, skip # noqa: F401 + def assert_equal(expected, result): - print("Expected:\n%r\n" % expected) - print("Got:\n%r\n" % result) + print(f"Expected:\n{expected!r}\n") + print(f"Got:\n{result!r}\n") assert expected == result def assert_in(result, expected_set): - nums = range(1, len(expected_set) + 1) - for i, expected in zip(nums, expected_set): - print("Expected %d:\n%s\n" % (i, expected)) - print("Got:\n%s\n" % result) + for i, expected in enumerate(expected_set, start=1): + print(f"Expected {i}:\n{expected}\n") + print(f"Got:\n{result}\n") assert result in expected_set @@ -40,6 +40,6 @@ def check_warnings(func_args_kwargs, *, num=None, category=None, contain=None): if num is not None: assert len(W) == num if category is not None: - assert all([issubclass(w.category, category) for w in W]) + assert all(issubclass(w.category, category) for w in W) if contain is not None: - assert all([contain in str(w.message) for w in W]) + assert all(contain in str(w.message) for w in W) diff --git a/test/test_api.py b/test/test_api.py index 062573c7..cb59836e 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,32 +1,30 @@ -"""API properties. 
+"""API properties.""" -""" +from tabulate import simple_separated_format, tabulate, tabulate_formats -from tabulate import tabulate, tabulate_formats, simple_separated_format from common import skip - try: - from inspect import signature, _empty + from inspect import _empty, signature except ImportError: signature = None _empty = None def test_tabulate_formats(): - "API: tabulate_formats is a list of strings" "" + "API: tabulate_formats is a list of strings" supported = tabulate_formats - print("tabulate_formats = %r" % supported) + print(f"tabulate_formats = {supported!r}") assert type(supported) is list for fmt in supported: - assert type(fmt) is str # noqa + assert type(fmt) is str def _check_signature(function, expected_sig): if not signature: skip("") actual_sig = signature(function) - print(f"expected: {expected_sig}\nactual: {str(actual_sig)}\n") + print(f"expected: {expected_sig}\nactual: {actual_sig}\n") assert len(actual_sig.parameters) == len(expected_sig) @@ -35,8 +33,8 @@ def _check_signature(function, expected_sig): def test_tabulate_signature(): - "API: tabulate() type signature is unchanged" "" - assert type(tabulate) is type(lambda: None) # noqa + "API: tabulate() type signature is unchanged" + assert type(tabulate) is type(lambda: None) expected_sig = [ ("tabular_data", _empty), ("headers", ()), @@ -56,12 +54,14 @@ def test_tabulate_signature(): ("headersalign", None), ("rowalign", None), ("maxheadercolwidths", None), + ("break_long_words", True), + ("break_on_hyphens", True), ] _check_signature(tabulate, expected_sig) def test_simple_separated_format_signature(): - "API: simple_separated_format() type signature is unchanged" "" - assert type(simple_separated_format) is type(lambda: None) # noqa + "API: simple_separated_format() type signature is unchanged" + assert type(simple_separated_format) is type(lambda: None) expected_sig = [("separator", _empty)] _check_signature(simple_separated_format, expected_sig) diff --git a/test/test_cli.py 
b/test/test_cli.py index e71572d3..154f1a2b 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -1,18 +1,38 @@ -"""Command-line interface. - -""" +"""Command-line interface.""" +import contextlib +import io import os -import sys - - import subprocess +import sys import tempfile +from unittest.mock import patch +from tabulate.cli import _main from common import assert_equal +class _UnclosableStringIO(io.StringIO): + """StringIO that ignores close() so getvalue() works after a 'with' block.""" + + def close(self): + pass # _main does `with sys.stdout as out:`, which would close a plain StringIO + + +def run_main_in_process(args, input_text=None): + """Call _main() in-process, capturing stdout. Returns the captured output.""" + stdin = io.StringIO(input_text) if input_text is not None else sys.stdin + stdout = _UnclosableStringIO() + with ( + patch("sys.argv", ["tabulate"] + args), + patch("sys.stdin", stdin), + contextlib.redirect_stdout(stdout), + ): + _main() + return stdout.getvalue() + + SAMPLE_SIMPLE_FORMAT = "\n".join( [ "----- ------ -------------", @@ -99,9 +119,7 @@ def __init__(self): self.tmpfile = None def __enter__(self): - self.tmpfile = tempfile.NamedTemporaryFile( - "w+", prefix="tabulate-test-tmp-", delete=False - ) + self.tmpfile = tempfile.NamedTemporaryFile("w+", prefix="tabulate-test-tmp-", delete=False) return self.tmpfile def __exit__(self, exc_type, exc_val, exc_tb): @@ -112,7 +130,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): def test_script_from_stdin_to_stdout(): """Command line utility: read from stdin, print to stdout""" - cmd = [sys.executable, "tabulate/__init__.py"] + cmd = [sys.executable, "tabulate/cli.py"] out = run_and_capture_stdout(cmd, input=sample_input()) expected = SAMPLE_SIMPLE_FORMAT print("got: ", repr(out)) @@ -125,7 +143,7 @@ def test_script_from_file_to_stdout(): with TemporaryTextFile() as tmpfile: tmpfile.write(sample_input()) tmpfile.seek(0) - cmd = [sys.executable, "tabulate/__init__.py", tmpfile.name] + 
cmd = [sys.executable, "tabulate/cli.py", tmpfile.name] out = run_and_capture_stdout(cmd) expected = SAMPLE_SIMPLE_FORMAT print("got: ", repr(out)) @@ -141,7 +159,7 @@ def test_script_from_file_to_file(): input_file.seek(0) cmd = [ sys.executable, - "tabulate/__init__.py", + "tabulate/cli.py", "-o", output_file.name, input_file.name, @@ -164,7 +182,7 @@ def test_script_from_file_to_file(): def test_script_header_option(): """Command line utility: -1, --header option""" for option in ["-1", "--header"]: - cmd = [sys.executable, "tabulate/__init__.py", option] + cmd = [sys.executable, "tabulate/cli.py", option] raw_table = sample_input(with_headers=True) out = run_and_capture_stdout(cmd, input=raw_table) expected = SAMPLE_SIMPLE_FORMAT_WITH_HEADERS @@ -177,7 +195,7 @@ def test_script_header_option(): def test_script_sep_option(): """Command line utility: -s, --sep option""" for option in ["-s", "--sep"]: - cmd = [sys.executable, "tabulate/__init__.py", option, ","] + cmd = [sys.executable, "tabulate/cli.py", option, ","] raw_table = sample_input(sep=",") out = run_and_capture_stdout(cmd, input=raw_table) expected = SAMPLE_SIMPLE_FORMAT @@ -191,7 +209,7 @@ def test_script_floatfmt_option(): for option in ["-F", "--float"]: cmd = [ sys.executable, - "tabulate/__init__.py", + "tabulate/cli.py", option, ".1e", "--format", @@ -208,7 +226,7 @@ def test_script_floatfmt_option(): def test_script_format_option(): """Command line utility: -f, --format option""" for option in ["-f", "--format"]: - cmd = [sys.executable, "tabulate/__init__.py", "-1", option, "grid"] + cmd = [sys.executable, "tabulate/cli.py", "-1", option, "grid"] raw_table = sample_input(with_headers=True) out = run_and_capture_stdout(cmd, input=raw_table) expected = SAMPLE_GRID_FORMAT_WITH_HEADERS @@ -216,3 +234,252 @@ def test_script_format_option(): print("got: ", repr(out)) print("expected:", repr(expected)) assert_equal(out.splitlines(), expected.splitlines()) + + +SAMPLE_INPUT_JSONL = "\n".join( + [ + 
'{"id": 1, "name": "Alice", "email": "alice@example.com"}', + '{"id": 2, "name": "Bob", "email": "bob@example.com"}', + ] +) + +SAMPLE_GRID_FORMAT = "\n".join( + [ + "+------+--------+-------------------+", + "| id | name | email |", + "+======+========+===================+", + "| 1 | Alice | alice@example.com |", + "+------+--------+-------------------+", + "| 2 | Bob | bob@example.com |", + "+------+--------+-------------------+", + ] +) + + +def test_module_jsonl_from_stdin(): + """Command line utility: python -m tabulate with JSONL input from stdin""" + cmd = [sys.executable, "-m", "tabulate", "-r", "jsonl", "-f", "grid"] + out = run_and_capture_stdout(cmd, input=SAMPLE_INPUT_JSONL) + expected = SAMPLE_GRID_FORMAT + print("got: ", repr(out)) + print("expected:", repr(expected)) + assert_equal(out.splitlines(), expected.splitlines()) + + +SAMPLE_REMAPPED_HEADERS = "\n".join( + [ + " ID First Name Email", + "---- ------------ -----------------", + " 1 Alice alice@example.com", + " 2 Bob bob@example.com", + ] +) + + +SAMPLE_INPUT_CSV = ( + 'id,name,email,"""favorite"" fruit"\n' + '1,Alice,alice@example.com,"apple, kiwi"\n' + '2,Bob,bob@example.com,"banana,\norange,\nlychee"\n' + "3,Carol,,pear\n" +) + +SAMPLE_CSV_FORMAT = "\n".join( + [ + "-- ----- ----------------- ----------------", + 'id name email "favorite" fruit', + "1 Alice alice@example.com apple, kiwi", + "2 Bob bob@example.com banana,", + " orange,", + " lychee", + "3 Carol pear", + "-- ----- ----------------- ----------------", + ] +) + + +def test_module_csv_from_stdin(): + """Command line utility: python -m tabulate with CSV input from stdin""" + cmd = [sys.executable, "-m", "tabulate", "-r", "csv"] + out = run_and_capture_stdout(cmd, input=SAMPLE_INPUT_CSV) + expected = SAMPLE_CSV_FORMAT + print("got: ", repr(out)) + print("expected:", repr(expected)) + assert_equal(out.splitlines(), expected.splitlines()) + + +def test_module_jsonl_remapped_headers(): + """Command line utility: --headers with 
key:header remapping for JSONL input""" + cmd = [ + sys.executable, + "-m", + "tabulate", + "-r", + "jsonl", + "--headers", + "id:ID,name:First Name,email:Email", + ] + out = run_and_capture_stdout(cmd, input=SAMPLE_INPUT_JSONL) + expected = SAMPLE_REMAPPED_HEADERS + print("got: ", repr(out)) + print("expected:", repr(expected)) + assert_equal(out.splitlines(), expected.splitlines()) + + +# --------------------------------------------------------------------------- +# In-process tests: same scenarios as above but calling _main() directly so +# that coverage.py can instrument the code in tabulate/cli.py. +# --------------------------------------------------------------------------- + + +def test_inprocess_stdin_to_stdout(): + """In-process: read RSV from stdin, print to stdout""" + out = run_main_in_process([], input_text=sample_input()) + assert_equal(out.splitlines(), SAMPLE_SIMPLE_FORMAT.splitlines()) + + +def test_inprocess_header_option(): + """In-process: -1 / --header / --headers firstrow""" + for args in [["-1"], ["--header"], ["--headers", "firstrow"]]: + out = run_main_in_process(args, input_text=sample_input(with_headers=True)) + assert_equal(out.splitlines(), SAMPLE_SIMPLE_FORMAT_WITH_HEADERS.splitlines()) + + +def test_inprocess_sep_option(): + """In-process: -s / --sep""" + for opt in ["-s", "--sep"]: + out = run_main_in_process([opt, ","], input_text=sample_input(sep=",")) + assert_equal(out.splitlines(), SAMPLE_SIMPLE_FORMAT.splitlines()) + + +def test_inprocess_floatfmt_option(): + """In-process: -F / --float""" + for opt in ["-F", "--float"]: + out = run_main_in_process([opt, ".1e", "--format", "grid"], input_text=sample_input()) + assert_equal(out.splitlines(), SAMPLE_GRID_FORMAT_WITH_DOT1E_FLOATS.splitlines()) + + +def test_inprocess_format_option(): + """In-process: -f / --format""" + for opt in ["-f", "--format"]: + out = run_main_in_process(["-1", opt, "grid"], input_text=sample_input(with_headers=True)) + assert_equal(out.splitlines(), 
SAMPLE_GRID_FORMAT_WITH_HEADERS.splitlines()) + + +def test_inprocess_file_to_file(): + """In-process: read from file, write to file (-o)""" + with TemporaryTextFile() as input_file: + with TemporaryTextFile() as output_file: + input_file.write(sample_input()) + input_file.flush() + run_main_in_process(["-o", output_file.name, input_file.name]) + output_file.seek(0) + out = output_file.file.read() + assert_equal(out.splitlines(), SAMPLE_SIMPLE_FORMAT.splitlines()) + + +def test_inprocess_jsonl_from_stdin(): + """In-process: JSONL input from stdin, grid format""" + out = run_main_in_process(["-r", "jsonl", "-f", "grid"], input_text=SAMPLE_INPUT_JSONL) + assert_equal(out.splitlines(), SAMPLE_GRID_FORMAT.splitlines()) + + +def test_inprocess_jsonl_remapped_headers(): + """In-process: JSONL input with key:header remapping""" + out = run_main_in_process( + ["-r", "jsonl", "--headers", "id:ID,name:First Name,email:Email"], + input_text=SAMPLE_INPUT_JSONL, + ) + assert_equal(out.splitlines(), SAMPLE_REMAPPED_HEADERS.splitlines()) + + +def test_inprocess_csv_from_stdin(): + """In-process: CSV input from stdin""" + out = run_main_in_process(["-r", "csv"], input_text=SAMPLE_INPUT_CSV) + assert_equal(out.splitlines(), SAMPLE_CSV_FORMAT.splitlines()) + + +def test_inprocess_invalid_option(): + """In-process: unrecognised option exits with code 2""" + import pytest + + with pytest.raises(SystemExit) as exc_info: + run_main_in_process(["--no-such-option"], input_text="a b\n1 2\n") + assert exc_info.value.code == 2 + + +def test_inprocess_help_option(): + """In-process: --help / -h exits with code 0""" + import pytest + + for opt in ["-h", "--help"]: + with pytest.raises(SystemExit) as exc_info: + run_main_in_process([opt], input_text="") + assert exc_info.value.code == 0 + + +def test_inprocess_invalid_format(): + """In-process: unknown --format value exits with code 3""" + import pytest + + with pytest.raises(SystemExit) as exc_info: + run_main_in_process(["-f", 
"nosuchformat"], input_text="a b\n1 2\n") + assert exc_info.value.code == 3 + + +def test_inprocess_invalid_fileformat(): + """In-process: unknown --read value exits with code 3""" + import pytest + + with pytest.raises(SystemExit) as exc_info: + run_main_in_process(["-r", "xml"], input_text="") + assert exc_info.value.code == 3 + + +def test_inprocess_int_option(): + """In-process: -I / --int option""" + jsonl_ints = '{"n": 1000000}\n{"n": 2000000}\n' + for opt in ["-I", "--int"]: + out = run_main_in_process(["-r", "jsonl", opt, "_"], input_text=jsonl_ints) + assert "1_000_000" in out + + +def test_inprocess_colalign_option(): + """In-process: --colalign option""" + out = run_main_in_process( + ["--colalign", "left left left", "-1"], + input_text=sample_input(with_headers=True), + ) + assert "Planet" in out + + +def test_inprocess_rsv_custom_headers(): + """In-process: --headers with custom column names for RSV input""" + out = run_main_in_process(["--headers", "Planet,Radius,Mass"], input_text=sample_input()) + assert_equal(out.splitlines(), SAMPLE_SIMPLE_FORMAT_WITH_HEADERS.splitlines()) + + +def test_inprocess_csv_custom_headers(): + """In-process: --headers with custom column names overrides CSV first row""" + csv_data = "Sun,696000,1.9891e9\nEarth,6371,5973.6\n" + out = run_main_in_process( + ["-r", "csv", "--headers", "Planet,Radius,Mass"], input_text=csv_data + ) + assert "Planet" in out and "Radius" in out and "Mass" in out + + +def test_inprocess_stdin_dash_arg(): + """In-process: '-' as filename reads from stdin""" + out = run_main_in_process(["-"], input_text=sample_input()) + assert_equal(out.splitlines(), SAMPLE_SIMPLE_FORMAT.splitlines()) + + +def test_inprocess_jsonl_malformed_headers(): + """In-process: malformed key:header mapping falls back to no headers""" + # A header spec without ':' can't be parsed into key-value pairs; + # _main catches the ValueError and proceeds with an empty headers list. 
+ out = run_main_in_process( + ["-r", "jsonl", "--headers", "no_colon_here"], + input_text=SAMPLE_INPUT_JSONL, + ) + # output should still be produced (graceful fallback), with raw keys as headers + assert out.strip() != "" diff --git a/test/test_grapheme_clusters.py b/test/test_grapheme_clusters.py new file mode 100644 index 00000000..ef6d4e7a --- /dev/null +++ b/test/test_grapheme_clusters.py @@ -0,0 +1,282 @@ +"""Tests for Unicode grapheme cluster handling in tabulate.""" + +from unittest import mock + +import pytest + +from tabulate import tabulate + +try: + import wcwidth + + HAS_WCWIDTH = True + HAS_WCWIDTH_030 = hasattr(wcwidth, "wrap") + HAS_WCWIDTH_WIDTH = hasattr(wcwidth, "width") +except ImportError: + wcwidth = None + HAS_WCWIDTH = False + HAS_WCWIDTH_030 = False + HAS_WCWIDTH_WIDTH = False + +requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") + +requires_wcwidth_030 = pytest.mark.skipif(not HAS_WCWIDTH_030, reason="requires wcwidth >= 0.3.0") + +requires_wcwidth_width = pytest.mark.skipif( + not HAS_WCWIDTH_WIDTH, reason="requires wcwidth with width() API" +) + + +class TestGraphemeClusterWidth: + """Tests for correct width calculation of grapheme clusters.""" + + @requires_wcwidth + def test_zwj_family_emoji_width(self): + """ZWJ family emoji has display width 2.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + assert wcwidth.wcswidth(family) == 2 + + @requires_wcwidth + def test_regional_indicator_flag_width(self): + """Regional indicator pair (flag) has display width 2.""" + us_flag = "\U0001f1fa\U0001f1f8" + assert wcwidth.wcswidth(us_flag) == 2 + + @requires_wcwidth + def test_vs16_emoji_width(self): + """VS16 variation selector creates wide emoji.""" + heart = "\u2764\ufe0f" + assert wcwidth.wcswidth(heart) == 2 + + +class TestGraphemeClusterAlignment: + """Tests for correct alignment of cells containing grapheme clusters.""" + + @requires_wcwidth + def test_zwj_alignment_in_grid(self): + """ZWJ emoji 
aligns correctly in grid format.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + data = [ + ["ABC", "text"], + [family, "emoji"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + border_width = len(lines[0]) + for line in lines: + from tabulate import _visible_width + + assert _visible_width(line) == border_width + + @requires_wcwidth + def test_flag_alignment_in_grid(self): + """Regional indicator flags align correctly in grid format.""" + us_flag = "\U0001f1fa\U0001f1f8" + data = [ + ["AB", "text"], + [us_flag, "flag"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + border_width = len(lines[0]) + for line in lines: + from tabulate import _visible_width + + assert _visible_width(line) == border_width + + +class TestGraphemeClusterWrapping: + """Tests for grapheme cluster preservation during text wrapping. + + These tests require wcwidth >= 0.3.0 for iter_graphemes and wrap() APIs. 
+ """ + + @requires_wcwidth_030 + def test_zwj_not_broken_during_wrap(self): + """ZWJ sequence preserved as single unit during wrap.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + data = [[f"A{family}B"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=3) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert family in graphemes_in_result + + @requires_wcwidth_030 + def test_flag_not_broken_during_wrap(self): + """Regional indicator flag preserved as single unit during wrap.""" + us_flag = "\U0001f1fa\U0001f1f8" + gb_flag = "\U0001f1ec\U0001f1e7" + fr_flag = "\U0001f1eb\U0001f1f7" + flags = us_flag + gb_flag + fr_flag + + data = [[flags]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert us_flag in graphemes_in_result + assert gb_flag in graphemes_in_result + assert fr_flag in graphemes_in_result + + @requires_wcwidth_030 + def test_vs16_not_broken_during_wrap(self): + """VS16 variation selector kept with base character during wrap.""" + heart = "\u2764\ufe0f" + data = [[heart * 3]] + result = tabulate(data, tablefmt="plain", maxcolwidths=4) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + heart_count = sum(1 for g in graphemes_in_result if g == heart) + assert heart_count == 3 + + @requires_wcwidth_030 + def test_skin_tone_modifier_not_broken(self): + """Skin tone modifier preserved with emoji during wrap.""" + wave_light = "\U0001f44b\U0001f3fb" + data = [[f"Hi{wave_light}there"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert wave_light in 
graphemes_in_result + + +class TestComplexGraphemeClusters: + """Tests for complex grapheme cluster scenarios. + + These tests require wcwidth >= 0.3.0 for iter_graphemes API. + """ + + @requires_wcwidth_030 + def test_multiple_zwj_sequences_in_cell(self): + """Multiple ZWJ sequences in single cell handled correctly.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + technologist = "\U0001f468\U0001f3fb\u200d\U0001f4bb" + data = [[f"{family} and {technologist}"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=15) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert family in graphemes_in_result + assert technologist in graphemes_in_result + + @requires_wcwidth_030 + def test_flags_with_text_wrap(self): + """Flags interspersed with text wrap correctly.""" + us_flag = "\U0001f1fa\U0001f1f8" + data = [[f"Visit {us_flag} USA today!"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=10) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert us_flag in graphemes_in_result + + @requires_wcwidth_030 + def test_combining_marks_preserved(self): + """Combining diacritical marks stay with base character.""" + e_acute = "e\u0301" + data = [[f"caf{e_acute} au lait"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert e_acute in graphemes_in_result + + +class TestAnsiWithGraphemeClusters: + """Tests for ANSI escape codes combined with grapheme clusters.""" + + @requires_wcwidth + def test_ansi_colored_zwj_width(self): + """ANSI colored ZWJ emoji has correct width.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + colored = f"\x1b[31m{family}\x1b[0m" + + from tabulate import _visible_width + + assert 
_visible_width(colored) == 2 + + @requires_wcwidth + def test_ansi_colored_zwj_alignment(self): + """ANSI colored ZWJ emoji aligns correctly.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + colored = f"\x1b[31m{family}\x1b[0m" + data = [ + ["AB", "text"], + [colored, "emoji"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + from tabulate import _visible_width + + border_width = _visible_width(lines[0]) + for line in lines: + assert _visible_width(line) == border_width + + @requires_wcwidth_030 + def test_ansi_colored_flag_wrap(self): + """ANSI colored flag not broken during wrap.""" + us_flag = "\U0001f1fa\U0001f1f8" + colored = f"\x1b[34m{us_flag}\x1b[0m" + data = [[f"A{colored}B"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=4) + + assert "\U0001f1fa" in result + assert "\U0001f1f8" in result + lines = [line.strip() for line in result.split("\n") if line.strip()] + flag_parts_same_line = any("\U0001f1fa" in line and "\U0001f1f8" in line for line in lines) + assert flag_parts_same_line + + +class TestVisibleWidthFallback: + """Tests for _visible_width wcwidth version compatibility. + + Covers both the modern wcwidth.width() path (>= 0.3.0) and the legacy + wcswidth() path used when width() is not available. + """ + + @requires_wcwidth_width + def test_visible_width_new_api_strips_ansi(self): + """_visible_width returns correct width via wcwidth.width() with ANSI codes.""" + from tabulate import _visible_width + + # Two Korean chars (each 2 cols wide) wrapped in ANSI color codes. + # wcwidth.width() handles ANSI internally, so no explicit stripping needed. 
+ colored_wide = "\x1b[31m한글\x1b[0m" + assert _visible_width(colored_wide) == 4 + + @requires_wcwidth + def test_visible_width_legacy_api_strips_ansi(self): + """_visible_width strips ANSI before wcswidth() when width() is unavailable.""" + import tabulate as tabulate_module + from tabulate import _visible_width + + # Build a mock wcwidth that exposes only wcswidth(), not width(). + # spec= limits auto-created attributes, so hasattr(mock, "width") is False. + legacy_wcwidth = mock.MagicMock(spec=["wcswidth"]) + legacy_wcwidth.wcswidth.side_effect = wcwidth.wcswidth + + colored_wide = "\x1b[31m한글\x1b[0m" + with mock.patch.object(tabulate_module, "wcwidth", legacy_wcwidth): + result = _visible_width(colored_wide) + + assert result == 4 diff --git a/test/test_input.py b/test/test_input.py index b910a346..3cc32374 100644 --- a/test/test_input.py +++ b/test/test_input.py @@ -1,6 +1,7 @@ """Test support of the various forms of tabular data.""" -from tabulate import tabulate +from tabulate import SEPARATING_LINE, tabulate + from common import assert_equal, assert_in, raises, skip try: @@ -12,17 +13,15 @@ def test_iterable_of_iterables(): "Input: an iterable of iterables." - ii = iter(map(lambda x: iter(x), [range(5), range(5, 0, -1)])) - expected = "\n".join( - ["- - - - -", "0 1 2 3 4", "5 4 3 2 1", "- - - - -"] - ) + ii = iter(map(iter, [range(5), range(5, 0, -1)])) + expected = "\n".join(["- - - - -", "0 1 2 3 4", "5 4 3 2 1", "- - - - -"]) result = tabulate(ii) assert_equal(expected, result) def test_iterable_of_iterables_headers(): "Input: an iterable of iterables with headers." 
- ii = iter(map(lambda x: iter(x), [range(5), range(5, 0, -1)])) + ii = iter(map(iter, [range(5), range(5, 0, -1)])) expected = "\n".join( [ " a b c d e", @@ -37,7 +36,7 @@ def test_iterable_of_iterables_headers(): def test_iterable_of_iterables_firstrow(): "Input: an iterable of iterables with the first row as headers" - ii = iter(map(lambda x: iter(x), ["abcde", range(5), range(5, 0, -1)])) + ii = iter(map(iter, ["abcde", range(5), range(5, 0, -1)])) expected = "\n".join( [ " a b c d e", @@ -83,9 +82,7 @@ def test_list_of_lists_firstrow(): def test_list_of_lists_keys(): "Input: a list of lists with column indices as headers." ll = [["a", "one", 1], ["b", "two", None]] - expected = "\n".join( - ["0 1 2", "--- --- ---", "a one 1", "b two"] - ) + expected = "\n".join(["0 1 2", "--- --- ---", "a one 1", "b two"]) result = tabulate(ll, headers="keys") assert_equal(expected, result) @@ -96,14 +93,10 @@ def test_dict_like(): dd = {"a": range(3), "b": range(101, 105)} # keys' order (hence columns' order) is not deterministic in Python 3 # => we have to consider both possible results as valid - expected1 = "\n".join( - [" a b", "--- ---", " 0 101", " 1 102", " 2 103", " 104"] - ) - expected2 = "\n".join( - [" b a", "--- ---", "101 0", "102 1", "103 2", "104"] - ) + expected1 = "\n".join([" a b", "--- ---", " 0 101", " 1 102", " 2 103", " 104"]) + expected2 = "\n".join([" b a", "--- ---", "101 0", "102 1", "103 2", "104"]) result = tabulate(dd, "keys") - print("Keys' order: %s" % dd.keys()) + print(f"Keys' order: {dd.keys()}") assert_in(result, [expected1, expected2]) @@ -173,7 +166,7 @@ def test_numpy_record_array(): [("Alice", 23, 169.5), ("Bob", 27, 175.0)], dtype={ "names": ["name", "age", "height"], - "formats": ["a32", "uint8", "float32"], + "formats": ["S32", "uint8", "float32"], }, ) expected = "\n".join( @@ -199,7 +192,7 @@ def test_numpy_record_array_keys(): [("Alice", 23, 169.5), ("Bob", 27, 175.0)], dtype={ "names": ["name", "age", "height"], - "formats": 
["a32", "uint8", "float32"], + "formats": ["S32", "uint8", "float32"], }, ) expected = "\n".join( @@ -225,7 +218,7 @@ def test_numpy_record_array_headers(): [("Alice", 23, 169.5), ("Bob", 27, 175.0)], dtype={ "names": ["name", "age", "height"], - "formats": ["a32", "uint8", "float32"], + "formats": ["S32", "uint8", "float32"], }, ) expected = "\n".join( @@ -270,9 +263,7 @@ def test_pandas_firstrow(): df = pandas.DataFrame( [["one", 1], ["two", None]], columns=["string", "number"], index=["a", "b"] ) - expected = "\n".join( - ["a one 1.0", "--- ----- -----", "b two nan"] - ) + expected = "\n".join(["a one 1.0", "--- ----- -----", "b two nan"]) result = tabulate(df, headers="firstrow") assert_equal(expected, result) except ImportError: @@ -313,6 +304,7 @@ def test_sqlite3(): cursor.execute("INSERT INTO people VALUES (?, ?, ?)", values) cursor.execute("SELECT name, age, height FROM people ORDER BY name") result = tabulate(cursor, headers=["whom", "how old", "how tall"]) + conn.close() expected = """\ whom how old how tall ------ --------- ---------- @@ -337,6 +329,7 @@ def test_sqlite3_keys(): 'SELECT name "whom", age "how old", height "how tall" FROM people ORDER BY name' ) result = tabulate(cursor, headers="keys") + conn.close() expected = """\ whom how old how tall ------ --------- ---------- @@ -364,9 +357,7 @@ def test_list_of_namedtuples_keys(): NT = namedtuple("NT", ["foo", "bar"]) lt = [NT(1, 2), NT(3, 4)] - expected = "\n".join( - [" foo bar", "----- -----", " 1 2", " 3 4"] - ) + expected = "\n".join([" foo bar", "----- -----", " 1 2", " 3 4"]) result = tabulate(lt, headers="keys") assert_equal(expected, result) @@ -392,12 +383,8 @@ def test_list_of_userdicts(): def test_list_of_dicts_keys(): "Input: a list of dictionaries, with keys as headers." 
lod = [{"foo": 1, "bar": 2}, {"foo": 3, "bar": 4}] - expected1 = "\n".join( - [" foo bar", "----- -----", " 1 2", " 3 4"] - ) - expected2 = "\n".join( - [" bar foo", "----- -----", " 2 1", " 4 3"] - ) + expected1 = "\n".join([" foo bar", "----- -----", " 1 2", " 3 4"]) + expected2 = "\n".join([" bar foo", "----- -----", " 2 1", " 4 3"]) result = tabulate(lod, headers="keys") assert_in(result, [expected1, expected2]) @@ -405,12 +392,8 @@ def test_list_of_dicts_keys(): def test_list_of_userdicts_keys(): "Input: a list of UserDicts." lod = [UserDict(foo=1, bar=2), UserDict(foo=3, bar=4)] - expected1 = "\n".join( - [" foo bar", "----- -----", " 1 2", " 3 4"] - ) - expected2 = "\n".join( - [" bar foo", "----- -----", " 2 1", " 4 3"] - ) + expected1 = "\n".join([" foo bar", "----- -----", " 1 2", " 3 4"]) + expected2 = "\n".join([" bar foo", "----- -----", " 2 1", " 4 3"]) result = tabulate(lod, headers="keys") assert_in(result, [expected1, expected2]) @@ -435,12 +418,8 @@ def test_list_of_dicts_firstrow(): "Input: a list of dictionaries, with the first dict as headers." 
lod = [{"foo": "FOO", "bar": "BAR"}, {"foo": 3, "bar": 4, "baz": 5}] # if some key is missing in the first dict, use the key name instead - expected1 = "\n".join( - [" FOO BAR baz", "----- ----- -----", " 3 4 5"] - ) - expected2 = "\n".join( - [" BAR FOO baz", "----- ----- -----", " 4 3 5"] - ) + expected1 = "\n".join([" FOO BAR baz", "----- ----- -----", " 3 4 5"]) + expected2 = "\n".join([" BAR FOO baz", "----- ----- -----", " 4 3 5"]) result = tabulate(lod, headers="firstrow") assert_in(result, [expected1, expected2]) @@ -449,12 +428,8 @@ def test_list_of_dicts_with_dict_of_headers(): "Input: a dict of user headers for a list of dicts (issue #23)" table = [{"letters": "ABCDE", "digits": 12345}] headers = {"digits": "DIGITS", "letters": "LETTERS"} - expected1 = "\n".join( - [" DIGITS LETTERS", "-------- ---------", " 12345 ABCDE"] - ) - expected2 = "\n".join( - ["LETTERS DIGITS", "--------- --------", "ABCDE 12345"] - ) + expected1 = "\n".join([" DIGITS LETTERS", "-------- ---------", " 12345 ABCDE"]) + expected2 = "\n".join(["LETTERS DIGITS", "--------- --------", "ABCDE 12345"]) result = tabulate(table, headers=headers) assert_in(result, [expected1, expected2]) @@ -520,6 +495,28 @@ def test_py37orlater_list_of_dataclasses_headers(): skip("test_py37orlater_list_of_dataclasses_headers is skipped") +def test_py37orlater_list_of_dataclasses_with_separating_line(): + "Input: a list of dataclasses with a separating line" + try: + from dataclasses import make_dataclass + + Person = make_dataclass("Person", ["name", "age", "height"]) + ld = [Person("Alice", 23, 169.5), SEPARATING_LINE, Person("Bob", 27, 175.0)] + result = tabulate(ld, headers="keys") + expected = "\n".join( + [ + "name age height", + "------ ----- --------", + "Alice 23 169.5", + "------ ----- --------", + "Bob 27 175", + ] + ) + assert_equal(expected, result) + except ImportError: + skip("test_py37orlater_list_of_dataclasses_keys is skipped") + + def test_list_bytes(): "Input: a list of bytes. 
(issue #192)" lb = [["你好".encode()], ["你好"]] diff --git a/test/test_internal.py b/test/test_internal.py index e7564d37..49ae0ba6 100644 --- a/test/test_internal.py +++ b/test/test_internal.py @@ -2,7 +2,7 @@ import tabulate as T -from common import assert_equal, skip, rows_to_pipe_table_str, cols_to_pipe_str +from common import assert_equal, cols_to_pipe_str, rows_to_pipe_table_str, skip def test_multiline_width(): @@ -176,7 +176,7 @@ def test_wrap_text_to_colwidths(): def test_wrap_text_wide_chars(): "Internal: Wrap wide characters based on column width" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_wrap_text_wide_chars is skipped") @@ -240,11 +240,18 @@ def test_wrap_text_to_colwidths_single_ansi_colors_full_cell(): def test_wrap_text_to_colwidths_colors_wide_char(): """Internal: autowrapped text can retain a ANSI colors with wide chars""" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_wrap_text_to_colwidths_colors_wide_char is skipped") - data = [[("\033[31m약간 감싸면 더 잘 보일 수있는 다소 긴" " 설명입니다 설명입니다 설명입니다 설명입니다 설명\033[0m")]] + data = [ + [ + ( + "\033[31m약간 감싸면 더 잘 보일 수있는 다소 긴" + " 설명입니다 설명입니다 설명입니다 설명입니다 설명\033[0m" + ) + ] + ] result = T._wrap_text_to_colwidths(data, [30]) expected = [ diff --git a/test/test_output.py b/test/test_output.py index e3d369ae..ea3da87f 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -1,9 +1,12 @@ """Test output of the various forms of tabular data.""" +from decimal import Decimal + from pytest import mark -from common import assert_equal, raises, skip, check_warnings -from tabulate import tabulate, simple_separated_format, SEPARATING_LINE +from tabulate import SEPARATING_LINE, simple_separated_format, tabulate + +from common import assert_equal, check_warnings, raises, skip # _test_table shows # - coercion of a string to a number, @@ -16,9 +19,7 @@ def test_plain(): "Output: plain with headers" - expected = "\n".join( - ["strings 
numbers", "spam 41.9999", "eggs 451"] - ) + expected = "\n".join(["strings numbers", "spam 41.9999", "eggs 451"]) result = tabulate(_test_table, _test_table_headers, tablefmt="plain") assert_equal(expected, result) @@ -94,9 +95,7 @@ def test_plain_multiline_with_empty_cells(): def test_plain_multiline_with_empty_cells_headerless(): "Output: plain with multiline cells and empty cells without headers" table = [["0", "", ""], ["1", "", ""], ["2", "very long data", "fold\nthis"]] - expected = "\n".join( - ["0", "1", "2 very long data fold", " this"] - ) + expected = "\n".join(["0", "1", "2 very long data fold", " this"]) result = tabulate(table, tablefmt="plain") assert_equal(expected, result) @@ -105,9 +104,7 @@ def test_plain_maxcolwidth_autowraps(): "Output: maxcolwidth will result in autowrapping longer cells" table = [["hdr", "fold"], ["1", "very long data"]] expected = "\n".join([" hdr fold", " 1 very long", " data"]) - result = tabulate( - table, headers="firstrow", tablefmt="plain", maxcolwidths=[10, 10] - ) + result = tabulate(table, headers="firstrow", tablefmt="plain", maxcolwidths=[10, 10]) assert_equal(expected, result) @@ -122,16 +119,14 @@ def test_plain_maxcolwidth_autowraps_with_sep(): expected = "\n".join( [" hdr fold", " 1 very long", " data", "", " 2 last line"] ) - result = tabulate( - table, headers="firstrow", tablefmt="plain", maxcolwidths=[10, 10] - ) + result = tabulate(table, headers="firstrow", tablefmt="plain", maxcolwidths=[10, 10]) assert_equal(expected, result) def test_plain_maxcolwidth_autowraps_wide_chars(): "Output: maxcolwidth and autowrapping functions with wide characters" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_wrap_text_wide_chars is skipped") @@ -150,9 +145,7 @@ def test_plain_maxcolwidth_autowraps_wide_chars(): " 설명입니다 설명입니다 설명", ] ) - result = tabulate( - table, headers="firstrow", tablefmt="plain", maxcolwidths=[10, 30] - ) + result = tabulate(table, headers="firstrow", 
tablefmt="plain", maxcolwidths=[10, 30]) assert_equal(expected, result) @@ -189,9 +182,7 @@ def test_maxcolwidth_pad_tailing_widths(): " short", ] ) - result = tabulate( - table, headers="firstrow", tablefmt="plain", maxcolwidths=[None, 6] - ) + result = tabulate(table, headers="firstrow", tablefmt="plain", maxcolwidths=[None, 6]) assert_equal(expected, result) @@ -354,9 +345,7 @@ def test_orgtbl_multiline_2_with_sep_line(): def test_simple_headerless(): "Output: simple without headers" - expected = "\n".join( - ["---- --------", "spam 41.9999", "eggs 451", "---- --------"] - ) + expected = "\n".join(["---- --------", "spam 41.9999", "eggs 451", "---- --------"]) result = tabulate(_test_table, tablefmt="simple") assert_equal(expected, result) @@ -380,9 +369,9 @@ def test_simple_headerless_with_sep_line_with_padding_in_tablefmt(): "Output: simple without headers with sep line with padding in tablefmt" expected = "\n".join( [ - "|------|----------|", + "|:-----|---------:|", "| spam | 41.9999 |", - "|------|----------|", + "|:-----|---------:|", "| eggs | 451 |", ] ) @@ -500,7 +489,7 @@ def test_github(): expected = "\n".join( [ "| strings | numbers |", - "|-----------|-----------|", + "|:----------|----------:|", "| spam | 41.9999 |", "| eggs | 451 |", ] @@ -509,6 +498,62 @@ def test_github(): assert_equal(expected, result) +def test_github_multiline(): + "Output: github with multiline cells with headers" + table = [[2, "foo\nbar"]] + headers = ("more\nspam eggs", "more spam\n& eggs") + expected = "\n".join( + [ + "| more | more spam |", + "| spam eggs | & eggs |", + "|------------:|:------------|", + "| 2 | foo |", + "| | bar |", + ] + ) + result = tabulate(table, headers, tablefmt="github") + assert_equal(expected, result) + + +def test_github_with_colalign(): + "Output: github with explicit column alignment" + expected = "\n".join( + [ + "| Name | Age |", + "|:-------|------:|", + "| Alice | 24 |", + "| Bob | 19 |", + ] + ) + result = tabulate( + [["Alice", 24], 
["Bob", 19]], + ["Name", "Age"], + tablefmt="github", + colalign=("left", "right"), + ) + assert_equal(expected, result) + + +def test_github_no_alignment(): + "Output: github without alignment hints when numalign/stralign are disabled" + expected = "\n".join( + [ + "| strings | numbers |", + "|-----------|-----------|", + "| spam | 41.9999 |", + "| eggs | 451 |", + ] + ) + result = tabulate( + _test_table, + _test_table_headers, + tablefmt="github", + numalign=None, + stralign=None, + ) + assert_equal(expected, result) + + def test_grid(): "Output: grid with headers" expected = "\n".join( @@ -529,7 +574,7 @@ def test_grid(): def test_grid_wide_characters(): "Output: grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -664,7 +709,7 @@ def test_simple_grid(): def test_simple_grid_wide_characters(): "Output: simple_grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_simple_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -799,7 +844,7 @@ def test_rounded_grid(): def test_rounded_grid_wide_characters(): "Output: rounded_grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_rounded_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -934,7 +979,7 @@ def test_heavy_grid(): def test_heavy_grid_wide_characters(): "Output: heavy_grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_heavy_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -1069,7 +1114,7 @@ def test_mixed_grid(): def test_mixed_grid_wide_characters(): "Output: mixed_grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except 
ImportError: skip("test_mixed_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -1204,7 +1249,7 @@ def test_double_grid(): def test_double_grid_wide_characters(): "Output: double_grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_double_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -1339,7 +1384,7 @@ def test_fancy_grid(): def test_fancy_grid_wide_characters(): "Output: fancy_grid with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_fancy_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -1508,7 +1553,7 @@ def test_colon_grid(): def test_colon_grid_wide_characters(): "Output: colon_grid with wide chars in header" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_colon_grid_wide_characters is skipped") headers = list(_test_table_headers) @@ -1524,9 +1569,7 @@ def test_colon_grid_wide_characters(): "+-----------+---------+", ] ) - result = tabulate( - _test_table, headers, tablefmt="colon_grid", colalign=["left", "right"] - ) + result = tabulate(_test_table, headers, tablefmt="colon_grid", colalign=["left", "right"]) assert_equal(expected, result) @@ -1602,7 +1645,7 @@ def test_outline(): def test_outline_wide_characters(): "Output: outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1654,7 +1697,7 @@ def test_simple_outline(): def test_simple_outline_wide_characters(): "Output: simple_outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_simple_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1706,7 +1749,7 @@ def test_rounded_outline(): def 
test_rounded_outline_wide_characters(): "Output: rounded_outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_rounded_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1758,7 +1801,7 @@ def test_heavy_outline(): def test_heavy_outline_wide_characters(): "Output: heavy_outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_heavy_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1810,7 +1853,7 @@ def test_mixed_outline(): def test_mixed_outline_wide_characters(): "Output: mixed_outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_mixed_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1862,7 +1905,7 @@ def test_double_outline(): def test_double_outline_wide_characters(): "Output: double_outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_double_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1914,7 +1957,7 @@ def test_fancy_outline(): def test_fancy_outline_wide_characters(): "Output: fancy_outline with wide characters in headers" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_fancy_outline_wide_characters is skipped") headers = list(_test_table_headers) @@ -1963,9 +2006,7 @@ def test_pipe(): def test_pipe_headerless(): "Output: pipe without headers" - expected = "\n".join( - ["|:-----|---------:|", "| spam | 41.9999 |", "| eggs | 451 |"] - ) + expected = "\n".join(["|:-----|---------:|", "| spam | 41.9999 |", "| eggs | 451 |"]) result = tabulate(_test_table, tablefmt="pipe") assert_equal(expected, result) @@ -2084,11 +2125,11 @@ def test_asciidoc(): "Output: asciidoc with headers" expected = "\n".join( [ - 
'[cols="11<,11>",options="header"]', + '[cols="<11,>11",options="header"]', "|====", - "| strings | numbers ", - "| spam | 41.9999 ", - "| eggs | 451 ", + "| strings | numbers", + "| spam | 41.9999", + "| eggs | 451", "|====", ] ) @@ -2100,10 +2141,10 @@ def test_asciidoc_headerless(): "Output: asciidoc without headers" expected = "\n".join( [ - '[cols="6<,10>"]', + '[cols="<6,>10"]', "|====", - "| spam | 41.9999 ", - "| eggs | 451 ", + "| spam | 41.9999", + "| eggs | 451", "|====", ] ) @@ -2400,9 +2441,7 @@ def test_rst_with_empty_values_in_first_column(): def test_rst_headerless(): "Output: rst without headers" - expected = "\n".join( - ["==== ========", "spam 41.9999", "eggs 451", "==== ========"] - ) + expected = "\n".join(["==== ========", "spam 41.9999", "eggs 451", "==== ========"]) result = tabulate(_test_table, tablefmt="rst") assert_equal(expected, result) @@ -2524,34 +2563,21 @@ def test_moinmoin(): "Output: moinmoin with headers" expected = "\n".join( [ - "|| ''' strings ''' || ''' numbers ''' ||", - '|| spam || 41.9999 ||', - '|| eggs || 451 ||', + "|| ''' strings ''' || ''' numbers ''' ||", + '|| spam || 41.9999 ||', + '|| eggs || 451 ||', ] ) result = tabulate(_test_table, _test_table_headers, tablefmt="moinmoin") assert_equal(expected, result) -def test_youtrack(): - "Output: youtrack with headers" - expected = "\n".join( - [ - "|| strings || numbers ||", - "| spam | 41.9999 |", - "| eggs | 451 |", - ] - ) - result = tabulate(_test_table, _test_table_headers, tablefmt="youtrack") - assert_equal(expected, result) - - def test_moinmoin_headerless(): "Output: moinmoin without headers" expected = "\n".join( [ - '|| spam || 41.9999 ||', - '|| eggs || 451 ||', + '|| spam || 41.9999 ||', + '|| eggs || 451 ||', ] ) result = tabulate(_test_table, tablefmt="moinmoin") @@ -2573,11 +2599,11 @@ def test_html(): [ "", "", - '', # noqa + '', "", "", - '', - '', + '', + '', "", "
<strings> <&numbers&>
<strings> <&numbers&>
spam > 41.9999
eggs & 451
spam > 41.9999
eggs & 451
", ] @@ -2594,11 +2620,11 @@ def test_unsafehtml(): [ "", "", - "", # noqa + '', "", "", - '', - '', + '', + '', "", "
strings numbers
strings numbers
spam 41.9999
eggs 451.0
spam 41.9999
eggs 451.0
", ] @@ -2617,8 +2643,8 @@ def test_html_headerless(): [ "", "", - '', - '', + '', + '', "", "
spam > 41.9999
eggs &451
spam > 41.9999
eggs &451
", ] @@ -2635,8 +2661,8 @@ def test_unsafehtml_headerless(): [ "", "", - '', - '', + '', + '', "", "
spam41.9999
eggs451.0
spam41.9999
eggs451.0
", ] @@ -2817,9 +2843,7 @@ def test_intfmt_with_string_as_integer(): @mark.skip(reason="It detects all values as floats but there are strings and integers.") def test_intfmt_with_string_with_floats(): "Output: integer format" - result = tabulate( - [[82000.38], ["1500.47"], ["2463"], [92165]], intfmt=",", tablefmt="plain" - ) + result = tabulate([[82000.38], ["1500.47"], ["2463"], [92165]], intfmt=",", tablefmt="plain") expected = "82000.4\n 1500.47\n 2463\n92,165" assert_equal(expected, result) @@ -2863,27 +2887,31 @@ def test_floatfmt(): def test_floatfmt_thousands(): "Output: floating point format" - result = tabulate( - [["1.23456789"], [1.0], ["1,234.56"]], floatfmt=".3f", tablefmt="plain" - ) + result = tabulate([["1.23456789"], [1.0], ["1,234.56"]], floatfmt=".3f", tablefmt="plain") expected = " 1.235\n 1.000\n1234.560" assert_equal(expected, result) def test_floatfmt_multi(): "Output: floating point format different for each column" + result = tabulate([[0.12345, 0.12345, 0.12345]], floatfmt=(".1f", ".3f"), tablefmt="plain") + expected = "0.1 0.123 0.12345" + assert_equal(expected, result) + + +def test_floatfmt_decimal(): result = tabulate( - [[0.12345, 0.12345, 0.12345]], floatfmt=(".1f", ".3f"), tablefmt="plain" + [[Decimal("99999998999.999980"), 1234.5, 1.2345678, "inf"]], + floatfmt=".6f", + tablefmt="plain", ) - expected = "0.1 0.123 0.12345" + expected = "99999998999.999980 1234.500000 1.234568 inf" assert_equal(expected, result) def test_colalign_multi(): "Output: string columns with custom colalign" - result = tabulate( - [["one", "two"], ["three", "four"]], colalign=("right",), tablefmt="plain" - ) + result = tabulate([["one", "two"], ["three", "four"]], colalign=("right",), tablefmt="plain") expected = " one two\nthree four" assert_equal(expected, result) @@ -2949,9 +2977,7 @@ def test_colalign_or_headersalign_too_long(): colalign = ("global", "left", "center") headers = ["h"] headersalign = ("center", "right", "same") - result = tabulate( - 
table, headers=headers, colalign=colalign, headersalign=headersalign - ) + result = tabulate(table, headers=headers, colalign=colalign, headersalign=headersalign) expected = "\n".join([" h", "--- ---", " 1 2", "111 222"]) assert_equal(expected, result) @@ -2960,9 +2986,7 @@ def test_warning_when_colalign_or_headersalign_is_string(): """Test user warnings when `colalign` or `headersalign` is a string.""" table = [[1, "bar"]] opt = {"colalign": "center", "headers": ["foo", "2"], "headersalign": "center"} - check_warnings( - (tabulate, [table], opt), num=2, category=UserWarning, contain="As a string" - ) + check_warnings((tabulate, [table], opt), num=2, category=UserWarning, contain="As a string") def test_float_conversions(): @@ -2992,9 +3016,7 @@ def test_float_conversions(): def test_missingval(): "Output: substitution of missing values" - result = tabulate( - [["Alice", 10], ["Bob", None]], missingval="n/a", tablefmt="plain" - ) + result = tabulate([["Alice", 10], ["Bob", None]], missingval="n/a", tablefmt="plain") expected = "Alice 10\nBob n/a" assert_equal(expected, result) @@ -3274,15 +3296,11 @@ def test_disable_numparse_list(): "Output: Default table output, but with number parsing selectively disabled" table_headers = ["h1", "h2", "h3"] test_table = [["foo", "bar", "42992e1"]] - expected = "\n".join( - ["h1 h2 h3", "---- ---- -------", "foo bar 42992e1"] - ) + expected = "\n".join(["h1 h2 h3", "---- ---- -------", "foo bar 42992e1"]) result = tabulate(test_table, table_headers, disable_numparse=[2]) assert_equal(expected, result) - expected = "\n".join( - ["h1 h2 h3", "---- ---- ------", "foo bar 429920"] - ) + expected = "\n".join(["h1 h2 h3", "---- ---- ------", "foo bar 429920"]) result = tabulate(test_table, table_headers, disable_numparse=[0, 1]) assert_equal(expected, result) @@ -3291,9 +3309,7 @@ def test_preserve_whitespace(): "Output: Default table output, but with preserved leading whitespace." 
table_headers = ["h1", "h2", "h3"] test_table = [[" foo", " bar ", "foo"]] - expected = "\n".join( - ["h1 h2 h3", "----- ------- ----", " foo bar foo"] - ) + expected = "\n".join(["h1 h2 h3", "----- ------- ----", " foo bar foo"]) result = tabulate(test_table, table_headers, preserve_whitespace=True) assert_equal(expected, result) @@ -3302,3 +3318,34 @@ def test_preserve_whitespace(): expected = "\n".join(["h1 h2 h3", "---- ---- ----", "foo bar foo"]) result = tabulate(test_table, table_headers, preserve_whitespace=False) assert_equal(expected, result) + + +def test_break_long_words(): + "Output: Default table output, with breakwords true." + table_headers = ["h1", "h2", "h3"] + test_table = [[" foo1", " bar2 ", "foo3"]] + + # Table is not wrapped on 3 letters due to long word + expected = "h1 h2 h3\n---- ---- ----\nfoo1 bar2 foo3" + result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=False) + assert_equal(expected, result) + + # Table max width is 3 letters + expected = "h1 h2 h3\n---- ---- ----\nf ba foo\noo1 r2 3" + result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=True) + assert_equal(expected, result) + + +def test_break_on_hyphens(): + "Output: Default table output, with break on hyphens true." 
+ table_headers = ["h1", "h2", "h3"] + test_table = [[" foo-bar", " bar-bar ", "foo-foo"]] + # Table max width is 5, long lines breaks on hyphens + expected = "h1 h2 h3\n---- ---- -----\nfoo bar- foo-f\n-bar bar oo" + result = tabulate(test_table, table_headers, maxcolwidths=5, break_on_hyphens=False) + assert_equal(expected, result) + + # Table data is no longer breaks on hyphens + expected = "h1 h2 h3\n---- ---- ----\nfoo- bar- foo-\nbar bar foo" + result = tabulate(test_table, table_headers, maxcolwidths=5, break_on_hyphens=True) + assert_equal(expected, result) diff --git a/test/test_regression.py b/test/test_regression.py index bf262470..95556769 100644 --- a/test/test_regression.py +++ b/test/test_regression.py @@ -1,6 +1,7 @@ """Regression tests.""" -from tabulate import tabulate, TableFormat, Line, DataRow +from tabulate import DataRow, Line, TableFormat, tabulate + from common import assert_equal, skip @@ -96,7 +97,7 @@ def mk_iter_of_iters(): def mk_iter(): yield from range(3) - for r in range(3): + for _ in range(3): yield mk_iter() def mk_headers(): @@ -151,9 +152,7 @@ def test_simple_separated_format_with_headers(): from tabulate import simple_separated_format expected = " a| b\n 1| 2" - formatted = tabulate( - [[1, 2]], headers=["a", "b"], tablefmt=simple_separated_format("|") - ) + formatted = tabulate([[1, 2]], headers=["a", "b"], tablefmt=simple_separated_format("|")) assert_equal(expected, formatted) @@ -172,7 +171,7 @@ def test_numeric_column_headers(): expected = " 42\n----\n 1\n 2" assert_equal(expected, result) - lod = [{p: i for p in range(5)} for i in range(5)] + lod = [dict.fromkeys(range(5), i) for i in range(5)] result = tabulate(lod, "keys") expected = "\n".join( [ @@ -239,9 +238,7 @@ def test_isconvertible_on_set_values(): def test_ansi_color_for_decimal_numbers(): "Regression: ANSI colors for decimal numbers (issue #36)" table = [["Magenta", "\033[95m" + "1.1" + "\033[0m"]] - expected = "\n".join( - ["------- ---", "Magenta 
\x1b[95m1.1\x1b[0m", "------- ---"] - ) + expected = "\n".join(["------- ---", "Magenta \x1b[95m1.1\x1b[0m", "------- ---"]) result = tabulate(table) assert_equal(expected, result) @@ -258,15 +255,18 @@ def test_alignment_of_decimal_numbers_with_ansi_color(): def test_alignment_of_decimal_numbers_with_commas(): "Regression: alignment for decimal numbers with comma separators" - skip("test is temporarily disable until the feature is reimplemented") - # table = [["c1r1", "14502.05"], ["c1r2", 105]] - # result = tabulate(table, tablefmt="grid", floatfmt=',.2f') - # expected = "\n".join( - # ['+------+-----------+', '| c1r1 | 14,502.05 |', - # '+------+-----------+', '| c1r2 | 105.00 |', - # '+------+-----------+'] - # ) - # assert_equal(expected, result) + table = [["c1r1", "14502.05"], ["c1r2", 105]] + result = tabulate(table, tablefmt="grid", floatfmt=",.2f") + expected = "\n".join( + [ + "+------+-----------+", + "| c1r1 | 14,502.05 |", + "+------+-----------+", + "| c1r2 | 105.00 |", + "+------+-----------+", + ] + ) + assert_equal(expected, result) def test_long_integers(): @@ -300,7 +300,7 @@ class textclass(str): def test_mix_normal_and_wide_characters(): "Regression: wide characters in a grid format (issue #51)" try: - import wcwidth # noqa + import wcwidth # noqa: F401 ru_text = "\u043f\u0440\u0438\u0432\u0435\u0442" cn_text = "\u4f60\u597d" @@ -322,7 +322,7 @@ def test_mix_normal_and_wide_characters(): def test_multiline_with_wide_characters(): "Regression: multiline tables with varying number of wide characters (github issue #28)" try: - import wcwidth # noqa + import wcwidth # noqa: F401 table = [["가나\n가ab", "가나", "가나"]] result = tabulate(table, tablefmt="fancy_grid") @@ -341,7 +341,7 @@ def test_multiline_with_wide_characters(): def test_align_long_integers(): "Regression: long integers should be aligned as integers (issue #61)" - table = [[int(1)], [int(234)]] + table = [[1], [234]] result = tabulate(table, tablefmt="plain") expected = "\n".join([" 1", 
"234"]) assert_equal(expected, result) @@ -357,7 +357,7 @@ def test_numpy_array_as_headers(): expected = "foo bar" assert_equal(expected, result) except ImportError: - raise skip("") + raise skip("") from None def test_boolean_columns(): @@ -472,6 +472,22 @@ def count(start, step=1): assert_equal(expected, result) +def test_numpy_array_as_showindex(): + "Regression: numpy array as showindex must not raise ValueError on == comparison" + try: + import numpy as np + except ImportError: + raise skip("") from None + + table = [["a"], ["b"], ["c"]] + # np.array([...]) == "default" returns an element-wise boolean array whose + # truth value is ambiguous; the fix short-circuits the comparison when + # showindex is not a string. + expected = "10 a\n20 b\n30 c" + result = tabulate(table, showindex=np.array([10, 20, 30]), tablefmt="plain") + assert_equal(expected, result) + + def test_preserve_line_breaks_with_maxcolwidths(): "Regression: preserve line breaks when using maxcolwidths (github issue #190)" table = [["123456789 bbb\nccc"]] @@ -532,7 +548,7 @@ def test_numpy_int64_as_integer(): ) assert_equal(expected, result) except ImportError: - raise skip("") + raise skip("") from None def test_empty_table_with_colalign(): @@ -545,3 +561,40 @@ def test_empty_table_with_colalign(): ] ) assert_equal(expected, table) + + +def test_empty_table_with_maxheadercolwidths(): + "Regression: empty table with maxheadercolwidths kwarg (issue #365)" + result = tabulate([], headers=["one", "two", "three"], maxheadercolwidths=5) + expected = "\n".join( + [ + "one two three", + "----- ----- -------", + ] + ) + assert_equal(expected, result) + + +def test_mixed_bool_strings_and_numeric_strings(): + "Regression: column with bool-like strings and numeric strings should not crash (issue #209)" + result = tabulate([["False"], ["1."]]) + expected = "\n".join(["-----", "False", " 1", "-----"]) + assert_equal(expected, result) + + +def test_asciidoc_without_trailing_whitespace(): + "Regression: 
asciidoc format output must not generate trailing whitespace (issue #408)" + result = tabulate([["foo"]], headers=("longheader",), tablefmt="asciidoc") + expected = '[cols="<14",options="header"]\n|====\n| longheader\n| foo\n|====' + assert_equal(expected, result) + + result = tabulate([["longtext"]], headers=("bar",), tablefmt="asciidoc") + expected = '[cols="<10",options="header"]\n|====\n| bar\n| longtext\n|====' + assert_equal(expected, result) + + +def test_github_escape_pipe_character(): + "Regression: github format must escape pipe character with a backslash (issue #241)" + result = tabulate([["foo|bar"]], headers=("spam|eggs",), tablefmt="github") + expected = "| spam\\|eggs |\n|:------------|\n| foo\\|bar |" + assert_equal(expected, result) diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index 02dcc415..e6bab0f5 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -1,11 +1,11 @@ """Discretely test functionality of our custom TextWrapper""" import datetime - -from tabulate import _CustomTextWrap as CTW, tabulate, _strip_ansi from textwrap import TextWrapper as OTW -from common import skip, assert_equal +from tabulate import _CustomTextWrap as CTW, _strip_ansi, tabulate + +from common import assert_equal, skip def test_wrap_multiword_non_wide(): @@ -15,9 +15,9 @@ def test_wrap_multiword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data - ), "Failure on non-wide char multiword regression check for width " + str(width) + assert [line.rstrip() for line in orig.wrap(data)] == [ + line.rstrip() for line in cust.wrap(data) + ], f"Failure on non-wide char multiword regression check for width {width}" def test_wrap_multiword_non_wide_with_hypens(): @@ -27,9 +27,9 @@ def test_wrap_multiword_non_wide_with_hypens(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data - ), "Failure on non-wide char hyphen regression check for width " + 
str(width) + assert [line.rstrip() for line in orig.wrap(data)] == [ + line.rstrip() for line in cust.wrap(data) + ], f"Failure on non-wide char hyphen regression check for width {width}" def test_wrap_longword_non_wide(): @@ -39,15 +39,15 @@ def test_wrap_longword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data - ), "Failure on non-wide char longword regression check for width " + str(width) + assert orig.wrap(data) == cust.wrap(data), ( + f"Failure on non-wide char longword regression check for width {width}" + ) def test_wrap_wide_char_multiword(): """TextWrapper: wrapping support for wide characters with multiple words""" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_wrap_wide_char is skipped") @@ -63,7 +63,7 @@ def test_wrap_wide_char_multiword(): def test_wrap_wide_char_longword(): """TextWrapper: wrapping wide char word that needs to be broken up""" try: - import wcwidth # noqa + import wcwidth # noqa: F401 except ImportError: skip("test_wrap_wide_char_longword is skipped") @@ -111,9 +111,7 @@ def test_wrapper_len_ignores_color_chars(): def test_wrap_full_line_color(): """TextWrapper: Wrap a line when the full thing is enclosed in color tags""" # This has both a text color and a background color - data = ( - "\033[31m\033[104mThis is a test string for testing TextWrap with colors\033[0m" - ) + data = "\033[31m\033[104mThis is a test string for testing TextWrap with colors\033[0m" expected = [ "\033[31m\033[104mThis is a test\033[0m", @@ -176,6 +174,44 @@ def test_wrap_color_line_longword(): assert_equal(expected, result) +def test_wrap_color_line_longword_zerowidth(): + """Lines with zero-width symbols (accents) must include those symbols with the prior symbol. + Let's exercise the calculation where the available symbols never satisfy the available width, + and ensure chunk calculation succeeds and ANSI colors are maintained. 
+ + Most combining marks combine with the preceding character (even in right-to-left alphabets): + - "e\u0301" → "é" (e + combining acute accent) + - "a\u0308" → "ä" (a + combining diaeresis) + - "n\u0303" → "ñ" (n + combining tilde) + Enclosing Marks: Some combining marks enclose the base character: + - "A\u20dd" → Ⓐ Combining enclosing circle + Multiple Combining Marks: You can stack multiple combining marks on a single base character: + - "e\u0301\u0308" → e with both acute accent and diaeresis + Zero width space → "ab" with a : + - "a\u200bb" + + """ + try: + import wcwidth # noqa + except ImportError: + skip("test_wrap_wide_char is skipped") + + # Exactly filled, with a green zero-width segment at the end. + data = ( + "This_is_A\u20dd_\033[31mte\u0301st_string_\u200b" + "to_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m" + ) + + expected = [ + "This_is_A\u20dd_\033[31mte\u0301\033[0m", + "\033[31mst_string_\u200bto\033[0m", + "\033[31m_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m", + ] + wrapper = CTW(width=12) + result = wrapper.wrap(data) + assert_equal(expected, result) + + def test_wrap_color_line_multiple_escapes(): data = "012345(\x1b[32ma\x1b[0mbc\x1b[32mdefghij\x1b[0m)" expected = [ @@ -264,3 +300,65 @@ def test_wrap_none_value_with_missingval(): ] expected = "\n".join(expected) assert_equal(expected, result) + + +def test_wrap_optional_bool_strs(): + """TextWrapper: Show that str bools and None can be wrapped without crashing""" + data = [ + ["First Entry", "True"], + ["Second Entry", None], + ] + headers = ["Title", "When"] + result = tabulate(data, headers=headers, tablefmt="grid", maxcolwidths=[7, 5]) + + expected = [ + "+---------+--------+", + "| Title | When |", + "+=========+========+", + "| First | True |", + "| Entry | |", + "+---------+--------+", + "| Second | |", + "| Entry | |", + "+---------+--------+", + ] + expected = "\n".join(expected) + assert_equal(expected, result) + + +def test_wrap_wide_char_no_column_overflow(): + 
"TextWrapper: wide chars must not overflow the requested column width." + try: + import wcwidth + except ImportError: + skip("test_wrap_wide_char_no_column_overflow is skipped") + + # Each Korean character occupies 2 display columns. + data = "\ud55c\uae00\ud14c\uc2a4\ud2b8" # 한글테스트 + for width in [2, 3, 4, 5, 6]: + wrapper = CTW(width=width) + lines = wrapper.wrap(data) + for line in lines: + display_width = wcwidth.wcswidth(line) + assert display_width <= width, ( + f"Line {line!r} has display width {display_width} " + f"which exceeds requested column width {width}" + ) + + +def test_wrap_wide_char_narrower_than_char_width(): + """TextWrapper: column width smaller than a single wide char must not hang (issue #399). + + When the requested width is 1 but every character is 2 display columns + wide, _handle_long_word must still make progress (one character per line) + rather than looping forever. + """ + try: + import wcwidth # noqa: F401 + except ImportError: + skip("test_wrap_wide_char_narrower_than_char_width is skipped") + + data = "\ud55c\uae00" # 한글 -- each char is 2 display cols wide + # width=1 is narrower than any character; each char should still get its own line + result = CTW(width=1).wrap(data) + assert result == ["\ud55c", "\uae00"] diff --git a/tox.ini b/tox.ini index 9605e79b..4a0ae289 100644 --- a/tox.ini +++ b/tox.ini @@ -1,4 +1,4 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests +# Tox (http://tox.readthedocs.org/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "pip install tox" # and then run "tox" from this directory. @@ -8,19 +8,19 @@ # for testing and it is disabled by default. 
[tox] -envlist = lint, py{38, 39, 310, 311, 312, 313} +envlist = lint, py{310, 311, 312, 313, 314} isolated_build = True [gh] python = - 3.9: py39-extra 3.10: py310-extra 3.11: py311-extra 3.12: py312-extra 3.13: py313-extra + 3.14: py314-extra [testenv] -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest passenv = @@ -29,52 +29,22 @@ passenv = SSL_CERT_FILE [testenv:lint] -commands = python -m pre_commit run -a +commands = + ruff check + ruff format --check deps = - pre-commit - -[testenv:py38] -basepython = python3.8 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} -deps = - pytest - -[testenv:py38-extra] -basepython = python3.8 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} -deps = - pytest - numpy - pandas - wcwidth - - -[testenv:py39] -basepython = python3.9 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} -deps = - pytest - -[testenv:py39-extra] -basepython = python3.9 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} -deps = - pytest - numpy - pandas - wcwidth - + ruff [testenv:py310] basepython = python3.10 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest [testenv:py310-extra] basepython = python3.10 setenv = PYTHONDEVMODE = 1 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest numpy @@ -84,14 +54,14 @@ deps = [testenv:py311] basepython = python3.11 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest [testenv:py311-extra] basepython = python3.11 setenv = PYTHONDEVMODE = 1 
-commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest numpy @@ -100,14 +70,14 @@ deps = [testenv:py312] basepython = python3.12 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest [testenv:py312-extra] basepython = python3.12 setenv = PYTHONDEVMODE = 1 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest numpy @@ -116,21 +86,32 @@ deps = [testenv:py313] basepython = python3.13 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest [testenv:py313-extra] basepython = python3.13 setenv = PYTHONDEVMODE = 1 -commands = pytest -v --doctest-modules --ignore benchmark {posargs} +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} deps = pytest numpy pandas wcwidth -[flake8] -max-complexity = 22 -max-line-length = 99 -ignore = E203, W503, C901, E402, B011 +[testenv:py314] +basepython = python3.14 +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} +deps = + pytest + +[testenv:py314-extra] +basepython = python3.14 +setenv = PYTHONDEVMODE = 1 +commands = pytest -v --doctest-modules --ignore benchmark --doctest-glob="README.md" {posargs} +deps = + pytest + numpy + pandas + wcwidth