diff --git a/ci/deps/azure-37-locale_slow.yaml b/ci/deps/azure-37-locale_slow.yaml index 7f658fe62d268..0c47b1a72774f 100644 --- a/ci/deps/azure-37-locale_slow.yaml +++ b/ci/deps/azure-37-locale_slow.yaml @@ -18,7 +18,7 @@ dependencies: - lxml - matplotlib=3.0.0 - numpy=1.16.* - - openpyxl=2.6.0 + - openpyxl=3.0.0 - python-dateutil - python-blosc - pytz=2017.3 diff --git a/ci/deps/azure-37-minimum_versions.yaml b/ci/deps/azure-37-minimum_versions.yaml index f184ea87c89fe..9cc158b76cd41 100644 --- a/ci/deps/azure-37-minimum_versions.yaml +++ b/ci/deps/azure-37-minimum_versions.yaml @@ -19,7 +19,7 @@ dependencies: - numba=0.46.0 - numexpr=2.6.8 - numpy=1.16.5 - - openpyxl=2.6.0 + - openpyxl=3.0.0 - pytables=3.5.1 - python-dateutil=2.7.3 - pytz=2017.3 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 49039f05b889a..06e1af75053d3 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -274,7 +274,7 @@ html5lib 1.0.1 HTML parser for read_html (see :ref lxml 4.3.0 HTML parser for read_html (see :ref:`note `) matplotlib 2.2.3 Visualization numba 0.46.0 Alternative execution engine for rolling operations -openpyxl 2.6.0 Reading / writing for xlsx files +openpyxl 3.0.0 Reading / writing for xlsx files pandas-gbq 0.12.0 Google Big Query access psycopg2 2.7 PostgreSQL engine for sqlalchemy pyarrow 0.15.0 Parquet, ORC, and feather reading / writing diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 799bc88ffff4e..7b2c993cde024 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -186,7 +186,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | numba | 0.46.0 | | +-----------------+-----------------+---------+ -| openpyxl | 2.6.0 | | +| openpyxl | 3.0.0 | X | +-----------------+-----------------+---------+ | pyarrow | 0.15.0 | | +-----------------+-----------------+---------+ diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index bcad9f1ddab09..eb2b4caddb7a6 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -17,7 +17,7 @@ "matplotlib": "2.2.3", "numexpr": "2.6.8", "odfpy": "1.3.0", - "openpyxl": "2.6.0", + "openpyxl": "3.0.0", "pandas_gbq": "0.12.0", "pyarrow": "0.15.0", "pytest": "5.0.1", diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index d0fe64a82d187..ef70706920dc4 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -1,13 +1,12 @@ from __future__ import annotations -from distutils.version import LooseVersion import mmap from typing import TYPE_CHECKING, Dict, List, Optional import numpy as np from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions -from pandas.compat._optional import get_version, import_optional_dependency +from pandas.compat._optional import import_optional_dependency from pandas.io.excel._base import BaseExcelReader, ExcelWriter from pandas.io.excel._util import validate_freeze_panes @@ -531,14 +530,8 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return cell.value def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - # GH 39001 - # Reading of excel file depends on dimension data being correct but - # writers sometimes omit or get it wrong - import openpyxl - version = LooseVersion(get_version(openpyxl)) - - if version >= "3.0.0" and self.book.read_only: + if self.book.read_only: sheet.reset_dimensions() data: List[List[Scalar]] = [] @@ -552,7 +545,7 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: # Trim trailing empty rows data = data[: last_row_with_data + 1] - if version >= "3.0.0" and self.book.read_only and len(data) > 0: + if self.book.read_only and len(data) > 0: # With dimension reset, openpyxl no longer pads rows max_width = max(len(data_row) for data_row in data) if min(len(data_row) for data_row in data) < max_width: diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 0962b719efd4d..8128e958141e2 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,11 +1,8 @@ -from distutils.version import LooseVersion from pathlib import Path import numpy as np import pytest -from pandas.compat._optional import get_version - import pandas as pd from pandas import DataFrame import pandas._testing as tm @@ -157,10 +154,6 @@ def test_read_with_bad_dimension( datapath, ext, header, expected_data, filename, read_only, request ): # GH 38956, 39001 - no/incorrect dimension information - version = LooseVersion(get_version(openpyxl)) - if (read_only or read_only is None) and version < "3.0.0": - msg = "openpyxl read-only sheet is incorrect when dimension data is wrong" - request.node.add_marker(pytest.mark.xfail(reason=msg)) path = datapath("io", "data", "excel", f"{filename}{ext}") if read_only is None: result = pd.read_excel(path, header=header) @@ -195,10 +188,6 @@ def test_append_mode_file(ext): @pytest.mark.parametrize("read_only", [True, False, None]) def test_read_with_empty_trailing_rows(datapath, ext, read_only, request): # GH 39181 - version = LooseVersion(get_version(openpyxl)) - if (read_only or read_only is None) and version < "3.0.0": - msg = "openpyxl read-only sheet is incorrect when dimension data is wrong" - request.node.add_marker(pytest.mark.xfail(reason=msg)) path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}") if read_only is None: result = pd.read_excel(path)