From b8871839ccfc189eb887d40fa24bf874a14235b2 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 3 Feb 2021 18:30:13 -0500
Subject: [PATCH 1/9] REG: read_excel with engine specified raises on
 non-path/non-buffer

---
 doc/source/whatsnew/v1.2.2.rst         |  2 +-
 pandas/io/excel/_base.py               | 22 +++++++++++++++-------
 pandas/tests/io/excel/test_openpyxl.py |  8 ++++++++
 pandas/tests/io/excel/test_readers.py  |  9 ++++++++-
 4 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst
index 0ee1abaa2a0eb..dc27ceba138c9 100644
--- a/doc/source/whatsnew/v1.2.2.rst
+++ b/doc/source/whatsnew/v1.2.2.rst
@@ -21,7 +21,7 @@ Fixed regressions
 - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
 - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
 - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
--
+- Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 213be7c05b370..1d77cc12caf8e 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1069,26 +1069,34 @@ def __init__(
 
             xlrd_version = LooseVersion(get_version(xlrd))
 
-        if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
-            ext = "xls"
-        else:
+        ext = None
+        if engine is None:
+            # Only determine ext if it is needed
             ext = inspect_excel_format(
                 content_or_path=path_or_buffer, storage_options=storage_options
             )
 
-        if engine is None:
             # ext will always be valid, otherwise inspect_excel_format would raise
             engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
             if engine == "auto":
                 engine = get_default_engine(ext, mode="reader")
 
-        if engine == "xlrd" and ext != "xls" and xlrd_version is not None:
-            if xlrd_version >= "2":
+        if engine == "xlrd" and xlrd_version is not None:
+            if ext is None:
+                # Need ext to determine ext in order to raise/warn
+                if isinstance(path_or_buffer, xlrd.Book):
+                    ext = "xls"
+                else:
+                    ext = inspect_excel_format(
+                        path_or_buffer, storage_options=storage_options
+                    )
+
+            if ext != "xls" and xlrd_version >= "2":
                 raise ValueError(
                     f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
                     f"only the xls format is supported. Install openpyxl instead."
                 )
-            else:
+            elif ext != "xls":
                 caller = inspect.stack()[1]
                 if (
                     caller.filename.endswith(
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 3155e22d3ff5d..7d24de2a17529 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -116,3 +116,11 @@ def test_to_excel_with_openpyxl_engine(ext):
         ).highlight_max()
 
         styled.to_excel(filename, engine="openpyxl")
+
+
+def test_read_workbook(datapath, ext):
+    filename = datapath("io", "data", "excel", "test1" + ext)
+    wb = openpyxl.load_workbook(filename)
+    result = pd.read_excel(wb, engine="openpyxl")
+    expected = pd.read_excel(filename)
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index b2e87de5580e6..a594718bd62d9 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -2,6 +2,7 @@
 from functools import partial
 import os
 from urllib.error import URLError
+from zipfile import BadZipFile
 
 import numpy as np
 import pytest
@@ -685,7 +686,13 @@ def test_missing_file_raises(self, read_ext):
 
     def test_corrupt_bytes_raises(self, read_ext, engine):
         bad_stream = b"foo"
-        with pytest.raises(ValueError, match="File is not a recognized excel file"):
+        if engine is None or engine == "xlrd":
+            error = ValueError
+            msg = "File is not a recognized excel file"
+        else:
+            error = BadZipFile
+            msg = "File is not a zip file"
+        with pytest.raises(error, match=msg):
             pd.read_excel(bad_stream)
 
     @tm.network

From 601ad87cf02d2ddc68a026fd01ed87ceb24505c3 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 3 Feb 2021 18:37:51 -0500
Subject: [PATCH 2/9] Restore special-casing for xlrd.Book even when engine is
 None

---
 pandas/io/excel/_base.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 1d77cc12caf8e..84b5cae09acce 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1072,9 +1072,12 @@ def __init__(
         ext = None
         if engine is None:
             # Only determine ext if it is needed
-            ext = inspect_excel_format(
-                content_or_path=path_or_buffer, storage_options=storage_options
-            )
+            if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
+                ext = "xls"
+            else:
+                ext = inspect_excel_format(
+                    content_or_path=path_or_buffer, storage_options=storage_options
+                )
 
             # ext will always be valid, otherwise inspect_excel_format would raise
             engine = config.get_option(f"io.excel.{ext}.reader", silent=True)

From d835dffdc779d90f6857432c3cd6f36ba4bf4b90 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 3 Feb 2021 18:44:20 -0500
Subject: [PATCH 3/9] GH # in test

---
 pandas/tests/io/excel/test_openpyxl.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 7d24de2a17529..bc2aca862546b 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -119,6 +119,7 @@ def test_to_excel_with_openpyxl_engine(ext):
 
 
 def test_read_workbook(datapath, ext):
+    # GH 39528
     filename = datapath("io", "data", "excel", "test1" + ext)
     wb = openpyxl.load_workbook(filename)
     result = pd.read_excel(wb, engine="openpyxl")

From e6684e9a0f0f3d953ac05de3f7d68ddc327902d4 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Thu, 4 Feb 2021 18:19:24 -0500
Subject: [PATCH 4/9] Added wb.close() to test

---
 pandas/tests/io/excel/test_openpyxl.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index bc2aca862546b..d426811c43efa 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -123,5 +123,6 @@ def test_read_workbook(datapath, ext):
     filename = datapath("io", "data", "excel", "test1" + ext)
     wb = openpyxl.load_workbook(filename)
     result = pd.read_excel(wb, engine="openpyxl")
+    wb.close()
     expected = pd.read_excel(filename)
     tm.assert_frame_equal(result, expected)

From ba2bc75a36d594ffec889e819e120a8e2aa70ea2 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 5 Feb 2021 16:44:55 -0500
Subject: [PATCH 5/9] Added logic/tests for determining if a sheet is read-only

---
 pandas/io/excel/_openpyxl.py           |  6 ++--
 pandas/tests/io/excel/test_openpyxl.py | 39 ++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 64c64b5009b0c..6bc742f6d9577 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -533,7 +533,9 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
 
         version = LooseVersion(get_version(openpyxl))
 
-        if version >= "3.0.0":
+        is_readonly = hasattr(sheet, "reset_dimensions")
+
+        if version >= "3.0.0" and is_readonly:
             sheet.reset_dimensions()
 
         data: List[List[Scalar]] = []
@@ -541,7 +543,7 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
             converted_row = [self._convert_cell(cell, convert_float) for cell in row]
             data.append(converted_row)
 
-        if version >= "3.0.0" and len(data) > 0:
+        if version >= "3.0.0" and is_readonly and len(data) > 0:
             # With dimension reset, openpyxl no longer pads rows
             max_width = max(len(data_row) for data_row in data)
             if min(len(data_row) for data_row in data) < max_width:
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 830da757d23e8..3bc502e8b33c6 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -151,10 +151,45 @@ def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename)
     tm.assert_frame_equal(result, expected)
 
 
-def test_read_workbook(datapath, ext):
+@pytest.mark.parametrize(
+    "header, expected_data",
+    [
+        (
+            0,
+            {
+                "Title": [np.nan, "A", 1, 2, 3],
+                "Unnamed: 1": [np.nan, "B", 4, 5, 6],
+                "Unnamed: 2": [np.nan, "C", 7, 8, 9],
+            },
+        ),
+        (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
+    ],
+)
+@pytest.mark.parametrize(
+    "filename", ["dimension_missing", "dimension_small", "dimension_large"]
+)
+@pytest.mark.parametrize("read_only", [True, False])
+@pytest.mark.xfail(
+    LooseVersion(get_version(openpyxl)) < "3.0.0",
+    reason="openpyxl read-only sheet is incorrect when dimension data is wrong",
+)
+def test_read_wb_with_bad_dimension(
+    datapath, ext, filename, header, expected_data, read_only
+):
+    # GH 38956, 39001 - no/incorrect dimension information
+    path = datapath("io", "data", "excel", f"{filename}{ext}")
+    wb = openpyxl.load_workbook(path, read_only=read_only)
+    result = pd.read_excel(wb, engine="openpyxl", header=header)
+    wb.close()
+    expected = DataFrame(expected_data)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("read_only", [True, False])
+def test_read_workbook(datapath, ext, read_only):
     # GH 39528
     filename = datapath("io", "data", "excel", "test1" + ext)
-    wb = openpyxl.load_workbook(filename)
+    wb = openpyxl.load_workbook(filename, read_only=read_only)
     result = pd.read_excel(wb, engine="openpyxl")
     wb.close()
     expected = pd.read_excel(filename)

From 1381eccfa8e8ca19fde8dd0c483c33ba7a43fd32 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 5 Feb 2021 16:51:19 -0500
Subject: [PATCH 6/9] Added comment

---
 pandas/io/excel/_openpyxl.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 6bc742f6d9577..274b18b2605cc 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -533,6 +533,8 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
 
         version = LooseVersion(get_version(openpyxl))
 
+        # There is no good way of determining if a sheet is read-only
+        # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1605
         is_readonly = hasattr(sheet, "reset_dimensions")
 
         if version >= "3.0.0" and is_readonly:

From a3db3eb54b4439edbbd4ac49880649116d3e9288 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 5 Feb 2021 17:20:43 -0500
Subject: [PATCH 7/9] Combine and reorg tests

---
 pandas/tests/io/excel/test_openpyxl.py | 61 ++++++++------------------
 1 file changed, 18 insertions(+), 43 deletions(-)

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 3bc502e8b33c6..2f901bde0ba09 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -122,32 +122,14 @@ def test_to_excel_with_openpyxl_engine(ext):
         styled.to_excel(filename, engine="openpyxl")
 
 
-@pytest.mark.parametrize(
-    "header, expected_data",
-    [
-        (
-            0,
-            {
-                "Title": [np.nan, "A", 1, 2, 3],
-                "Unnamed: 1": [np.nan, "B", 4, 5, 6],
-                "Unnamed: 2": [np.nan, "C", 7, 8, 9],
-            },
-        ),
-        (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
-    ],
-)
-@pytest.mark.parametrize(
-    "filename", ["dimension_missing", "dimension_small", "dimension_large"]
-)
-@pytest.mark.xfail(
-    LooseVersion(get_version(openpyxl)) < "3.0.0",
-    reason="openpyxl read-only sheet is incorrect when dimension data is wrong",
-)
-def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename):
-    # GH 38956, 39001 - no/incorrect dimension information
-    path = datapath("io", "data", "excel", f"{filename}{ext}")
-    result = pd.read_excel(path, header=header)
-    expected = DataFrame(expected_data)
+@pytest.mark.parametrize("read_only", [True, False])
+def test_read_workbook(datapath, ext, read_only):
+    # GH 39528
+    filename = datapath("io", "data", "excel", "test1" + ext)
+    wb = openpyxl.load_workbook(filename, read_only=read_only)
+    result = pd.read_excel(wb, engine="openpyxl")
+    wb.close()
+    expected = pd.read_excel(filename)
     tm.assert_frame_equal(result, expected)
 
 
@@ -168,29 +150,22 @@ def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename)
 @pytest.mark.parametrize(
     "filename", ["dimension_missing", "dimension_small", "dimension_large"]
 )
-@pytest.mark.parametrize("read_only", [True, False])
+# When read_only is None, use read_excel instead of a workbook
+@pytest.mark.parametrize("read_only", [True, False, None])
 @pytest.mark.xfail(
     LooseVersion(get_version(openpyxl)) < "3.0.0",
     reason="openpyxl read-only sheet is incorrect when dimension data is wrong",
 )
-def test_read_wb_with_bad_dimension(
-    datapath, ext, filename, header, expected_data, read_only
+def test_read_with_bad_dimension(
+    datapath, ext, header, expected_data, filename, read_only
 ):
     # GH 38956, 39001 - no/incorrect dimension information
     path = datapath("io", "data", "excel", f"{filename}{ext}")
-    wb = openpyxl.load_workbook(path, read_only=read_only)
-    result = pd.read_excel(wb, engine="openpyxl", header=header)
-    wb.close()
+    if read_only is None:
+        result = pd.read_excel(path, header=header)
+    else:
+        wb = openpyxl.load_workbook(path, read_only=read_only)
+        result = pd.read_excel(wb, engine="openpyxl", header=header)
+        wb.close()
     expected = DataFrame(expected_data)
     tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("read_only", [True, False])
-def test_read_workbook(datapath, ext, read_only):
-    # GH 39528
-    filename = datapath("io", "data", "excel", "test1" + ext)
-    wb = openpyxl.load_workbook(filename, read_only=read_only)
-    result = pd.read_excel(wb, engine="openpyxl")
-    wb.close()
-    expected = pd.read_excel(filename)
-    tm.assert_frame_equal(result, expected)

From 33acfaa19077248e04a0ffa0be5ab82aa3b5ee36 Mon Sep 17 00:00:00 2001
From: rhshadrach <rhshadrach@gmail.com>
Date: Fri, 5 Feb 2021 19:59:10 -0500
Subject: [PATCH 8/9] Only mark as xfail for read-only cases

---
 pandas/tests/io/excel/test_openpyxl.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 2f901bde0ba09..e6c50a174b088 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -152,14 +152,16 @@ def test_read_workbook(datapath, ext, read_only):
 )
 # When read_only is None, use read_excel instead of a workbook
 @pytest.mark.parametrize("read_only", [True, False, None])
-@pytest.mark.xfail(
-    LooseVersion(get_version(openpyxl)) < "3.0.0",
-    reason="openpyxl read-only sheet is incorrect when dimension data is wrong",
-)
 def test_read_with_bad_dimension(
-    datapath, ext, header, expected_data, filename, read_only
+    datapath, ext, header, expected_data, filename, read_only, request
 ):
     # GH 38956, 39001 - no/incorrect dimension information
+    version = LooseVersion(get_version(openpyxl))
+    if (read_only or read_only is None) and version < "3.0.0":
+        msg = "openpyxl read-only sheet is incorrect when dimension data is wrong"
+        request.node.add_marker(
+            pytest.mark.xfail(LooseVersion(get_version(openpyxl)) < "3.0.0", reason=msg)
+        )
     path = datapath("io", "data", "excel", f"{filename}{ext}")
     if read_only is None:
         result = pd.read_excel(path, header=header)

From 224d2a18aa87d6551b9f4a9201a713d1f77eafcc Mon Sep 17 00:00:00 2001
From: rhshadrach <rhshadrach@gmail.com>
Date: Fri, 5 Feb 2021 20:01:36 -0500
Subject: [PATCH 9/9] fixup

---
 pandas/tests/io/excel/test_openpyxl.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index e6c50a174b088..da12829b579fe 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -159,9 +159,7 @@ def test_read_with_bad_dimension(
     version = LooseVersion(get_version(openpyxl))
     if (read_only or read_only is None) and version < "3.0.0":
         msg = "openpyxl read-only sheet is incorrect when dimension data is wrong"
-        request.node.add_marker(
-            pytest.mark.xfail(LooseVersion(get_version(openpyxl)) < "3.0.0", reason=msg)
-        )
+        request.node.add_marker(pytest.mark.xfail(reason=msg))
     path = datapath("io", "data", "excel", f"{filename}{ext}")
     if read_only is None:
         result = pd.read_excel(path, header=header)