From 62d2e153265ee2b7abb845be925070fa77c69c48 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 30 Nov 2020 09:44:41 +0100
Subject: [PATCH 1/5] BUG: preserve nullable dtype for float result in
 IntegerArray arithmetic ops

---
 pandas/core/arrays/integer.py                 |  5 +-
 .../tests/arrays/integer/test_arithmetic.py   | 46 +++++++++++++------
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 2897c18acfb09..2738c58e06fd9 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -636,8 +636,9 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
         if (is_float_dtype(other) or is_float(other)) or (
             op_name in ["rtruediv", "truediv"]
         ):
-            result[mask] = np.nan
-            return result
+            from pandas.core.arrays import FloatingArray
+
+            return FloatingArray(result, mask, copy=False)
 
         if result.dtype == "timedelta64[ns]":
             from pandas.core.arrays import TimedeltaArray
diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py
index cf382dd5e37e0..91ab2a8af6fba 100644
--- a/pandas/tests/arrays/integer/test_arithmetic.py
+++ b/pandas/tests/arrays/integer/test_arithmetic.py
@@ -5,7 +5,7 @@
 
 import pandas as pd
 import pandas._testing as tm
-from pandas.core.arrays import integer_array
+from pandas.core.arrays import FloatingArray, integer_array
 import pandas.core.ops as ops
 
 # Basic test for the arithmetic array ops
@@ -43,13 +43,12 @@ def test_sub(dtype):
 
 
 def test_div(dtype):
-    # for now division gives a float numpy array
     a = pd.array([1, 2, 3, None, 5], dtype=dtype)
     b = pd.array([0, 1, None, 3, 4], dtype=dtype)
 
     result = a / b
-    expected = np.array([np.inf, 2, np.nan, np.nan, 1.25], dtype="float64")
-    tm.assert_numpy_array_equal(result, expected)
+    expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
+    tm.assert_extension_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
@@ -57,10 +56,13 @@ def test_divide_by_zero(zero, negative):
     # https://github.com/pandas-dev/pandas/issues/27398
     a = pd.array([0, 1, -1, None], dtype="Int64")
     result = a / zero
-    expected = np.array([np.nan, np.inf, -np.inf, np.nan])
+    expected = FloatingArray(
+        np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
+        np.array([False, False, False, True]),
+    )
     if negative:
         expected *= -1
-    tm.assert_numpy_array_equal(result, expected)
+    tm.assert_extension_array_equal(result, expected)
 
 
 def test_floordiv(dtype):
@@ -97,8 +99,11 @@ def test_pow_scalar():
     tm.assert_extension_array_equal(result, expected)
 
     result = a ** np.nan
-    expected = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64")
-    tm.assert_numpy_array_equal(result, expected)
+    expected = FloatingArray(
+        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
+        np.array([False, False, False, True, False]),
+    )
+    tm.assert_extension_array_equal(result, expected)
 
     # reversed
     a = a[1:]  # Can't raise integers to negative powers.
@@ -116,8 +121,11 @@ def test_pow_scalar():
     tm.assert_extension_array_equal(result, expected)
 
     result = np.nan ** a
-    expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64")
-    tm.assert_numpy_array_equal(result, expected)
+    expected = FloatingArray(
+        np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
+        np.array([False, False, True, False]),
+    )
+    tm.assert_extension_array_equal(result, expected)
 
 
 def test_pow_array():
@@ -131,10 +139,10 @@ def test_pow_array():
 def test_rpow_one_to_na():
     # https://github.com/pandas-dev/pandas/issues/22022
     # https://github.com/pandas-dev/pandas/issues/29997
-    arr = integer_array([np.nan, np.nan])
+    arr = pd.array([np.nan, np.nan], dtype="Int64")
     result = np.array([1.0, 2.0]) ** arr
-    expected = np.array([1.0, np.nan])
-    tm.assert_numpy_array_equal(result, expected)
+    expected = pd.array([1.0, np.nan], dtype="Float64")
+    tm.assert_extension_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("other", [0, 0.5])
@@ -196,9 +204,17 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators):
 
     result = op(s, other)
     expected = op(s.astype(float), other)
+    expected = expected.astype("Float64")
     # rfloordiv results in nan instead of inf
     if all_arithmetic_operators == "__rfloordiv__":
-        expected[(expected == np.inf) | (expected == -np.inf)] = np.nan
+        mask = (
+            ((expected == np.inf) | (expected == -np.inf)).fillna(False).to_numpy(bool)
+        )
+        expected.array._data[mask] = np.nan
+    # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
+    elif all_arithmetic_operators == "__rmod__":
+        mask = (s == 0).fillna(False).to_numpy(bool)
+        expected.array._mask[mask] = False
 
     tm.assert_series_equal(result, expected)
 
@@ -211,7 +227,7 @@ def test_arithmetic_conversion(all_arithmetic_operators, other):
 
     s = pd.Series([1, 2, 3], dtype="Int64")
     result = op(s, other)
-    assert result.dtype is np.dtype("float")
+    assert result.dtype == "Float64"
 
 
 def test_cross_type_arithmetic():

From d8a0c03a9d84ceda2863507bc1f0167cd1e6985a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 30 Nov 2020 10:03:11 +0100
Subject: [PATCH 2/5] same change for boolean

---
 pandas/core/arrays/boolean.py                  |  7 ++++---
 pandas/tests/arrays/boolean/test_arithmetic.py | 13 ++++++++-----
 pandas/tests/arrays/masked/test_arithmetic.py  |  6 +-----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index c6c7396a980b0..44cc108ed9cfd 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -706,10 +706,11 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
         if (is_float_dtype(other) or is_float(other)) or (
             op_name in ["rtruediv", "truediv"]
         ):
-            result[mask] = np.nan
-            return result
+            from pandas.core.arrays import FloatingArray
+
+            return FloatingArray(result, mask, copy=False)
 
-        if is_bool_dtype(result):
+        elif is_bool_dtype(result):
             return BooleanArray(result, mask, copy=False)
 
         elif is_integer_dtype(result):
diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py
index 1a4ab9799e8e5..01de64568a011 100644
--- a/pandas/tests/arrays/boolean/test_arithmetic.py
+++ b/pandas/tests/arrays/boolean/test_arithmetic.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.arrays import FloatingArray
 
 
 @pytest.fixture
@@ -51,13 +52,15 @@ def test_sub(left_array, right_array):
 
 
 def test_div(left_array, right_array):
-    # for now division gives a float numpy array
     result = left_array / right_array
-    expected = np.array(
-        [1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
-        dtype="float64",
+    expected = FloatingArray(
+        np.array(
+            [1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
+            dtype="float64",
+        ),
+        np.array([False, False, True, False, False, True, True, True, True]),
     )
-    tm.assert_numpy_array_equal(result, expected)
+    tm.assert_extension_array_equal(result, expected)
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py
index 6de10fd896878..1d2833c5da276 100644
--- a/pandas/tests/arrays/masked/test_arithmetic.py
+++ b/pandas/tests/arrays/masked/test_arithmetic.py
@@ -43,11 +43,7 @@ def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
     for scalar in [scalar, data.dtype.type(scalar)]:
         result = op(data, scalar)
         expected = op(data, scalar_array)
-        if isinstance(expected, ExtensionArray):
-            tm.assert_extension_array_equal(result, expected)
-        else:
-            # TODO div still gives float ndarray -> remove this once we have Float EA
-            tm.assert_numpy_array_equal(result, expected)
+        tm.assert_extension_array_equal(result, expected)
 
 
 def test_array_NA(data, all_arithmetic_operators):

From e9ecfa76ef9f7207a5fc681f14e29dbec00e792b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 30 Nov 2020 11:50:23 +0100
Subject: [PATCH 3/5] update base extension tests

---
 pandas/tests/arrays/boolean/test_function.py  |  7 +++++++
 pandas/tests/arrays/floating/test_function.py |  7 +++++++
 pandas/tests/arrays/integer/test_function.py  |  8 ++++++++
 pandas/tests/arrays/string_/test_string.py    | 12 ++++++++++++
 pandas/tests/extension/test_boolean.py        |  6 +++++-
 pandas/tests/extension/test_floating.py       |  4 ++++
 pandas/tests/extension/test_integer.py        |  9 +++++----
 pandas/tests/extension/test_string.py         |  4 ++++
 8 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py
index 7665c350e3443..0f8743489b412 100644
--- a/pandas/tests/arrays/boolean/test_function.py
+++ b/pandas/tests/arrays/boolean/test_function.py
@@ -85,6 +85,13 @@ def test_value_counts_na():
     tm.assert_series_equal(result, expected)
 
 
+def test_value_counts_with_normalize():
+    s = pd.Series([True, False, pd.NA], dtype="boolean")
+    result = s.value_counts(normalize=True)
+    expected = pd.Series([1, 1], index=[False, True], dtype="Float64") / 2
+    tm.assert_series_equal(result, expected)
+
+
 def test_diff():
     a = pd.array(
         [True, True, False, False, True, None, True, None, False], dtype="boolean"
diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py
index baf60a363ad29..ef95eac316397 100644
--- a/pandas/tests/arrays/floating/test_function.py
+++ b/pandas/tests/arrays/floating/test_function.py
@@ -113,6 +113,13 @@ def test_value_counts_empty():
     tm.assert_series_equal(result, expected)
 
 
+def test_value_counts_with_normalize():
+    s = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
+    result = s.value_counts(normalize=True)
+    expected = pd.Series([2, 1], index=[0.1, 0.2], dtype="Float64") / 3
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize("skipna", [True, False])
 @pytest.mark.parametrize("min_count", [0, 4])
 def test_floating_array_sum(skipna, min_count, dtype):
diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
index 9cdea1c71f109..521547cc7357d 100644
--- a/pandas/tests/arrays/integer/test_function.py
+++ b/pandas/tests/arrays/integer/test_function.py
@@ -127,6 +127,14 @@ def test_value_counts_empty():
     tm.assert_series_equal(result, expected)
 
 
+def test_value_counts_with_normalize():
+    # GH 33172
+    s = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
+    result = s.value_counts(normalize=True)
+    expected = pd.Series([2, 1], index=[1, 2], dtype="Float64") / 3
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize("skipna", [True, False])
 @pytest.mark.parametrize("min_count", [0, 4])
 def test_integer_array_sum(skipna, min_count, any_nullable_int_dtype):
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index e35a632734779..c70d55b07661d 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -495,6 +495,18 @@ def test_value_counts_na(dtype, request):
     tm.assert_series_equal(result, expected)
 
 
+def test_value_counts_with_normalize(dtype, request):
+    if dtype == "arrow_string":
+        reason = "TypeError: boolean value of NA is ambiguous"
+        mark = pytest.mark.xfail(reason=reason)
+        request.node.add_marker(mark)
+
+    s = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
+    result = s.value_counts(normalize=True)
+    expected = pd.Series([2, 1], index=["a", "b"], dtype="Float64") / 3
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "values, expected",
     [
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
index 8acbeaf0b8170..ced7ea9261310 100644
--- a/pandas/tests/extension/test_boolean.py
+++ b/pandas/tests/extension/test_boolean.py
@@ -130,7 +130,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
             elif op_name in ("__truediv__", "__rtruediv__"):
                 # combine with bools does not generate the correct result
                 #  (numpy behaviour for div is to regard the bools as numeric)
-                expected = s.astype(float).combine(other, op)
+                expected = s.astype(float).combine(other, op).astype("Float64")
             if op_name == "__rpow__":
                 # for rpow, combine does not propagate NaN
                 expected[result.isna()] = np.nan
@@ -235,6 +235,10 @@ def test_searchsorted(self, data_for_sorting, as_series):
     def test_value_counts(self, all_data, dropna):
         return super().test_value_counts(all_data, dropna)
 
+    @pytest.mark.skip(reason="uses nullable integer")
+    def test_value_counts_with_normalize(self, data):
+        pass
+
     def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
         # override because there are only 2 unique values
 
diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
index 00881178de1b4..c08c31e90fecc 100644
--- a/pandas/tests/extension/test_floating.py
+++ b/pandas/tests/extension/test_floating.py
@@ -184,6 +184,10 @@ def test_value_counts(self, all_data, dropna):
 
         self.assert_series_equal(result, expected)
 
+    @pytest.mark.skip(reason="uses nullable integer")
+    def test_value_counts_with_normalize(self, data):
+        pass
+
 
 class TestCasting(base.BaseCastingTests):
     pass
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index 725533765ca2c..b1461dcbd9e53 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -130,10 +130,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
             expected = s.combine(other, op)
 
             if op_name in ("__rtruediv__", "__truediv__", "__div__"):
-                expected = expected.fillna(np.nan).astype(float)
-                if op_name == "__rtruediv__":
-                    # TODO reverse operators result in object dtype
-                    result = result.astype(float)
+                expected = expected.fillna(np.nan).astype("Float64")
             elif op_name.startswith("__r"):
                 # TODO reverse operators result in object dtype
                 # see https://github.com/pandas-dev/pandas/issues/22024
@@ -224,6 +221,10 @@ def test_value_counts(self, all_data, dropna):
 
         self.assert_series_equal(result, expected)
 
+    @pytest.mark.skip(reason="uses nullable integer")
+    def test_value_counts_with_normalize(self, data):
+        pass
+
 
 class TestCasting(base.BaseCastingTests):
     pass
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index db1940226e04e..d49c4c5cf4889 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -118,6 +118,10 @@ class TestMethods(base.BaseMethodsTests):
     def test_value_counts(self, all_data, dropna):
         return super().test_value_counts(all_data, dropna)
 
+    @pytest.mark.skip(reason="returns nullable")
+    def test_value_counts_with_normalize(self, data):
+        pass
+
 
 class TestCasting(base.BaseCastingTests):
     pass

From ac687e5ecdec103e23aff3d987d2089aa8ba54b5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 30 Nov 2020 14:06:29 +0100
Subject: [PATCH 4/5] fix series arithmetic tests

---
 pandas/core/arrays/integer.py          |  4 +++-
 pandas/tests/series/test_arithmetic.py | 10 ++--------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 2738c58e06fd9..66d92238a9b08 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -539,13 +539,15 @@ def _cmp_method(self, other, op):
         return BooleanArray(result, mask)
 
     def _arith_method(self, other, op):
+        from pandas.core.arrays import FloatingArray
+
         op_name = op.__name__
         omask = None
 
         if getattr(other, "ndim", 0) > 1:
             raise NotImplementedError("can only perform ops with 1-d structures")
 
-        if isinstance(other, IntegerArray):
+        if isinstance(other, (IntegerArray, FloatingArray)):
             other, omask = other._data, other._mask
 
         elif is_list_like(other):
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index c5196cea5d3bb..09109447e799a 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -838,14 +838,8 @@ class TestInplaceOperations:
         (
             ("Int64", "Int64", "Int64", "Int64"),
             ("float", "float", "float", "float"),
-            ("Int64", "float", "float", "float"),
-            pytest.param(
-                "Int64",
-                "Float64",
-                "Float64",
-                "Float64",
-                marks=pytest.mark.xfail(reason="Not implemented yet"),
-            ),
+            ("Int64", "float", "Float64", "Float64"),
+            ("Int64", "Float64", "Float64", "Float64"),
         ),
     )
     def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul):

From 2649eb9029933da29fbc0d1cbbeafa78f745445d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 1 Dec 2020 08:27:02 +0100
Subject: [PATCH 5/5] add sentence to whatsnew section of float dtype

---
 doc/source/whatsnew/v1.2.0.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 9168041a4f474..84eb3b3f15780 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -196,6 +196,9 @@ Alternatively, you can also use the dtype object:
 
    pd.Series([1.5, None], dtype=pd.Float32Dtype())
 
+Operations with the existing integer or boolean nullable data types that
+give float results will now also use the nullable floating data types (:issue:`38178`).
+
 .. warning::
 
    Experimental: the new floating data types are currently experimental, and their