pandas-dev · jreback · Jul 17, 2019 · Jul 9, 2019 · Jul 9, 2019 · Jul 9, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -400,7 +400,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t
 .. _whatsnew_0250.api_breaking.groupby_categorical:
 
 Categorical dtypes are preserved during groupby
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Previously, columns that were categorical, but not the groupby key(s) would be converted to ``object`` dtype during groupby operations. Pandas now will preserve these dtypes. (:issue:`18502`)
 
@@ -741,6 +741,44 @@ consistent with NumPy and the rest of pandas (:issue:`21801`).
    cat.argsort()
    cat[cat.argsort()]
 
+.. _whatsnew_0250.api_breaking.list_of_dict:
+
+Column order is preserved when passing a list of dicts to DataFrame
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Starting with Python 3.7 the key-order of ``dict`` is `guaranteed <https://mail.python.org/pipermail/python-dev/2017-December/151283.html>`_. In practice, this has been true since
+Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as
+it has treated a list of ``OrderedDict`` since v0.19.0, i.e. it preserves the order of the keys. This change
+applies only when pandas is running on Python >= 3.6 (:issue:`27309`). As a consequence, the column order
+produced by ``DataFrame()`` in such cases has changed.
+
+.. code-block:: ipython
+
+   In [1]: data = [
+      ...:     {'name': 'Joe', 'state': 'NY', 'age': 18},
+      ...:     {'name': 'Jane', 'state': 'KY', 'age': 19}
+      ...: ]
+
+*Previous behavior*: columns were lexicographically sorted
+
+.. code-block:: ipython
+
+   In [1]: pd.DataFrame(data)
+   Out[1]:
+      age  name state
+   0   18   Joe    NY
+   1   19  Jane    KY
+
+*New behavior*: column order follows the key order of the dicts
+
+.. code-block:: ipython
+
+   In [2]: pd.DataFrame(data)
+   Out[2]:
+      name state  age
+   0   Joe    NY   18
+   1  Jane    KY   19
+
 .. _whatsnew_0250.api_breaking.deps:
 
 Increased minimum versions for dependencies

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -10,7 +10,7 @@
 from pandas._libs import lib
 from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
 import pandas.compat as compat
-from pandas.compat import raise_with_traceback
+from pandas.compat import PY36, raise_with_traceback
 
 from pandas.core.dtypes.cast import (
     construct_1d_arraylike_from_scalar,
@@ -538,7 +538,8 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
 def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
     if columns is None:
         gen = (list(x.keys()) for x in data)
-        sort = not any(isinstance(d, OrderedDict) for d in data)
+        types = (dict, OrderedDict) if PY36 else OrderedDict
+        sort = not any(isinstance(d, types) for d in data)
         columns = lib.fast_unique_multiple_list_gen(gen, sort=sort)
 
     # assure that they are of the base dict class and not of derived

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -1119,7 +1119,7 @@ def test_constructor_generator(self):
         expected = DataFrame({0: range(10), 1: "a"})
         tm.assert_frame_equal(result, expected, check_dtype=False)
 
-    def test_constructor_list_of_dicts(self):
+    def test_constructor_list_of_odicts(self):
         data = [
             OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
             OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
@@ -1340,6 +1340,26 @@ def test_constructor_list_of_namedtuples(self):
         result = DataFrame(tuples, columns=["y", "z"])
         tm.assert_frame_equal(result, expected)
 
+    def test_constructor_list_of_dict_order(self):
+        # GH10056: columns keep first-seen key order on PY36+ (unchecked otherwise)
+        data = [
+            {"First": 1, "Second": 4, "Third": 7, "Fourth": 10},
+            {"Second": 5, "First": 2, "Fourth": 11, "Third": 8},
+            {"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13},
+        ]
+        expected = DataFrame(
+            {
+                "First": [1, 2, 3],
+                "Second": [4, 5, 6],
+                "Third": [7, 8, 9],
+                "Fourth": [10, 11, 12],
+                "YYY": [None, None, 14],
+                "XXX": [None, None, 13],
+            }
+        )
+        result = DataFrame(data)
+        tm.assert_frame_equal(result, expected, check_like=not PY36)
+
     def test_constructor_orient(self, float_string_frame):
         data_dict = float_string_frame.T._series
         recons = DataFrame.from_dict(data_dict, orient="index")

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat import PY36
+
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
 
 import pandas as pd
@@ -230,8 +232,10 @@ def test_setitem_dtype_upcast(self):
         assert df["c"].dtype == np.float64
 
         df.loc[0, "c"] = "foo"
-        expected = DataFrame([{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}])
-        tm.assert_frame_equal(df, expected)
+        expected = DataFrame(
+            [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]
+        )
+        tm.assert_frame_equal(df, expected, check_like=not PY36)
 
         # GH10280
         df = DataFrame(

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat import PY36
+
 from pandas import DataFrame, Index
 import pandas.util.testing as tm
 
@@ -351,9 +353,9 @@ def test_non_ascii_key(self):
         ).decode("utf8")
 
         testdata = {
+            b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
             "sub.A": [1, 3],
             "sub.B": [2, 4],
-            b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
         }
         expected = DataFrame(testdata)
 
@@ -366,21 +368,21 @@ def test_missing_field(self, author_missing_data):
         ex_data = [
             {
                 "info": np.nan,
-                "author_name.first": np.nan,
-                "author_name.last_name": np.nan,
                 "info.created_at": np.nan,
                 "info.last_updated": np.nan,
+                "author_name.first": np.nan,
+                "author_name.last_name": np.nan,
             },
             {
                 "info": None,
-                "author_name.first": "Jane",
-                "author_name.last_name": "Doe",
                 "info.created_at": "11/08/1993",
                 "info.last_updated": "26/05/2012",
+                "author_name.first": "Jane",
+                "author_name.last_name": "Doe",
             },
         ]
         expected = DataFrame(ex_data)
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_like=not PY36)
 
     @pytest.mark.parametrize(
         "max_level,expected",
@@ -508,12 +510,12 @@ def test_missing_meta(self, missing_metadata):
             data=missing_metadata, record_path="addresses", meta="name", errors="ignore"
         )
         ex_data = [
-            ["Massillon", 9562, "OH", "Morris St.", 44646, "Alice"],
-            ["Elizabethton", 8449, "TN", "Spring St.", 37643, np.nan],
+            [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
+            [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
         ]
-        columns = ["city", "number", "state", "street", "zip", "name"]
+        columns = ["number", "street", "city", "state", "zip", "name"]
         expected = DataFrame(ex_data, columns=columns)
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_like=not PY36)
 
     def test_donot_drop_nonevalues(self):
         # GH21356
@@ -684,7 +686,7 @@ def test_with_large_max_level(self):
                 "CreatedBy.user.family_tree.father.name": "Father001",
                 "CreatedBy.user.family_tree.father.father.Name": "Father002",
                 "CreatedBy.user.family_tree.father.father.father.name": "Father003",
-                "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004",
+                "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004",  # noqa: E501
             }
         ]
         output = nested_to_record(input_data, max_level=max_level)