From afa72b4ad93016e0c75e8628b429b222a1e6bd7b Mon Sep 17 00:00:00 2001 From: pilkibun Date: Tue, 9 Jul 2019 11:23:48 -0500 Subject: [PATCH 01/53] ENH: Support new case of implied column ordering in Dataframe() --- pandas/core/frame.py | 16 ++++++++++++--- pandas/tests/frame/test_constructors.py | 27 ++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a1989fd62b6ee..064dcb69caefe 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -439,9 +439,19 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: - if is_named_tuple(data[0]) and columns is None: + infer_columns = columns is None + if is_named_tuple(data[0]) and infer_columns: columns = data[0]._fields - arrays, columns = to_arrays(data, columns, dtype=dtype) + arrays, arr_names = to_arrays(data, columns, dtype=dtype) + arr_names = ensure_index(arr_names) + + columns = arr_names + if is_dict_like(data[0]) and infer_columns: + _columns = list(columns) + if set(_columns[: len(data[0])]) == set(data[0]): + _columns[: len(data[0])] = list(data[0]) + columns = _columns + columns = ensure_index(columns) # set the index @@ -453,7 +463,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): else: index = ibase.default_index(len(data)) - mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) + mgr = arrays_to_mgr(arrays, arr_names, index, columns, dtype=dtype) else: mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index eca827f82e296..98ac7cb2d8c93 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1119,7 +1119,7 @@ def test_constructor_generator(self): expected = DataFrame({0: range(10), 1: 
"a"}) tm.assert_frame_equal(result, expected, check_dtype=False) - def test_constructor_list_of_dicts(self): + def test_constructor_list_of_odicts(self): data = [ OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), @@ -1340,6 +1340,31 @@ def test_constructor_list_of_namedtuples(self): result = DataFrame(tuples, columns=["y", "z"]) tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(not PY36, reason="Guaranteed dict order is Python>=3.7") + def test_constructor_list_of_dict_order(self): + # GH10056 + data = [ + {"First": 1, "Second": 4, "Third": 7, "Fourth": 10}, + {"Second": 5, "First": 2, "Fourth": 11, "Third": 8}, + {"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13}, + ] + expected = DataFrame( + { + "First": [1, 2, 3], + "Second": [4, 5, 6], + "Third": [7, 8, 9], + "Fourth": [10, 11, 12], + "XXX": [None, None, 13], + "YYY": [None, None, 14], + } + ) + result = DataFrame(data) + assert set(result.columns) == set(expected.columns) + # order of first 4 columns dictated by data[0] + tm.assert_frame_equal(result.iloc[:, :4], expected.iloc[:, :4]) + # the order of the remaining two is arbitrary, but assert the data matches + tm.assert_frame_equal(result[["XXX", "YYY"]], expected[["XXX", "YYY"]]) + def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series recons = DataFrame.from_dict(data_dict, orient="index") From 8a4113c8ecbf688564b9fc1bace4bfef38cd01be Mon Sep 17 00:00:00 2001 From: pilkibun Date: Tue, 9 Jul 2019 13:22:23 -0500 Subject: [PATCH 02/53] Safer --- pandas/core/frame.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 064dcb69caefe..b88e99156d687 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -448,8 +448,11 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): columns = arr_names if is_dict_like(data[0]) and 
infer_columns: _columns = list(columns) - if set(_columns[: len(data[0])]) == set(data[0]): - _columns[: len(data[0])] = list(data[0]) + known_columns = set(data[0]) + extra_columns = [_ for _ in _columns if _ not in known_columns] + if set(_columns[: len(data[0])]) == known_columns: + _columns[: len(known_columns)] = list(data[0]) + _columns[len(known_columns):] = extra_columns columns = _columns columns = ensure_index(columns) From 408ad8b4be0b8c8f55bba433f7b4770e332a481f Mon Sep 17 00:00:00 2001 From: pilkibun Date: Tue, 9 Jul 2019 15:38:28 -0500 Subject: [PATCH 03/53] Restrict to Index case --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b88e99156d687..879e3d6c18418 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -446,7 +446,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): arr_names = ensure_index(arr_names) columns = arr_names - if is_dict_like(data[0]) and infer_columns: + # GH#10056 + if is_dict_like(data[0]) and infer_columns and (type(columns) is Index): _columns = list(columns) known_columns = set(data[0]) extra_columns = [_ for _ in _columns if _ not in known_columns] From b7320961599c861be2076527d34295d2b3044bdf Mon Sep 17 00:00:00 2001 From: pilkibun Date: Tue, 9 Jul 2019 16:20:31 -0500 Subject: [PATCH 04/53] Fix tests --- pandas/tests/io/json/test_normalize.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index a32103d7b29b9..5e32ef381c19c 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -365,16 +365,16 @@ def test_missing_field(self, author_missing_data): result = json_normalize(author_missing_data) ex_data = [ { - "info": np.nan, "author_name.first": np.nan, "author_name.last_name": np.nan, + "info": np.nan, "info.created_at": np.nan, 
"info.last_updated": np.nan, }, { - "info": None, "author_name.first": "Jane", "author_name.last_name": "Doe", + "info": None, "info.created_at": "11/08/1993", "info.last_updated": "26/05/2012", }, @@ -508,10 +508,10 @@ def test_missing_meta(self, missing_metadata): data=missing_metadata, record_path="addresses", meta="name", errors="ignore" ) ex_data = [ - ["Massillon", 9562, "OH", "Morris St.", 44646, "Alice"], - ["Elizabethton", 8449, "TN", "Spring St.", 37643, np.nan], + [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"], + [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan], ] - columns = ["city", "number", "state", "street", "zip", "name"] + columns = ["number", "street", "city", "state", "zip", "name"] expected = DataFrame(ex_data, columns=columns) tm.assert_frame_equal(result, expected) From be57fd9f46a8c4e87fe658afae51d340b1cbc317 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Tue, 9 Jul 2019 17:02:54 -0500 Subject: [PATCH 05/53] Style --- pandas/core/frame.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 879e3d6c18418..3e84d3332e723 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -447,13 +447,17 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): columns = arr_names # GH#10056 - if is_dict_like(data[0]) and infer_columns and (type(columns) is Index): + if ( + is_dict_like(data[0]) + and infer_columns + and (type(columns) is Index) + ): _columns = list(columns) known_columns = set(data[0]) extra_columns = [_ for _ in _columns if _ not in known_columns] if set(_columns[: len(data[0])]) == known_columns: _columns[: len(known_columns)] = list(data[0]) - _columns[len(known_columns):] = extra_columns + _columns[len(known_columns) :] = extra_columns columns = _columns columns = ensure_index(columns) From 717716b566c0855685d8dafd8a527812cb6d84c7 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Tue, 9 Jul 2019 18:40:09 -0500 Subject: 
[PATCH 06/53] Fix test --- pandas/tests/io/json/test_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 5e32ef381c19c..808dc30250f1d 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -351,9 +351,9 @@ def test_non_ascii_key(self): ).decode("utf8") testdata = { + b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], "sub.A": [1, 3], "sub.B": [2, 4], - b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], } expected = DataFrame(testdata) From e9d49894579c57544af17cad12d2e1b545f9fdb1 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 00:37:40 -0500 Subject: [PATCH 07/53] rename --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3e84d3332e723..4415ca24899d7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -439,8 +439,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: - infer_columns = columns is None - if is_named_tuple(data[0]) and infer_columns: + infer_columns_names = columns is None + if is_named_tuple(data[0]) and infer_columns_names: columns = data[0]._fields arrays, arr_names = to_arrays(data, columns, dtype=dtype) arr_names = ensure_index(arr_names) @@ -449,7 +449,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): # GH#10056 if ( is_dict_like(data[0]) - and infer_columns + and infer_columns_names and (type(columns) is Index) ): _columns = list(columns) From 63adbfefaad4006dcd2de60cbedd00a8e68e3c62 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 00:38:23 -0500 Subject: [PATCH 08/53] Restrict to PY37 --- pandas/core/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/pandas/core/frame.py b/pandas/core/frame.py index 4415ca24899d7..7390b750ebe7a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8,6 +8,7 @@ alignment and a host of useful data manipulation methods having to do with the labeling information """ +from pandas.compat import PY37 import collections from collections import OrderedDict, abc import functools @@ -448,7 +449,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): columns = arr_names # GH#10056 if ( - is_dict_like(data[0]) + PY37 + and is_dict_like(data[0]) and infer_columns_names and (type(columns) is Index) ): From 0ed89ffe33a7e4f7709de6267aad189904b4aba5 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 00:39:37 -0500 Subject: [PATCH 09/53] Style --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7390b750ebe7a..3c1ec61fa42c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8,7 +8,6 @@ alignment and a host of useful data manipulation methods having to do with the labeling information """ -from pandas.compat import PY37 import collections from collections import OrderedDict, abc import functools @@ -25,7 +24,7 @@ from pandas._config import get_option from pandas._libs import algos as libalgos, lib -from pandas.compat import PY36, raise_with_traceback +from pandas.compat import PY36, PY37, raise_with_traceback from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, From 0a480162a3f54ac576754281f838428705e39727 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 00:40:30 -0500 Subject: [PATCH 10/53] Restrict to PY36 --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3c1ec61fa42c6..e5d274fcfc597 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -24,7 +24,7 @@ from pandas._config import 
get_option from pandas._libs import algos as libalgos, lib -from pandas.compat import PY36, PY37, raise_with_traceback +from pandas.compat import PY36, raise_with_traceback from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -448,7 +448,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): columns = arr_names # GH#10056 if ( - PY37 + PY36 and is_dict_like(data[0]) and infer_columns_names and (type(columns) is Index) From 4b7353650d30045c1c3afde80682cd26cb58c45e Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 01:16:03 -0500 Subject: [PATCH 11/53] Work around fake test failure on PY35 --- pandas/tests/io/json/test_normalize.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 808dc30250f1d..1e5caea9c3597 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -4,6 +4,7 @@ import pytest from pandas import DataFrame, Index +from pandas.compat import PY36 import pandas.util.testing as tm from pandas.io.json import json_normalize @@ -513,6 +514,11 @@ def test_missing_meta(self, missing_metadata): ] columns = ["number", "street", "city", "state", "zip", "name"] expected = DataFrame(ex_data, columns=columns) + if not PY36: + # json_normalize order is not guaranteed, so columns + # depends on implementation. Opt to test on PY36/37 + # and force column order on PY35. 
+ expected = expected[columns] tm.assert_frame_equal(result, expected) def test_donot_drop_nonevalues(self): From eb64d319dfdcfd6db629721eebe48f80cfd3c631 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 01:39:04 -0500 Subject: [PATCH 12/53] Style --- pandas/tests/io/json/test_normalize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 1e5caea9c3597..c8d6cc83adcd2 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -3,8 +3,9 @@ import numpy as np import pytest -from pandas import DataFrame, Index from pandas.compat import PY36 + +from pandas import DataFrame, Index import pandas.util.testing as tm from pandas.io.json import json_normalize From b5db0bcdf928253b111df667c6e3a0873a89d372 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Wed, 10 Jul 2019 11:50:47 -0500 Subject: [PATCH 13/53] fix test --- pandas/tests/io/json/test_normalize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index c8d6cc83adcd2..793063962c495 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -520,6 +520,7 @@ def test_missing_meta(self, missing_metadata): # depends on implementation. Opt to test on PY36/37 # and force column order on PY35. 
expected = expected[columns] + result = result[columns] tm.assert_frame_equal(result, expected) def test_donot_drop_nonevalues(self): From 76d7d5423abfe451478bdb6539d2f548fa89a37c Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 02:54:26 -0500 Subject: [PATCH 14/53] ENH: treat dict like OrderedDict for PY36+ in dataframe constructor --- pandas/core/internals/construction.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index b4752039cf5b1..346c7dce20a40 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -10,7 +10,7 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime import pandas.compat as compat -from pandas.compat import raise_with_traceback +from pandas.compat import raise_with_traceback, PY36 from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, @@ -538,7 +538,8 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): if columns is None: gen = (list(x.keys()) for x in data) - sort = not any(isinstance(d, OrderedDict) for d in data) + additional = dict if PY36 else OrderedDict + sort = not any(isinstance(d, (additional, OrderedDict)) for d in data) columns = lib.fast_unique_multiple_list_gen(gen, sort=sort) # assure that they are of the base dict class and not of derived From 4571e167e347a44f70d281879377f938c300350f Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 02:56:38 -0500 Subject: [PATCH 15/53] Restore frame.py --- pandas/core/frame.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e5d274fcfc597..a1989fd62b6ee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -439,28 +439,9 @@ def __init__(self, 
data=None, index=None, columns=None, dtype=None, copy=False): data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: - infer_columns_names = columns is None - if is_named_tuple(data[0]) and infer_columns_names: + if is_named_tuple(data[0]) and columns is None: columns = data[0]._fields - arrays, arr_names = to_arrays(data, columns, dtype=dtype) - arr_names = ensure_index(arr_names) - - columns = arr_names - # GH#10056 - if ( - PY36 - and is_dict_like(data[0]) - and infer_columns_names - and (type(columns) is Index) - ): - _columns = list(columns) - known_columns = set(data[0]) - extra_columns = [_ for _ in _columns if _ not in known_columns] - if set(_columns[: len(data[0])]) == known_columns: - _columns[: len(known_columns)] = list(data[0]) - _columns[len(known_columns) :] = extra_columns - columns = _columns - + arrays, columns = to_arrays(data, columns, dtype=dtype) columns = ensure_index(columns) # set the index @@ -472,7 +453,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): else: index = ibase.default_index(len(data)) - mgr = arrays_to_mgr(arrays, arr_names, index, columns, dtype=dtype) + mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) else: mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) else: From 71f3c7938aac753afd2e515dc7528cf30bc25e45 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 03:28:35 -0500 Subject: [PATCH 16/53] restore test_normalize.py --- pandas/tests/io/json/test_normalize.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 793063962c495..a625c912d1d8e 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.compat import PY36 - from pandas import DataFrame, Index import pandas.util.testing 
as tm @@ -353,9 +351,9 @@ def test_non_ascii_key(self): ).decode("utf8") testdata = { - b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], "sub.A": [1, 3], "sub.B": [2, 4], + b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], } expected = DataFrame(testdata) @@ -367,16 +365,16 @@ def test_missing_field(self, author_missing_data): result = json_normalize(author_missing_data) ex_data = [ { + "info": np.nan, "author_name.first": np.nan, "author_name.last_name": np.nan, - "info": np.nan, "info.created_at": np.nan, "info.last_updated": np.nan, }, { + "info": None, "author_name.first": "Jane", "author_name.last_name": "Doe", - "info": None, "info.created_at": "11/08/1993", "info.last_updated": "26/05/2012", }, @@ -510,17 +508,11 @@ def test_missing_meta(self, missing_metadata): data=missing_metadata, record_path="addresses", meta="name", errors="ignore" ) ex_data = [ - [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"], - [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan], + ["Massillon", 9562, "OH", "Morris St.", 44646, "Alice"], + ["Elizabethton", 8449, "TN", "Spring St.", 37643, np.nan], ] - columns = ["number", "street", "city", "state", "zip", "name"] + columns = ["city", "number", "state", "street", "zip", "name"] expected = DataFrame(ex_data, columns=columns) - if not PY36: - # json_normalize order is not guaranteed, so columns - # depends on implementation. Opt to test on PY36/37 - # and force column order on PY35. 
- expected = expected[columns] - result = result[columns] tm.assert_frame_equal(result, expected) def test_donot_drop_nonevalues(self): @@ -692,7 +684,7 @@ def test_with_large_max_level(self): "CreatedBy.user.family_tree.father.name": "Father001", "CreatedBy.user.family_tree.father.father.Name": "Father002", "CreatedBy.user.family_tree.father.father.father.name": "Father003", - "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004", + "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004", # noqa: E501 } ] output = nested_to_record(input_data, max_level=max_level) From a3987e72023b2489b22f847232b8011c73db3324 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 03:29:05 -0500 Subject: [PATCH 17/53] Skip some json tests on Py35 --- pandas/tests/io/json/test_normalize.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index a625c912d1d8e..7dd0ce9f15acd 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -6,6 +6,8 @@ from pandas import DataFrame, Index import pandas.util.testing as tm +from pandas.compat import PY36 + from pandas.io.json import json_normalize from pandas.io.json._normalize import nested_to_record @@ -360,21 +362,24 @@ def test_non_ascii_key(self): result = json_normalize(json.loads(testjson)) tm.assert_frame_equal(result, expected) + @pytest.mark.skipif( + not PY36, reason="Test vectors depend on " "PY36 guaranteed dict key ordering" + ) def test_missing_field(self, author_missing_data): # GH20030: result = json_normalize(author_missing_data) ex_data = [ { - "info": np.nan, "author_name.first": np.nan, "author_name.last_name": np.nan, + "info": np.nan, "info.created_at": np.nan, "info.last_updated": np.nan, }, { - "info": None, "author_name.first": "Jane", "author_name.last_name": "Doe", + "info": None, "info.created_at": "11/08/1993", 
"info.last_updated": "26/05/2012", }, @@ -500,6 +505,9 @@ def test_json_normalize_errors(self, missing_metadata): errors="raise", ) + @pytest.mark.skipif( + not PY36, reason="Test vectors depend on " "PY36 guaranteed dict key ordering" + ) def test_missing_meta(self, missing_metadata): # GH25468 # If metadata is nullable with errors set to ignore, the null values From e72b6666e3162a975ed0df9ddc1e5b166c6d4a56 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 03:33:33 -0500 Subject: [PATCH 18/53] fix test --- pandas/tests/indexing/test_indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e06047b52ac15..e1a27d5fbf7e5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -231,6 +231,7 @@ def test_setitem_dtype_upcast(self): df.loc[0, "c"] = "foo" expected = DataFrame([{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) + expected = expected[df.columns] # compensate dict key ordering tm.assert_frame_equal(df, expected) # GH10280 From 104c2a76b9ef832a182d3dd6d4b7f826876cae1a Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 03:33:55 -0500 Subject: [PATCH 19/53] black --- pandas/core/internals/construction.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/io/json/test_normalize.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 346c7dce20a40..509736df4cf7d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -10,7 +10,7 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime import pandas.compat as compat -from pandas.compat import raise_with_traceback, PY36 +from pandas.compat import PY36, raise_with_traceback from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, diff 
--git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e1a27d5fbf7e5..e86ae5bef5000 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -231,7 +231,7 @@ def test_setitem_dtype_upcast(self): df.loc[0, "c"] = "foo" expected = DataFrame([{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) - expected = expected[df.columns] # compensate dict key ordering + expected = expected[df.columns] # compensate dict key ordering tm.assert_frame_equal(df, expected) # GH10280 diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 7dd0ce9f15acd..b0290b6b09b7f 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -3,11 +3,11 @@ import numpy as np import pytest +from pandas.compat import PY36 + from pandas import DataFrame, Index import pandas.util.testing as tm -from pandas.compat import PY36 - from pandas.io.json import json_normalize from pandas.io.json._normalize import nested_to_record From 4d52802957ae1fa29ca8779755c68fdffc42aee2 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 12:16:37 -0500 Subject: [PATCH 20/53] fix tests --- pandas/tests/io/json/test_normalize.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index b0290b6b09b7f..ae552260321c2 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -353,9 +353,9 @@ def test_non_ascii_key(self): ).decode("utf8") testdata = { + b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], "sub.A": [1, 3], "sub.B": [2, 4], - b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], } expected = DataFrame(testdata) @@ -370,18 +370,18 @@ def test_missing_field(self, author_missing_data): result = json_normalize(author_missing_data) ex_data = [ { - "author_name.first": np.nan, - "author_name.last_name": 
np.nan, "info": np.nan, "info.created_at": np.nan, "info.last_updated": np.nan, + "author_name.first": np.nan, + "author_name.last_name": np.nan, }, { - "author_name.first": "Jane", - "author_name.last_name": "Doe", "info": None, "info.created_at": "11/08/1993", "info.last_updated": "26/05/2012", + "author_name.first": "Jane", + "author_name.last_name": "Doe", }, ] expected = DataFrame(ex_data) @@ -516,10 +516,11 @@ def test_missing_meta(self, missing_metadata): data=missing_metadata, record_path="addresses", meta="name", errors="ignore" ) ex_data = [ - ["Massillon", 9562, "OH", "Morris St.", 44646, "Alice"], - ["Elizabethton", 8449, "TN", "Spring St.", 37643, np.nan], + [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"], + [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan], ] columns = ["city", "number", "state", "street", "zip", "name"] + columns = ["number", "street", "city", "state", "zip", "name"] expected = DataFrame(ex_data, columns=columns) tm.assert_frame_equal(result, expected) From 5371de5477b61b52bb605b377201b6e03f1dcd82 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 14:20:35 -0500 Subject: [PATCH 21/53] CI From e8c27e5cea5efea1fb42519b7acbb1b13dd95606 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 14:46:23 -0500 Subject: [PATCH 22/53] clean isinstance check --- pandas/core/internals/construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 509736df4cf7d..702ab41c0b2f3 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -538,8 +538,8 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): if columns is None: gen = (list(x.keys()) for x in data) - additional = dict if PY36 else OrderedDict - sort = not any(isinstance(d, (additional, OrderedDict)) for d in 
data) + types = (dict, OrderedDict) if PY36 else OrderedDict + sort = not any(isinstance(d, types) for d in data) columns = lib.fast_unique_multiple_list_gen(gen, sort=sort) # assure that they are of the base dict class and not of derived From b38f65a415ae1efa36989ad4ce534996c6245dc6 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 14:48:51 -0500 Subject: [PATCH 23/53] messages --- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/io/json/test_normalize.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 98ac7cb2d8c93..d0c6026e82ac7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1340,7 +1340,7 @@ def test_constructor_list_of_namedtuples(self): result = DataFrame(tuples, columns=["y", "z"]) tm.assert_frame_equal(result, expected) - @pytest.mark.skipif(not PY36, reason="Guaranteed dict order is Python>=3.7") + @pytest.mark.skipif(not PY36, reason="Guaranteed dict order requires Python>=3.6") def test_constructor_list_of_dict_order(self): # GH10056 data = [ diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index ae552260321c2..cc8825f122785 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -363,7 +363,7 @@ def test_non_ascii_key(self): tm.assert_frame_equal(result, expected) @pytest.mark.skipif( - not PY36, reason="Test vectors depend on " "PY36 guaranteed dict key ordering" + not PY36, reason="Test vectors depends on PY36 guaranteed dict key ordering" ) def test_missing_field(self, author_missing_data): # GH20030: @@ -506,7 +506,7 @@ def test_json_normalize_errors(self, missing_metadata): ) @pytest.mark.skipif( - not PY36, reason="Test vectors depend on " "PY36 guaranteed dict key ordering" + not PY36, reason="Test vectors depends on PY36 guaranteed dict key ordering" ) def 
test_missing_meta(self, missing_metadata): # GH25468 From 32e5b003c08cc0bb4e64ff970e2b6d5f6d63c328 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 14:50:41 -0500 Subject: [PATCH 24/53] Update test after behavior change --- pandas/tests/frame/test_constructors.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d0c6026e82ac7..6e9c717a028b4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1354,16 +1354,13 @@ def test_constructor_list_of_dict_order(self): "Second": [4, 5, 6], "Third": [7, 8, 9], "Fourth": [10, 11, 12], - "XXX": [None, None, 13], "YYY": [None, None, 14], + "XXX": [None, None, 13], } ) result = DataFrame(data) assert set(result.columns) == set(expected.columns) - # order of first 4 columns dictated by data[0] - tm.assert_frame_equal(result.iloc[:, :4], expected.iloc[:, :4]) - # the order of the remaining two is arbitrary, but assert the data matches - tm.assert_frame_equal(result[["XXX", "YYY"]], expected[["XXX", "YYY"]]) + tm.assert_frame_equal(result, expected) def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series From 864a1168adae967d5a65fa0d095e4019ceacd422 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 14:55:14 -0500 Subject: [PATCH 25/53] Ignore column order on py35 --- pandas/tests/frame/test_constructors.py | 3 +-- pandas/tests/io/json/test_normalize.py | 10 ++-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6e9c717a028b4..1a6b4f2705937 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1340,7 +1340,6 @@ def test_constructor_list_of_namedtuples(self): result = DataFrame(tuples, columns=["y", "z"]) tm.assert_frame_equal(result, expected) - 
@pytest.mark.skipif(not PY36, reason="Guaranteed dict order requires Python>=3.6") def test_constructor_list_of_dict_order(self): # GH10056 data = [ @@ -1360,7 +1359,7 @@ def test_constructor_list_of_dict_order(self): ) result = DataFrame(data) assert set(result.columns) == set(expected.columns) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_like=PY36) def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index cc8825f122785..8a2cb5a7a8673 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -362,9 +362,6 @@ def test_non_ascii_key(self): result = json_normalize(json.loads(testjson)) tm.assert_frame_equal(result, expected) - @pytest.mark.skipif( - not PY36, reason="Test vectors depends on PY36 guaranteed dict key ordering" - ) def test_missing_field(self, author_missing_data): # GH20030: result = json_normalize(author_missing_data) @@ -385,7 +382,7 @@ def test_missing_field(self, author_missing_data): }, ] expected = DataFrame(ex_data) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_like=PY36) @pytest.mark.parametrize( "max_level,expected", @@ -505,9 +502,6 @@ def test_json_normalize_errors(self, missing_metadata): errors="raise", ) - @pytest.mark.skipif( - not PY36, reason="Test vectors depends on PY36 guaranteed dict key ordering" - ) def test_missing_meta(self, missing_metadata): # GH25468 # If metadata is nullable with errors set to ignore, the null values @@ -522,7 +516,7 @@ def test_missing_meta(self, missing_metadata): columns = ["city", "number", "state", "street", "zip", "name"] columns = ["number", "street", "city", "state", "zip", "name"] expected = DataFrame(ex_data, columns=columns) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_like=PY36) def 
test_donot_drop_nonevalues(self): # GH21356 From 9cb436284d95198a001d9ae33a796fe1751bf4ff Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 15:18:29 -0500 Subject: [PATCH 26/53] clean --- pandas/tests/frame/test_constructors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1a6b4f2705937..2b95108146c6a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1358,7 +1358,6 @@ def test_constructor_list_of_dict_order(self): } ) result = DataFrame(data) - assert set(result.columns) == set(expected.columns) tm.assert_frame_equal(result, expected, check_like=PY36) def test_constructor_orient(self, float_string_frame): From 355979e300647e249c486c2a2d6db91ea474af16 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 15:44:18 -0500 Subject: [PATCH 27/53] fix --- pandas/tests/indexing/test_indexing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e86ae5bef5000..d3950b9cbf8f5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -231,8 +231,9 @@ def test_setitem_dtype_upcast(self): df.loc[0, "c"] = "foo" expected = DataFrame([{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) - expected = expected[df.columns] # compensate dict key ordering - tm.assert_frame_equal(df, expected) + # dataframe dict key order is preserved, so these two + # sequences of ops lead to different column order + tm.assert_frame_equal(df, expected, check_like=True) # GH10280 df = DataFrame( From 887f201765047c909300a634d26add7ec1cbeb38 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 15:44:50 -0500 Subject: [PATCH 28/53] fix --- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/io/json/test_normalize.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff 
--git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2b95108146c6a..736258899a41e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1358,7 +1358,7 @@ def test_constructor_list_of_dict_order(self): } ) result = DataFrame(data) - tm.assert_frame_equal(result, expected, check_like=PY36) + tm.assert_frame_equal(result, expected, check_like=not PY36) def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 8a2cb5a7a8673..3ceddfc3c1db4 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -382,7 +382,7 @@ def test_missing_field(self, author_missing_data): }, ] expected = DataFrame(ex_data) - tm.assert_frame_equal(result, expected, check_like=PY36) + tm.assert_frame_equal(result, expected, check_like=not PY36) @pytest.mark.parametrize( "max_level,expected", @@ -516,7 +516,7 @@ def test_missing_meta(self, missing_metadata): columns = ["city", "number", "state", "street", "zip", "name"] columns = ["number", "street", "city", "state", "zip", "name"] expected = DataFrame(ex_data, columns=columns) - tm.assert_frame_equal(result, expected, check_like=PY36) + tm.assert_frame_equal(result, expected, check_like=not PY36) def test_donot_drop_nonevalues(self): # GH21356 From d65a0851169648ba7eb576c03ed6aca0e89798c7 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 16:57:31 -0500 Subject: [PATCH 29/53] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 193a0edee5e96..54cbf8367ebbc 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -181,6 +181,15 @@ The repr now looks like this: }] json_normalize(data, max_level=1) +Column order is 
preserved when passing a list of dicts to DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A long requested feature, this has been the behavior for lists of +`OrderedDict` since v0.19.0. With dict key-order now officially +`guaranteed ` +for python>=3.7, and in practice since python 3.6, the DataFrame +constructor now treats dicts the same way in this case, when pandas +is running on python>=3.6 (:issue:`10056`). .. _whatsnew_0250.enhancements.other: From 2e8247315f4d1fc6e10bec5a24865994c5174e87 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 16:59:11 -0500 Subject: [PATCH 30/53] fix issue ref --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 54cbf8367ebbc..7b63aeb205318 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -189,7 +189,7 @@ A long requested feature, this has been the behavior for lists of `guaranteed ` for python>=3.7, and in practice since python 3.6, the DataFrame constructor now treats dicts the same way in this case, when pandas -is running on python>=3.6 (:issue:`10056`). +is running on python>=3.6 (:issue:`27309`). .. _whatsnew_0250.enhancements.other: From 21ec5a7f9172bfe1631426b7712e50298badfc29 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 17:58:47 -0500 Subject: [PATCH 31/53] fix header type of unrelated issue --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 7b63aeb205318..928856cbe634c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -409,7 +409,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t .. 
_whatsnew_0250.api_breaking.groupby_categorical: Categorical dtypes are preserved during groupby -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Previously, columns that were categorical, but not the groupby key(s) would be converted to ``object`` dtype during groupby operations. Pandas now will preserve these dtypes. (:issue:`18502`) From 51ff714e9b05b2b97cbe5da6bf7b1e958b501b63 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 18:14:32 -0500 Subject: [PATCH 32/53] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 47 ++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 928856cbe634c..71f6c53636375 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -181,15 +181,6 @@ The repr now looks like this: }] json_normalize(data, max_level=1) -Column order is preserved when passing a list of dicts to DataFrame -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A long requested feature, this has been the behavior for lists of -`OrderedDict` since v0.19.0. With dict key-order now officially -`guaranteed ` -for python>=3.7, and in practice since python 3.6, the DataFrame -constructor now treats dicts the same way in this case, when pandas -is running on python>=3.6 (:issue:`27309`). .. _whatsnew_0250.enhancements.other: @@ -750,6 +741,44 @@ consistent with NumPy and the rest of pandas (:issue:`21801`). cat.argsort() cat[cat.argsort()] +.. _whatsnew_0250.api_breaking.list_of_dict: + +Column order is preserved when passing a list of dicts to DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Starting with Python 3.7 the key-order of `dict` is `guaranteed `_. In practice, this has been true since +Python 3.6. 
The DataFrame constructor now treats a list of dicts in the same way as +it has a list of `OrderedDict` since v0.19.0. This change applies only when pandas is running +on python>=3.6 (:issue:`27309`). As a consequence, the column order produced by `DataFrame()` +in such cases has changed. + +.. code-block:: ipython + + In [1]: data= [ + ...: {'name': 'Joe', 'state': 'NY', 'age': 18}, + ...: {'name': 'Jane', 'state': 'KY', 'age': 19} + ...: ] + +*Previous behavior* + +.. code-block:: ipython + + In [1]: pd.DataFrame(data) + Out[1]: + age name state + 0 18 Joe NY + 1 19 Jane KY + +*New behavior* + +.. code-block:: ipython + + In [2]: pd.DataFrame(data) + Out[2]: + name state age + 0 Joe NY 18 + 1 Jane KY 19 + .. _whatsnew_0250.api_breaking.deps: Increased minimum versions for dependencies From 92d83ea16c23ae71bc1a1a17a6fe9d6e4b1f9d8d Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 18:39:52 -0500 Subject: [PATCH 33/53] checks --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 71f6c53636375..7901592832480 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -754,7 +754,7 @@ in such cases has changed. .. 
code-block:: ipython - In [1]: data= [ + In [1]: data = [ ...: {'name': 'Joe', 'state': 'NY', 'age': 18}, ...: {'name': 'Jane', 'state': 'KY', 'age': 19} ...: ] From 85da582b762a42af201bada77d1ec86ef88d411b Mon Sep 17 00:00:00 2001 From: pilkibun <51503352+pilkibun@users.noreply.github.com> Date: Fri, 12 Jul 2019 00:21:53 +0000 Subject: [PATCH 34/53] Update pandas/tests/indexing/test_indexing.py Co-Authored-By: Joris Van den Bossche --- pandas/tests/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d3950b9cbf8f5..7cd7b1024151e 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -230,7 +230,7 @@ def test_setitem_dtype_upcast(self): assert df["c"].dtype == np.float64 df.loc[0, "c"] = "foo" - expected = DataFrame([{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) + expected = DataFrame([{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) # dataframe dict key order is preserved, so these two # sequences of ops lead to different column order tm.assert_frame_equal(df, expected, check_like=True) From 4f9228c09166aed7ba78f0a865ea9a4609d40491 Mon Sep 17 00:00:00 2001 From: pilkibun <51503352+pilkibun@users.noreply.github.com> Date: Fri, 12 Jul 2019 00:22:10 +0000 Subject: [PATCH 35/53] Update pandas/tests/indexing/test_indexing.py Co-Authored-By: Joris Van den Bossche --- pandas/tests/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 7cd7b1024151e..f6d4f99a994e2 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -233,7 +233,7 @@ def test_setitem_dtype_upcast(self): expected = DataFrame([{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) # dataframe dict key order is preserved, so these two # 
sequences of ops lead to different column order - tm.assert_frame_equal(df, expected, check_like=True) + tm.assert_frame_equal(df, expected, check_like=not PY36) # GH10280 df = DataFrame( From 61d833a24715cdb78716e8b42e7af7843134631e Mon Sep 17 00:00:00 2001 From: pilkibun <51503352+pilkibun@users.noreply.github.com> Date: Fri, 12 Jul 2019 00:22:26 +0000 Subject: [PATCH 36/53] Update doc/source/whatsnew/v0.25.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 7901592832480..35728f6d8d840 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -759,7 +759,7 @@ in such cases has changed. ...: {'name': 'Jane', 'state': 'KY', 'age': 19} ...: ] -*Previous behavior* +*Previous behavior* (lexicographically sorted) .. code-block:: ipython From 79346dec7a61745c839052f47bca52578654ffca Mon Sep 17 00:00:00 2001 From: pilkibun <51503352+pilkibun@users.noreply.github.com> Date: Fri, 12 Jul 2019 00:22:39 +0000 Subject: [PATCH 37/53] Update doc/source/whatsnew/v0.25.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 35728f6d8d840..da9e787dc22fc 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -769,7 +769,7 @@ in such cases has changed. 0 18 Joe NY 1 19 Jane KY -*New behavior* +*New behavior*: preserve order of the dicts .. 
code-block:: ipython From ddcce3e9c040044464cc23b4bea98696cc000375 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 19:24:13 -0500 Subject: [PATCH 38/53] remove comment --- pandas/tests/indexing/test_indexing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f6d4f99a994e2..c900682084784 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -231,8 +231,6 @@ def test_setitem_dtype_upcast(self): df.loc[0, "c"] = "foo" expected = DataFrame([{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) - # dataframe dict key order is preserved, so these two - # sequences of ops lead to different column order tm.assert_frame_equal(df, expected, check_like=not PY36) # GH10280 From 2f22ec958b3570f69f19ce7d92ebb2d6834b3bdf Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 19:42:49 -0500 Subject: [PATCH 39/53] Checks --- pandas/tests/indexing/test_indexing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index c900682084784..f0eb3752abd3a 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -230,7 +230,9 @@ def test_setitem_dtype_upcast(self): assert df["c"].dtype == np.float64 df.loc[0, "c"] = "foo" - expected = DataFrame([{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) + expected = DataFrame( + [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] + ) tm.assert_frame_equal(df, expected, check_like=not PY36) # GH10280 From 3dcacd28d6daa4ce9f56b806675eb9b78028d1d8 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 20:17:48 -0500 Subject: [PATCH 40/53] Add import --- pandas/tests/indexing/test_indexing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/test_indexing.py 
b/pandas/tests/indexing/test_indexing.py index f0eb3752abd3a..f6bb5f774e758 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas.compat import PY36 + from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas as pd From c28e2fd7cc4b5d012a2d1b1fd227fbb6c34dac3a Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 11 Jul 2019 22:40:47 -0500 Subject: [PATCH 41/53] CI From 9afdec353e41844c90ad4f54dce9a2282ffa0070 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 12:59:31 -0500 Subject: [PATCH 42/53] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 48 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index da9e787dc22fc..33c272b1b638e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -746,38 +746,46 @@ consistent with NumPy and the rest of pandas (:issue:`21801`). Column order is preserved when passing a list of dicts to DataFrame ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Starting with Python 3.7 the key-order of `dict` is `guaranteed `_. In practice, this has been true since -Python 3.6. The DataFrame constructor now treats a list of dicts in the same way as -it has a list of `OrderedDict` since v0.19.0. This change applies only when pandas is running -on python>=3.6 (:issue:`27309`). As a consequence, the column order produced by `DataFrame()` -in such cases has changed. +Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since +Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as +it has a list of ``OrderedDict`` since v0.19.0. This change applies only when pandas is running +on Python>=3.6. 
As a consequence, the column order produced by `DataFrame()` +in such cases has changed (:issue:`27309`). -.. code-block:: ipython +.. ipython:: python - In [1]: data = [ - ...: {'name': 'Joe', 'state': 'NY', 'age': 18}, - ...: {'name': 'Jane', 'state': 'KY', 'age': 19} - ...: ] + data = [ + {'name': 'Joe', 'state': 'NY', 'age': 18}, + {'name': 'Jane', 'state': 'KY', 'age': 19, 'hobby': 'Minecraft'}, + {'name': 'Jean', 'state': 'OK', 'age': 20, 'finances': 'good'} + ] -*Previous behavior* (lexicographically sorted) +*Previous Behavior*: -.. code-block:: ipython +The columns were lexicographically sorted previously, + +.. code-block:: python In [1]: pd.DataFrame(data) Out[1]: - age name state - 0 18 Joe NY - 1 19 Jane KY + age finances hobby name state + 0 18 NaN NaN Joe NY + 1 19 NaN Minecraft Jane KY + 2 20 good NaN Jean OK -*New behavior*: preserve order of the dicts +*New Behavior*: -.. code-block:: ipython +The column order now matches the insertion-order of the keys in the ``dict``, +considering all the records from top to bottom. + +.. code-block:: python In [2]: pd.DataFrame(data) Out[2]: - name state age - 0 Joe NY 18 - 1 Jane KY 19 + name state age hobby finances + 0 Joe NY 18 NaN NaN + 1 Jane KY 19 Minecraft NaN + 2 Jean OK 20 NaN good .. 
_whatsnew_0250.api_breaking.deps: From e1f5f6b188927283aa798a6a4aead8f778469f30 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 13:39:13 -0500 Subject: [PATCH 43/53] comment --- pandas/core/internals/construction.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 702ab41c0b2f3..891fb05204610 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -536,6 +536,13 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): + """Convert list of OrderedDict to dict + + if `columns` is not passed, column names are inferred from the records + - for OrderedDict and (on Python>=3.6) dicts, the column names match + the key insertion-order from the first record to the last. + - For other kinds of dict-likes, the keys are lexically sorted. + """ if columns is None: gen = (list(x.keys()) for x in data) types = (dict, OrderedDict) if PY36 else OrderedDict From b8d8e28bd27c55040f488a94dcae76b1d4b5913e Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 13:48:35 -0500 Subject: [PATCH 44/53] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 33c272b1b638e..6eb0bb524e0c4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -749,7 +749,7 @@ Column order is preserved when passing a list of dicts to DataFrame Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as it has a list of ``OrderedDict`` since v0.19.0. This change applies only when pandas is running -on Python>=3.6. 
As a consequence, the column order produced by `DataFrame()` +on Python>=3.6. As a consequence, the column order produced by :class:`DataFrame` in such cases has changed (:issue:`27309`). .. ipython:: python From 807e34193da72a146b5e6f0f3df408615c69c225 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 13:55:49 -0500 Subject: [PATCH 45/53] comment --- pandas/core/frame.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a1989fd62b6ee..a4d355de3d8f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -313,8 +313,12 @@ class DataFrame(NDFrame): Dict can contain Series, arrays, constants, or list-like objects .. versionchanged :: 0.23.0 - If data is a dict, argument order is maintained for Python 3.6 - and later. + If data is a dict, column order follows insertion-order for + Python 3.6 and later. + + .. versionchanged :: 0.25.0 + If data is a list of dicts, column order follows insertion-order + Python 3.6 and later. index : Index or array-like Index to use for resulting frame. Will default to RangeIndex if From 209c9228db7dac7de2d44a6acd45e48c7303e3b9 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 15:28:32 -0500 Subject: [PATCH 46/53] checks --- doc/source/whatsnew/v0.25.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 6eb0bb524e0c4..67990786471bf 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -755,9 +755,9 @@ in such cases has changed (:issue:`27309`). .. 
ipython:: python data = [ - {'name': 'Joe', 'state': 'NY', 'age': 18}, - {'name': 'Jane', 'state': 'KY', 'age': 19, 'hobby': 'Minecraft'}, - {'name': 'Jean', 'state': 'OK', 'age': 20, 'finances': 'good'} + {'name': 'Joe', 'state': 'NY', 'age': 18}, + {'name': 'Jane', 'state': 'KY', 'age': 19, 'hobby': 'Minecraft'}, + {'name': 'Jean', 'state': 'OK', 'age': 20, 'finances': 'good'} ] *Previous Behavior*: From e3dfa45e9e41d67b3aea663fadb9f2f4de5c0220 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 18:03:02 -0500 Subject: [PATCH 47/53] docstring --- pandas/core/internals/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 891fb05204610..caac6ad340ae8 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -536,7 +536,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - """Convert list of OrderedDict to dict + """Convert list of OrderedDict to numpy array if `columns` is not passed, column names are inferred from the records - for OrderedDict and (on Python>=3.6) dicts, the column names match From e0749fe276b187cec1e6309170c88caa8d57471b Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 12 Jul 2019 21:42:23 -0500 Subject: [PATCH 48/53] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 67990786471bf..0c95805093b16 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -778,14 +778,9 @@ The columns were lexicographically sorted previously, The column order now matches the insertion-order of the keys in the ``dict``, considering all the records from top to bottom. -.. code-block:: python +.. 
ipython:: python - In [2]: pd.DataFrame(data) - Out[2]: - name state age hobby finances - 0 Joe NY 18 NaN NaN - 1 Jane KY 19 Minecraft NaN - 2 Jean OK 20 NaN good + pd.DataFrame(data) .. _whatsnew_0250.api_breaking.deps: From 4f815cde38d609e46f3ee6e4177d551b63a0808a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Jul 2019 17:48:01 -0400 Subject: [PATCH 49/53] doc comments --- doc/source/whatsnew/v0.25.0.rst | 10 +++++----- pandas/core/internals/construction.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0c95805093b16..67b9d911392de 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -747,10 +747,9 @@ Column order is preserved when passing a list of dicts to DataFrame ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since -Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as -it has a list of ``OrderedDict`` since v0.19.0. This change applies only when pandas is running -on Python>=3.6. As a consequence, the column order produced by :class:`DataFrame` -in such cases has changed (:issue:`27309`). +Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as +it has a list of ``OrderedDict`` since v0.19.0, i.e. preserving the order of the dicsts. +This change applies only when pandas is running on Python>=3.6 (:issue:`27309`). .. ipython:: python @@ -776,7 +775,8 @@ The columns were lexicographically sorted previously, *New Behavior*: The column order now matches the insertion-order of the keys in the ``dict``, -considering all the records from top to bottom. +considering all the records from top to bottom. As a consequence, the column +order of the resulting DataFrame has changed compared to previous pandas versions. ..
ipython:: python diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index caac6ad340ae8..a8295d3f8bc50 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -536,7 +536,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - """Convert list of OrderedDict to numpy array + """Convert list of dicts to numpy arrays if `columns` is not passed, column names are inferred from the records - for OrderedDict and (on Python>=3.6) dicts, the column names match From 60236e53fed9505de6fcb3b2a4a4fe2929d9ba51 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Mon, 15 Jul 2019 18:09:52 -0500 Subject: [PATCH 50/53] typo --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 67b9d911392de..007eb51a8cc30 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -748,7 +748,7 @@ Column order is preserved when passing a list of dicts to DataFrame Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as -it has a list of ``OrderedDict`` since v0.19.0, i.e. preserving the order of the dicsts. +it has a list of ``OrderedDict`` since v0.19.0, i.e. preserving the order of the dicts. This change applies only when pandas is running on Python>=3.6 (:issue:`27309`). .. 
ipython:: python From f4e6309c15e4a43fd4acb01bda7a5079ca856b85 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Mon, 15 Jul 2019 18:13:35 -0500 Subject: [PATCH 51/53] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 007eb51a8cc30..805fe21bdcc9d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -748,7 +748,7 @@ Column order is preserved when passing a list of dicts to DataFrame Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as -it has a list of ``OrderedDict`` since v0.19.0, i.e. preserving the order of the dicts. +it does a list of ``OrderedDict``, i.e. preserving the order of the dicts. This change applies only when pandas is running on Python>=3.6 (:issue:`27309`). .. ipython:: python From 10024c132b924cdc7bb443374ad4e7917186d6e9 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Mon, 15 Jul 2019 18:21:45 -0500 Subject: [PATCH 52/53] document parameters --- pandas/core/internals/construction.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a8295d3f8bc50..87ed0d8b312c6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -542,6 +542,20 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - for OrderedDict and (on Python>=3.6) dicts, the column names match the key insertion-order from the first record to the last. - For other kinds of dict-likes, the keys are lexically sorted. + + Parameters + ---------- + data : iterable + collection of records (OrderedDict, dict) + columns: iterables or None + coerce_float : bool + If False, don't copy the data if not needed. 
+ dtype : np.dtype + + Returns + ------- + tuple + arrays, columns """ if columns is None: gen = (list(x.keys()) for x in data) From 0d194f1047984df534382b7a7b66885fe784ae3b Mon Sep 17 00:00:00 2001 From: pilkibun Date: Mon, 15 Jul 2019 19:13:57 -0500 Subject: [PATCH 53/53] remove wrong description --- pandas/core/internals/construction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 87ed0d8b312c6..f44cb5207891f 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -549,7 +549,6 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): collection of records (OrderedDict, dict) columns: iterables or None coerce_float : bool - If False, don't copy the data if not needed. dtype : np.dtype Returns