Skip to content

ENH: Preserve key order when passing list of dicts to DataFrame on py 3.6+ #27309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 53 commits into from Jul 17, 2019
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
afa72b4
ENH: Support new case of implied column ordering in Dataframe()
pilkibun Jul 9, 2019
8a4113c
Safer
pilkibun Jul 9, 2019
408ad8b
Restrict to Index case
pilkibun Jul 9, 2019
b732096
Fix tests
pilkibun Jul 9, 2019
be57fd9
Style
pilkibun Jul 9, 2019
717716b
Fix test
pilkibun Jul 9, 2019
e9d4989
rename
pilkibun Jul 10, 2019
63adbfe
Restrict to PY37
pilkibun Jul 10, 2019
0ed89ff
Style
pilkibun Jul 10, 2019
0a48016
Restrict to PY36
pilkibun Jul 10, 2019
4b73536
Work around fake test failure on PY35
pilkibun Jul 10, 2019
eb64d31
Style
pilkibun Jul 10, 2019
b5db0bc
fix test
pilkibun Jul 10, 2019
76d7d54
ENH: treat dict like OrderedDict for PY36+ in dataframe constructor
pilkibun Jul 11, 2019
4571e16
Restore frame.py
pilkibun Jul 11, 2019
71f3c79
restore test_normalize.py
pilkibun Jul 11, 2019
a3987e7
Skip some json tests on Py35
pilkibun Jul 11, 2019
e72b666
fix test
pilkibun Jul 11, 2019
104c2a7
black
pilkibun Jul 11, 2019
e8c27e5
clean isinstance check
pilkibun Jul 11, 2019
b38f65a
messages
pilkibun Jul 11, 2019
32e5b00
Update test after behavior change
pilkibun Jul 11, 2019
864a116
Ignore column order on py35
pilkibun Jul 11, 2019
9cb4362
clean
pilkibun Jul 11, 2019
355979e
fix
pilkibun Jul 11, 2019
887f201
fix
pilkibun Jul 11, 2019
d65a085
whatsnew
pilkibun Jul 11, 2019
2e82473
fix issue ref
pilkibun Jul 11, 2019
21ec5a7
fix header type of unrelated issue
pilkibun Jul 11, 2019
51ff714
whatsnew
pilkibun Jul 11, 2019
92d83ea
checks
pilkibun Jul 11, 2019
85da582
Update pandas/tests/indexing/test_indexing.py
Jul 12, 2019
4f9228c
Update pandas/tests/indexing/test_indexing.py
Jul 12, 2019
61d833a
Update doc/source/whatsnew/v0.25.0.rst
Jul 12, 2019
79346de
Update doc/source/whatsnew/v0.25.0.rst
Jul 12, 2019
ddcce3e
remove comment
pilkibun Jul 12, 2019
2f22ec9
Checks
pilkibun Jul 12, 2019
3dcacd2
Add import
pilkibun Jul 12, 2019
c28e2fd
CI
pilkibun Jul 12, 2019
4d52802
fix tests
pilkibun Jul 12, 2019
5371de5
CI
pilkibun Jul 11, 2019
9afdec3
whatsnew
pilkibun Jul 12, 2019
e1f5f6b
comment
pilkibun Jul 12, 2019
b8d8e28
whatsnew
pilkibun Jul 12, 2019
807e341
comment
pilkibun Jul 12, 2019
209c922
checks
pilkibun Jul 12, 2019
e3dfa45
docstring
pilkibun Jul 12, 2019
e0749fe
whatsnew
pilkibun Jul 13, 2019
4f815cd
doc comments
jorisvandenbossche Jul 15, 2019
60236e5
typo
pilkibun Jul 15, 2019
f4e6309
whatsnew
pilkibun Jul 15, 2019
10024c1
document parameters
pilkibun Jul 15, 2019
0d194f1
remove wrong description
pilkibun Jul 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,28 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False):
data = list(data)
if len(data) > 0:
if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1:
if is_named_tuple(data[0]) and columns is None:
infer_columns_names = columns is None
if is_named_tuple(data[0]) and infer_columns_names:
columns = data[0]._fields
arrays, columns = to_arrays(data, columns, dtype=dtype)
arrays, arr_names = to_arrays(data, columns, dtype=dtype)
arr_names = ensure_index(arr_names)

columns = arr_names
# GH#10056
if (
PY36
and is_dict_like(data[0])
and infer_columns_names
and (type(columns) is Index)
):
_columns = list(columns)
known_columns = set(data[0])
extra_columns = [_ for _ in _columns if _ not in known_columns]
if set(_columns[: len(data[0])]) == known_columns:
_columns[: len(known_columns)] = list(data[0])
_columns[len(known_columns) :] = extra_columns
columns = _columns

columns = ensure_index(columns)

# set the index
Expand All @@ -453,7 +472,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False):
else:
index = ibase.default_index(len(data))

mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
mgr = arrays_to_mgr(arrays, arr_names, index, columns, dtype=dtype)
else:
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
else:
Expand Down
27 changes: 26 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,7 +1119,7 @@ def test_constructor_generator(self):
expected = DataFrame({0: range(10), 1: "a"})
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_list_of_dicts(self):
def test_constructor_list_of_odicts(self):
data = [
OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
Expand Down Expand Up @@ -1340,6 +1340,31 @@ def test_constructor_list_of_namedtuples(self):
result = DataFrame(tuples, columns=["y", "z"])
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(not PY36, reason="Guaranteed dict order is Python>=3.7")
def test_constructor_list_of_dict_order(self):
# GH10056
data = [
{"First": 1, "Second": 4, "Third": 7, "Fourth": 10},
{"Second": 5, "First": 2, "Fourth": 11, "Third": 8},
{"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13},
]
expected = DataFrame(
{
"First": [1, 2, 3],
"Second": [4, 5, 6],
"Third": [7, 8, 9],
"Fourth": [10, 11, 12],
"XXX": [None, None, 13],
"YYY": [None, None, 14],
}
)
result = DataFrame(data)
assert set(result.columns) == set(expected.columns)
# order of first 4 columns dictated by data[0]
tm.assert_frame_equal(result.iloc[:, :4], expected.iloc[:, :4])
# the order of the remaining two is arbitrary, but assert the data matches
tm.assert_frame_equal(result[["XXX", "YYY"]], expected[["XXX", "YYY"]])

def test_constructor_orient(self, float_string_frame):
data_dict = float_string_frame.T._series
recons = DataFrame.from_dict(data_dict, orient="index")
Expand Down
20 changes: 14 additions & 6 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas.compat import PY36

from pandas import DataFrame, Index
import pandas.util.testing as tm

Expand Down Expand Up @@ -351,9 +353,9 @@ def test_non_ascii_key(self):
).decode("utf8")

testdata = {
b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
"sub.A": [1, 3],
"sub.B": [2, 4],
b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
}
expected = DataFrame(testdata)

Expand All @@ -365,16 +367,16 @@ def test_missing_field(self, author_missing_data):
result = json_normalize(author_missing_data)
ex_data = [
{
"info": np.nan,
"author_name.first": np.nan,
"author_name.last_name": np.nan,
"info": np.nan,
"info.created_at": np.nan,
"info.last_updated": np.nan,
},
{
"info": None,
"author_name.first": "Jane",
"author_name.last_name": "Doe",
"info": None,
"info.created_at": "11/08/1993",
"info.last_updated": "26/05/2012",
},
Expand Down Expand Up @@ -508,11 +510,17 @@ def test_missing_meta(self, missing_metadata):
data=missing_metadata, record_path="addresses", meta="name", errors="ignore"
)
ex_data = [
["Massillon", 9562, "OH", "Morris St.", 44646, "Alice"],
["Elizabethton", 8449, "TN", "Spring St.", 37643, np.nan],
[9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
[8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
]
columns = ["city", "number", "state", "street", "zip", "name"]
columns = ["number", "street", "city", "state", "zip", "name"]
expected = DataFrame(ex_data, columns=columns)
if not PY36:
# json_normalize order is not guaranteed, so the column order
# depends on the implementation. Opt to test on PY36/37
# and force column order on PY35.
expected = expected[columns]
result = result[columns]
tm.assert_frame_equal(result, expected)

def test_donot_drop_nonevalues(self):
Expand Down