Skip to content

Commit 66ecb06

Browse files
authored
Fix memory error when computing unique values (#313)
1 parent e113b0f commit 66ecb06

File tree

2 files changed

+15
-7
lines changed

2 files changed

+15
-7
lines changed

intake_esm/search.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,16 @@ def _unique(df, columns=None):
1212
columns = [columns]
1313
if not columns:
1414
columns = df.columns.tolist()
15+
16+
def _find_unique(series):
17+
values = series.dropna().values
18+
uniques = list(set(_flatten_list(values)))
19+
return uniques
20+
21+
x = df[columns].apply(_find_unique, result_type='reduce').to_dict()
1522
info = {}
16-
for col in columns:
17-
values = df[col].dropna().values
18-
uniques = np.unique(list(_flatten_list(values))).tolist()
19-
info[col] = {'count': len(uniques), 'values': uniques}
23+
for col in x.keys():
24+
info[col] = {'count': len(x[col]), 'values': x[col]}
2025
return info
2126

2227

tests/test_search.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,17 @@ def test_unique():
3232
'random': {'count': 3, 'values': ['bx', 'by', 'bz']},
3333
}
3434
actual = _unique(df, df.columns.tolist())
35-
assert actual == expected
35+
36+
assert set(actual.keys()) == set(expected.keys())
37+
for key in actual.keys():
38+
assert set(actual[key]['values']) == set(expected[key]['values'])
3639

3740
actual = _unique(df)
38-
assert actual == expected
41+
assert set(actual.keys()) == set(expected.keys())
3942

4043
actual = _unique(df, columns='random')
4144
expected = {'random': {'count': 3, 'values': ['bx', 'by', 'bz']}}
42-
assert actual == expected
45+
assert set(actual['random']['values']) == set(expected['random']['values'])
4346

4447

4548
params = [

0 commit comments

Comments
 (0)