Skip to content

Commit ba602d1

Browse files
authored
Merge pull request #94 from fact-project/update_write_data
Add docstring and fix typo in kwarg in write_data
2 parents 8101aab + 9e230cd commit ba602d1

File tree

3 files changed

+39
-13
lines changed

3 files changed

+39
-13
lines changed

fact/VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.15.1
1+
0.16.0

fact/io.py

+30-4
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,39 @@
2525
native_byteorder = native_byteorder = {'little': '<', 'big': '>'}[sys.byteorder]
2626

2727

28-
def write_data(df, file_path, key='data', use_hp5y=False, **kwargs):
28+
def write_data(df, file_path, key='data', use_h5py=True, **kwargs):
29+
'''
30+
Write a pandas DataFrame to one of several output formats, determined by the
31+
extension of `file_path`
32+
33+
Supported file types are:
34+
* hdf5, used when extensions are `.hdf`, `.hdf5` or `.h5`.
35+
By default h5py with one dataset per column is used.
36+
Pandas `to_hdf` is used if `use_h5py=False`
37+
* json, if extension is `json`
38+
* jsonlines if extension is `jsonl` or `jsonline`
39+
* csv, if extension is `csv`
40+
41+
Arguments
42+
---------
43+
44+
df: pd.DataFrame
45+
DataFrame to save
46+
file_path: str
47+
Path to the output file
48+
key: str
49+
Groupkey, only used for hdf5
50+
use_h5py: bool
51+
Whether to write h5py style or pandas style hdf5
52+
53+
All other keyword arguments are passed to the actual writer functions.
54+
'''
2955

3056
name, extension = path.splitext(file_path)
3157

3258
if extension in ['.hdf', '.hdf5', '.h5']:
33-
if use_hp5y is True:
34-
to_h5py(file_path, df, key=key, **kwargs)
59+
if use_h5py is True:
60+
to_h5py(df, file_path, key=key, **kwargs)
3561
else:
3662
df.to_hdf(file_path, key=key, **kwargs)
3763

@@ -232,7 +258,7 @@ def check_extension(file_path, allowed_extensions=allowed_extensions):
232258
raise IOError('Allowed formats: {}'.format(allowed_extensions))
233259

234260

235-
def to_h5py(filename, df, key='data', mode='a', dtypes=None, index=True, **kwargs):
261+
def to_h5py(df, filename, key='data', mode='a', dtypes=None, index=True, **kwargs):
236262
'''
237263
Write pandas dataframe to h5py style hdf5 file
238264

tests/test_io.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_to_h5py():
1414
})
1515

1616
with tempfile.NamedTemporaryFile() as f:
17-
to_h5py(f.name, df, key='test')
17+
to_h5py(df, f.name, key='test')
1818

1919
with h5py.File(f.name, 'r') as hf:
2020

@@ -40,7 +40,7 @@ def test_to_h5py_string():
4040
})
4141

4242
with tempfile.NamedTemporaryFile() as f:
43-
to_h5py(f.name, df, key='test')
43+
to_h5py(df, f.name, key='test')
4444
df2 = read_h5py(f.name, key='test')
4545

4646
assert all(df.dtypes == df2.dtypes)
@@ -59,7 +59,7 @@ def test_to_h5py_datetime():
5959
})
6060

6161
with tempfile.NamedTemporaryFile() as f:
62-
to_h5py(f.name, df, key='test')
62+
to_h5py(df, f.name, key='test')
6363
df2 = read_h5py(f.name, key='test')
6464

6565
for col in df2.columns:
@@ -82,8 +82,8 @@ def test_to_h5py_append():
8282
})
8383

8484
with tempfile.NamedTemporaryFile() as f:
85-
to_h5py(f.name, df1, key='test', index=False)
86-
to_h5py(f.name, df2, key='test', mode='a', index=False)
85+
to_h5py(df1, f.name, key='test', index=False)
86+
to_h5py(df2, f.name, key='test', mode='a', index=False)
8787

8888
df_read = read_h5py(f.name, key='test')
8989
df_written = pd.concat([df1, df2], ignore_index=True)
@@ -105,8 +105,8 @@ def test_to_h5py_append_second_group():
105105
})
106106

107107
with tempfile.NamedTemporaryFile() as f:
108-
to_h5py(f.name, df1, key='g1', index=False)
109-
to_h5py(f.name, df2, key='g2', index=False)
108+
to_h5py(df1, f.name, key='g1', index=False)
109+
to_h5py(df2, f.name, key='g2', index=False)
110110

111111
df_g1 = read_h5py(f.name, key='g1')
112112
df_g2 = read_h5py(f.name, key='g2')
@@ -247,7 +247,7 @@ def test_write_lists_h5py():
247247
})
248248

249249
with tempfile.NamedTemporaryFile(suffix='.hdf5') as f:
250-
to_h5py(f.name, df)
250+
to_h5py(df, f.name)
251251

252252
df = read_h5py(f.name, columns=['x'])
253253

0 commit comments

Comments
 (0)