Skip to content

Commit a17e5ae

Browse files
committed
First commit
0 parents  commit a17e5ae

File tree

6 files changed

+754
-0
lines changed

6 files changed

+754
-0
lines changed

Diff for: .gitignore

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
*.py[cod]
2+
3+
# C extensions
4+
*.so
5+
6+
# Unit test / coverage reports
7+
.coverage
8+
.tox
9+
nosetests.xml
10+
11+
# Translations
12+
*.mo
13+
14+
15+
# Vi
16+
*.swp

Diff for: README.md

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
cmd_datatools
2+
=============
3+
4+
Command line data tools for Python

Diff for: common.py

+362
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
"""
2+
Common functions/classes for dataprep.
3+
"""
4+
from random import choice
5+
import numpy as np
6+
import sys
7+
import csv
8+
import json
9+
import cPickle
10+
from StringIO import StringIO
11+
12+
13+
################################################################################
14+
# Decorators
15+
################################################################################
16+
17+
18+
def lazyprop(fn):
    """
    Decorator that turns a method into a lazily evaluated, cached property.

    The wrapped function runs only on first access. Its result is stored on
    the instance under the attribute '_lazy_<function name>' and that stored
    value is returned on every later access.

    Parameters
    ----------
    fn : Function
        Method taking only self, whose return value should be cached

    Returns
    -------
    A read-only property wrapping fn
    """
    cache_name = '_lazy_' + fn.__name__

    def _lazyprop(self):
        if hasattr(self, cache_name):
            return getattr(self, cache_name)
        # First access: compute once and remember the result on the instance
        setattr(self, cache_name, fn(self))
        return getattr(self, cache_name)

    return property(_lazyprop)
29+
30+
31+
################################################################################
32+
# Wrappers for opening/closing files
33+
################################################################################
34+
35+
36+
def get_inout_files(infilename, outfilename, inmode='rb', outmode='wb'):
    """
    Gets infile, and outfile, which are opened versions of infilename,
    outfilename.

    Parameters
    ----------
    infilename : String
        Name of file to read.  If None, we will read from stdin
    outfilename : String
        Name of file to write.  If None, we will write to stdout
    inmode : String
        Mode to open the input file in
    outmode : String
        Mode to open the output file in

    Returns
    -------
    The tuple (infile, outfile)

    Raises
    ------
    Whatever get_infile / get_outfile raise.  If opening the output fails,
    the already-opened input file is closed before the error propagates.

    Examples
    --------
    >>> infile, outfile = get_inout_files(infilename, outfilename)
    >>> myfunction(infile, outfile,...)
    >>> close_files(infile, outfile)
    """
    infile = get_infile(infilename, inmode=inmode)
    try:
        outfile = get_outfile(outfilename, outmode=outmode)
    except Exception:
        # Don't leak the input handle when the output can't be opened.
        # stdin is not ours to close.
        if infile is not sys.stdin:
            infile.close()
        raise

    return infile, outfile
64+
65+
66+
def close_files(infile, outfile):
    """
    Closes the files if and only if they are not equal to sys.stdin, sys.stdout

    Parameters
    ----------
    infile : Open file buffer
    outfile : Open file buffer

    Examples
    --------
    >>> infile, outfile = get_inout_files(infilename, outfilename)
    >>> myfunction(infile, outfile,...)
    >>> close_files(infile, outfile)
    """
    try:
        close_infile(infile)
    finally:
        # Still release outfile even if closing infile raised
        close_outfile(outfile)
83+
84+
85+
def close_infile(infile):
    """
    Closes infile if and only if it is not sys.stdin.  Use with get_infile.

    Parameters
    ----------
    infile : Open file buffer

    Examples
    --------
    >>> infile = get_infile(infilename)
    >>> myfunction(infile,...)
    >>> close_infile(infile)
    """
    # Identity test: we only care whether this *is* the process's stdin,
    # not whether some object happens to compare equal to it.
    if infile is not sys.stdin:
        infile.close()
101+
102+
103+
def close_outfile(outfile):
    """
    Closes outfile if and only if it is not sys.stdout.  Use with get_outfile.

    Parameters
    ----------
    outfile : Open file buffer

    Examples
    --------
    >>> outfile = get_outfile(outfilename)
    >>> myfunction(outfile,...)
    >>> close_outfile(outfile)
    """
    # Identity test: we only care whether this *is* the process's stdout,
    # not whether some object happens to compare equal to it.
    if outfile is not sys.stdout:
        outfile.close()
115+
116+
117+
def get_infile(infilename, inmode='rb'):
    """
    Returns an open, readable file for infilename.

    Parameters
    ----------
    infilename : String
        Name of file to read.  If None (or empty), sys.stdin is returned
    inmode : String
        Mode to open the file in

    Returns
    -------
    infile : Open file buffer

    Examples
    --------
    >>> infile = get_infile(infilename)
    >>> myfunction(infile,...)
    >>> close_infile(infile)
    """
    # Any falsy name (None, '') means "read from standard input"
    if not infilename:
        return sys.stdin

    return open(infilename, inmode)
142+
143+
144+
def get_outfile(outfilename, outmode='wb', default=sys.stdout):
    """
    Open outfilename in outmode.

    Parameters
    ----------
    outfilename : String
        Name of file to open and return.
        If None, return the kwarg 'default'
    outmode : String
        Mode to open file in
    default : File buffer
        The value to return if outfilename is None.  Note that the default
        value is the sys.stdout that existed when this module was imported.

    Returns
    -------
    outfile

    Raises
    ------
    ValueError
        If outfilename is neither a string nor None

    Examples
    --------
    >>> outfile = get_outfile(outfilename)
    >>> myfunction(outfile,...)
    >>> close_outfile(outfile)
    """
    try:
        string_types = basestring  # Python 2: accept both str and unicode
    except NameError:
        string_types = str

    if outfilename is None:
        return default
    if isinstance(outfilename, string_types):
        return open(outfilename, outmode)

    # Report the type, as the message promises, rather than the value
    raise ValueError(
        "Argument outfilename is of type %s.  Not handled."
        % type(outfilename).__name__)
176+
177+
178+
def openfile_wrap(filename, mode):
    """
    If filename is a string, returns an opened version of filename.
    If filename is a file buffer (any object with a read or write method),
    then passthrough.

    Parameters
    ----------
    filename : String or file buffer
    mode : String
        mode to open the file in (ignored when a file buffer is passed)

    Returns
    -------
    ofile : Opened file buffer
    was_path : Boolean
        If True, then filename was a string (and thus was opened here, and so
        you better remember to close it elsewhere)

    Raises
    ------
    TypeError
        If filename is neither a string nor a file-like object

    Examples
    --------
    >>> infile, was_path = openfile_wrap(infilename, 'r')
    >>> myfunction(infile,...)
    >>> if was_path:
    >>>     infile.close()
    """
    try:
        string_types = basestring  # Python 2: accept both str and unicode
    except NameError:
        string_types = str

    if isinstance(filename, string_types):
        return open(filename, mode), True

    # Duck-type file buffers so cStringIO, io.StringIO, gzip files, etc. are
    # accepted, not just the builtin file type and StringIO.StringIO.
    if hasattr(filename, 'read') or hasattr(filename, 'write'):
        return filename, False

    # Wrap in a 1-tuple so tuple arguments don't break the % formatting
    raise TypeError("Could not work with %s" % (filename,))
213+
214+
215+
################################################################################
216+
# Functions to read special file formats
217+
################################################################################
218+
219+
def get_list_from_filerows(infile):
    """
    Returns a list generated from rows of a file.

    Parameters
    ----------
    infile : File buffer or path
        Lines starting with # are comments
        Blank lines and leading/trailing whitespace are ignored
        Other lines will be converted to a string and appended to a
        list.

    Returns
    -------
    kpv_list : List of strings
        The stripped, non-blank, non-comment lines in file order
    """
    f, was_path = openfile_wrap(infile, 'r')

    try:
        stripped_lines = (line.strip() for line in f)
        # Keep lines that are neither blank nor comments
        kpv_list = [
            line for line in stripped_lines
            if line and not line.startswith('#')]
    finally:
        # Only close what we opened ourselves, even if reading failed
        if was_path:
            f.close()

    return kpv_list
248+
249+
250+
def write_list_to_filerows(outfile, mylist):
    """
    The inverse of get_list_from_filerows.

    Writes str(item) followed by a newline for every item in mylist.

    Parameters
    ----------
    outfile : File buffer or path
        Where to write.  A path is opened in 'w' mode and closed afterwards;
        a file buffer is written to and left open.
    mylist : List
    """
    f, was_path = openfile_wrap(outfile, 'w')

    try:
        for item in mylist:
            f.write(str(item) + '\n')
    finally:
        # Only close what we opened ourselves, even if a write failed
        if was_path:
            f.close()
265+
266+
267+
def pickleme(obj, filename, protocol=2):
    """
    Save obj to disk using cPickle.

    Parameters
    ----------
    obj : Serializable Python object
    filename : String
        Name of file to store obj to
    protocol : 0, 1, or 2
        2 is fastest
    """
    # Protocols >= 1 are binary formats, so the file must be opened in binary
    # mode; text mode can corrupt the stream (newline translation on Windows).
    with open(filename, 'wb') as f:
        cPickle.dump(obj, f, protocol=protocol)
281+
282+
283+
def unpickleme(filename):
    """
    Returns unpickled version of object.

    Parameters
    ----------
    filename : String
        We will attempt to unpickle this file.

    Returns
    -------
    The object stored in filename

    Notes
    -----
    Unpickling can execute arbitrary code.  Only use this on files that come
    from a trusted source.
    """
    # Pickles are binary data (see pickleme), so read them in binary mode
    with open(filename, 'rb') as f:
        return cPickle.load(f)
294+
295+
296+
def get_structured_array(listoflists, schema, dropmissing=False):
    """
    Uses schema to convert listoflists to a structured array.

    Parameters
    ----------
    listoflists : List of lists
    schema : List of tuples
        E.g. [(var1, type1),...,(varK, typeK)]
    dropmissing : Boolean
        If True, drop rows that contain missing values.  A value counts as
        missing when it equals the empty string ''.

    Returns
    -------
    Numpy structured array with one record per (kept) row
    """
    # numpy only builds structured records from tuples, not from lists, so
    # each row has to be converted first.
    if dropmissing:
        tuple_list = [tuple(row) for row in listoflists if '' not in row]
    else:
        tuple_list = [tuple(row) for row in listoflists]

    return np.array(tuple_list, dtype=schema)
316+
317+
318+
################################################################################
319+
# Custom Exceptions
320+
################################################################################
321+
322+
323+
class BadDataError(Exception):
    """
    Plain Exception subclass that adds no behavior of its own.  It exists so
    that code can raise and catch this specific error type rather than some
    other, unrelated exception.
    """
330+
331+
332+
class ConfigurationSyntaxError(Exception):
    """
    Plain Exception subclass that adds no behavior of its own.  Used to
    signal syntax issues in config files.
    """
338+
339+
340+
################################################################################
341+
# Functions for printing objects
342+
################################################################################
343+
344+
345+
def printdict(d, max_print_len=None):
    """
    Prints the dictionary d to stdout, one 'key: value' pair per line.

    Parameters
    ----------
    d : Dictionary
    max_print_len : Integer
        If given (and non-zero), print only the first max_print_len
        characters of the output.  None or 0 prints everything.
    """
    # Build the text in one pass with join rather than repeated +=.
    # items() and print(...) with a single argument behave the same under
    # both Python 2 and Python 3.
    s = ''.join(
        str(key) + ': ' + str(value) + '\n' for key, value in d.items())
    if max_print_len:
        print(s[:max_print_len])
    else:
        print(s)
353+
354+
355+
def print_dicts(dicts, prepend_str=''):
    """
    Recursively prints a (possibly nested) dictionary to stdout.

    A key whose value is a dictionary is printed on its own line, and that
    dictionary's contents follow with one extra leading space.  Any other
    value is printed as 'key = value' using the %.5f format, so it must be
    a number.

    Parameters
    ----------
    dicts : Dictionary
        Keys are strings; values are numbers or further dictionaries
    prepend_str : String
        Prefix written before every line at this nesting level
    """
    # items() and print(...) with a single argument behave the same under
    # both Python 2 and Python 3.
    for key, value in dicts.items():
        if isinstance(value, dict):
            print(prepend_str + key)
            next_prepend_str = prepend_str + ' '
            print_dicts(value, next_prepend_str)
        else:
            print("%s%s = %.5f" % (prepend_str, key, value))

0 commit comments

Comments
 (0)