1616import numpy
1717
1818
19- def loadData (filename , minrows = 10 , ** kwargs ):
19+ def loadData (filename , minrows = 10 , headers = False , hdel = '=' , hignore = None , ** kwargs ):
2020 """Find and load data from a text file.
2121
22- The data reading starts at the first matrix block of at least minrows rows
23- and constant number of columns. This seems to work for most of the
24- datafiles including those generated by PDFGetX2.
25-
26- filename -- name of the file we want to load data from.
27- minrows -- minimum number of rows in the first data block.
28- All rows must have the same number of floating point values.
29- usecols -- zero-based index of columns to be loaded, by default use
30- all detected columns. The reading skips data blocks that
31- do not have the usecols-specified columns.
32- unpack -- return data as a sequence of columns that allows tuple
33- unpacking such as x, y = loadData(FILENAME, unpack=True).
34- Note transposing the loaded array as loadData(FILENAME).T
35- has the same effect.
36- kwargs -- keyword arguments that are passed to numpy.loadtxt
37-
38- Return a numpy array of the data.
39- See also numpy.loadtxt for more details.
22+ The data block is identified as the first matrix block of at least minrows rows
23+ and constant number of columns. This seems to work for most of the datafiles including
24+ those generated by PDFGetX2.
25+
26+ filename -- name of the file we want to load data from.
27+ minrows -- minimum number of rows in the first data block.
28+ All rows must have the same number of floating point values.
29+ headers -- when False (default), the function returns a numpy array of the
30+ data in the data block. When True, the function instead returns a
31+ dictionary of parameters and their corresponding values parsed from
32+ header (information prior to the data block). See hdel and hignore for
33+ options to help with parsing header information.
34+ hdel -- (only used when headers enabled) delimiter for parsing header
35+ information (default '='). e.g. using default hdel, the line
36+ 'parameter = p_value' is put into the dictionary as
37+ {parameter: p_value}.
38+ hignore -- (only used when headers enabled) ignore header rows beginning
39+ with any elements in the hignore list. e.g. hignore=['# ', '[']
40+ means the following lines are skipped: '# qmax=10', '[defaults]'.
41+ kwargs -- keyword arguments that are passed to numpy.loadtxt including
42+ the following arguments below. (See also numpy.loadtxt for more
43+ details.)
44+ delimiter -- delimiter for the data in the block (default use whitespace).
45+ For comma-separated data blocks, set delimiter to ','.
46+ usecols -- zero-based index of columns to be loaded, by default use
47+ all detected columns. The reading skips data blocks that
48+ do not have the usecols-specified columns.
49+ unpack -- return data as a sequence of columns that allows tuple
50+ unpacking such as x, y = loadData(FILENAME, unpack=True).
51+ Note transposing the loaded array as loadData(FILENAME).T
52+ has the same effect.
53+
54+ Return a numpy array of the data. If headers is enabled, instead return a
55+ dictionary of parameters read from the header.
4056 """
4157 from numpy import array , loadtxt
58+ # for storing header data
59+ hdata = {}
4260 # determine the arguments
4361 delimiter = kwargs .get ('delimiter' )
4462 usecols = kwargs .get ('usecols' )
@@ -72,8 +90,39 @@ def countcolumnsvalues(line):
7290 fpos = (0 , 0 )
7391 nrows = 0
7492 for line in fid :
93+ # decode line
94+ dline = line .decode ()
95+ # find header information if requested
96+ if headers :
97+ hpair = dline .split (hdel )
98+ flag = True
99+ # ensure number of non-blank arguments is two
100+ if len (hpair ) != 2 :
101+ flag = False
102+ else :
103+ # ignore if an argument is blank
104+ hpair [0 ] = hpair [0 ].strip () # name of data entry
105+ hpair [1 ] = hpair [1 ].strip () # value of entry
106+ if not hpair [0 ] or not hpair [1 ]:
107+ flag = False
108+ else :
109+ # check if row has an ignore tag
110+ if hignore is not None :
111+ for tag in hignore :
112+ taglen = len (tag )
113+ if len (hpair [0 ]) >= taglen and hpair [0 ][:taglen ] == tag :
114+ flag = False
115+ # add header data
116+ if flag :
117+ name = hpair [0 ]
118+ value = hpair [1 ]
119+ # check if data value should be stored as float
120+ if isfloat (hpair [1 ]):
121+ value = float (hpair [1 ])
122+ hdata .update ({name : value })
123+ # continue search for the start of datablock
75124 fpos = (fpos [1 ], fpos [1 ] + len (line ))
76- line = line . decode ()
125+ line = dline
77126 ncv = countcolumnsvalues (line )
78127 if ncv < mincv :
79128 start = None
@@ -88,6 +137,11 @@ def countcolumnsvalues(line):
88137 # block was found here!
89138 if nrows >= minrows :
90139 break
140+
141+ # Return header data if requested
142+ if headers :
143+ return hdata # Return, so do not proceed to reading datablock
144+
91145 # Return an empty array when no data found.
92146 # loadtxt would otherwise raise an exception on loading from EOF.
93147 if start is None :
@@ -247,4 +301,4 @@ def isfloat(s):
247301 pass
248302 return False
249303
250- # End of file
304+ # End of file
0 commit comments