-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxmcfiles.py
executable file
·462 lines (366 loc) · 15.8 KB
/
xmcfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
"""
Module of functions for dealing with files output from xmc
Contains the following functions:
parse_file_line
read_parnames
merge_output
remove_nans
add_header
calculate_prior
fake_devonvolution
read_spectra
"""
#----------------------------------------------------------
#Import common modules
import os
import numpy as np
import pandas as pd
import file_utilities as fu
#----------------------------------------------------------
def remove_nans(datatable,filename=None,verbose=1):
"""Remove rows containing nans from dataframe and print warning."""
nb1 = len(datatable.index)
datatable.dropna(inplace=True)
nb2 = len(datatable.index)
ndropped = nb1-nb2
if ndropped <= 1:
row=' row'
was=' was'
else:
row=' rows'
was=' were'
if verbose > 1: print "number of dropped rows = ",ndropped
if (ndropped > 0) and (verbose > 0):
if filename == None:
print "Warning: "+str(nb1-nb2)+row+" contained nan "+\
"values and"+was+" dropped."
else:
print "Warning: "+str(nb1-nb2)+row+" in "+filename+\
" contained nan values and"+was+" dropped."
return datatable
#----------------------------------------------------------
def read_parnames(runpath):
"""
Read in parameters.txt file from an xmc run to get names of columns in deconvolution files
Author: Kari A. Frank
Date: March 20, 2014
Usage: parnames = read_parnames(runpath)
Input:
runpath -- Path to xmc run directory, containing the file parameters.txt.
Only the first line of the file is read. Parameter names
should contain no spaces or punctuation and are assumed to be
separated by commas. They should correspond to the columns
of the deconvolution files.
Output:
returns string list of column (parameter) names
Usage Notes:
"""
#--Set file paths--
parfile = runpath+'/parameters.txt'
outpars = fu.parse_file_line(parfile)
#--return string list--
return outpars
#----------------------------------------------------------
def merge_output(runpath='./',filetype='deconvolution',save=True,sep='\t',
itmin=0,itmax=None):
"""
Author: Kari A. Frank
Date: October 16, 2015
Purpose: Read in deconvolution, statistic, or other files from
an xmc run and merge into a single dataframe, with an extra
column to specify iteration. optionally can also save as
a text file.
Usage: merge_output(runpath='./',filetype='deconvolution',save=True)
Input:
runpath -- Path to xmc run directory, containing the
deconvolution, statistic, etc. files
filetype -- string containing the type of xmc file to merge
deconvolution, statistic, sigma, mean, changed
save -- boolean switch to prevent saving of the merged
file. use save=False if all that is need is the merged
dataframe (default=True).
sep -- optionally specify the delimiter in the output file
(default='\t', tab-separated)
itmin/itmax -- optionally specify the minimum and maximum iterations
to read
Output:
- if save=True, writes a text file <filetype>_merged.txt into the
runpath directory
- returns a pandas dataframe containing data from all the input files
Usage Notes:
- overwrites the file if it already exists
- automatically includes all iterations present if no
itmin/itmax given
- iterations are not written in order, but an extra column is added
to specify which iteration each row is associated with
- the created file can be read into a dataframe using
datatable = pd.read_table(mergedfile,sep='\t',index_col=0)
"""
# -- defaults --
if itmax is None:
itmax = 100000. # dummy itmax value
# - get list of files -
# filelist_raw = fu.ls_to_list(runpath,ls_args = filetype+'.*')
filelist_raw = fu.files_to_list(runpath,search_str = filetype+'.*')
# - remove less than itmin, greater than itmax -
filelist = [f for f in filelist_raw if (int(f.split('.')[-1]) >= itmin and int(f.split('.')[-1]) <= itmax)]
# --Initialize dataframe with first file --
# - read file -
datatable = pd.read_table(runpath+'/'+filelist[0],sep='\s+',header=None)
# - check for nans and drop them -
datatable = remove_nans(datatable,filename=filelist[0])
# - add headers -
parnames = []
# read header information
if filetype == 'statistic':
parnames = ['stat1','dof','oversim','nblobs','alpha','chi2']
datatable.columns = parnames
if filetype == 'deconvolution':
if os.path.isfile(runpath+'/parameters.txt'):
parnames = fu.parse_file_line(runpath+'/parameters.txt')
datatable.columns = parnames
# - get iteration number and add as column -
iternum = int(filelist[0].split('.')[-1]) # returns integer
datatable['iteration'] = iternum
# -- Loop through files and concatenate into single dataframe --
for f in filelist[1:]:
if os.stat(runpath+'/'+f).st_size > 0:
newframe = pd.read_table(runpath+'/'+f,sep='\s+',
header=None)
iternum = int(f.split('.')[-1]) # returns integer
if len(parnames) > 0: newframe.columns=parnames
newframe['iteration'] = iternum
# - check for nans and drop them -
newframe = remove_nans(newframe,filename=f)
datatable = pd.concat([datatable,newframe],
ignore_index=True)
if iternum%500 == 0:
print 'Read iteration '+str(iternum)
else:
print 'Warning: '+f+' is missing or empty. Skipping.'
# -- Write to file --
if save == True: datatable.to_csv(runpath+'/'+filetype+'_merged.txt',
sep=sep)
return datatable
#----------------------------------------------------------
def calculate_prior(minval,maxval):
"""Given endpoints of a range, returns the center and stepsize."""
width = maxval-minval
stepsize = width/2.0
center = minval+stepsize
return center,stepsize
#----------------------------------------------------------
def fake_deconvolution(df,suffix='99999',runpath='../'):
"""
Write a deconvolution file from a dataframe
Author: Kari A. Frank
Date: April 27, 2016
Input:
df -- dataframe
suffix -- string to attach to end of file names
runpath -- Path to xmc run directory, containing the
parameters.txt file
Output:
- writes a text file with blob parameters formatted as a
deconvolution file, and corresponding empty sigma, statistic,
changed, and mean files.
- returns the new dataframe
Usage Notes:
- overwrites the files if they already exist
- requires a parameters.txt file containing the deconvolution file
column names in the correct order
"""
#--read parameters.txt--
pnames = read_parnames(runpath)
#--create dataframe with only parameters.txt columns, in correct order--
dfout = df[pnames].copy()
#--check if file exists--
clobber = 'y'
if os.path.isfile('deconvolution.'+suffix):
clobber = raw_input("File deconvolution."+suffix+" already exists. Overwrite file? (y/n) ")
if clobber in ['y','yes','Yes','Y']:
clobber = True
else:
clobber = False
#--write dataframe to file--
if clobber:
dfout.to_csv('deconvolution.'+suffix,sep=' ',header=False,
index=False)
#--create auxiliary (empty) files--
open('statistic.'+suffix,'w').close()
open('sigma.'+suffix,'w').close()
open('mean.'+suffix,'w').close()
open('changed.'+suffix,'w').close()
return dfout
#----------------------------------------------------------
def read_spectra(runpath='../',itmin=1,itmax=None,logbins=False,
legacy=False,
average=False,bins=0.03,datarange=None,oversim='auto'):
"""
Read xmc spectrum files into histograms.
Author: Kari A. Frank
Date: May 3, 2017
Input:
runpath (string) : the relative path to xmc run folder, which
contains the spectrum.* files. alternatively,
can pass a dataframe with the x and y values, e.g.
as would be read from a text file written by
xspec with the wd command.
itmin/itmax (int) : minimum and/or maximum iterations included in the
averaging of the model spectra. corresponds to the
spectrum* file names, e.g. itmax=3 will average the
files spectrum_1.fits, spectrum_2.fits, and
spectrum_3.fits. default is all available spectra.
bins: optionally specify the number of bins (int), binsize
(float), or an array containing the bin edge values.
can also accept any of the strings recognized by
np.histogram() for special calculation of bin sizes.
- 'auto' (but don't use if using weights)
- 'fd' (Freedman Diaconis Estimator)
- 'doane'
- 'scott'
- 'rice'
- 'sturges'
- 'sqrt'
default is to create equally spaced bins of width 0.03,
(30 eV). note that xmc typically uses a binsize of 0.015.
average (bool) : if True, will return the average of the spectra
between itmin and itmax, as the last tuple in the
returned list.
datarange () : passed directly to make_histogram()
oversim (numerical scalar or 'auto') : must specify the oversim value
to properly scale the model spectra.
- 'auto' (or any string): will read in the first
statistic.* file it
finds in runpath/ and get the oversim from there.
- can also explicitly pass the oversim value
legacy (bool) : assume old-style spectrum file names, i.e.
spectrum_iter/100.fits
Output:
- returns a tuple containing the histogram information,
(y,yerrors,yedges) for each iteration read, in a list of tuples
Usage Notes:
- to read in a single spectrum set itmin=itmax=iteration number
- to read in the data spectrum, set itmin=0 and itmax=0
"""
#----Import Modules----
import os,re
import astropy.io.fits as fits
from file_utilities import ls_to_list,parse_file_line
from wrangle import make_histogram
#----Set defaults----
# check for MPI file names (if xmc was run with mpi)
if os.path.isfile(runpath+'/spectrum0_0.fits'):
specname = 'spectrum0_'
else:
specname = 'spectrum_'
# get oversim
if isinstance(oversim,str):
oversim = float(parse_file_line(runpath+ls_to_list(runpath,'statistic.*')[0])[2])
# set file name type
if legacy is True:
itmin = itmin/100
if itmax is not None: itmax = itmax/100
if itmax is None:
# itmax = len(ls_to_list(runpath,'spectrum*')) - 1
itmax = int(re.search(r'_(.*)\.fits',
ls_to_list(runpath,'-tr spectrum*')[-1]).group(1))
#----Read in first model spectrum----
foundspec = False
sm = itmin
hists = []
print itmin,itmax
while (sm<=itmax and foundspec is False):
specfile = runpath+'/'+specname+str(sm)+'.fits'
print specfile
if os.path.isfile(specfile):
table = fits.getdata(specfile,0)
wave = table.field('wave')
wave_avg = table.field('wave')
iters_avg = np.ones_like(wave)
iters_avg.fill(sm)
foundspec = True
#--convert pd.Series--
if np.max(wave) < 15.0:
xlabel = 'Energy (keV)'
else:
xlabel = 'Wavelength (Angstroms)'
if sm == 0:
wave = pd.Series(wave.byteswap().newbyteorder(),
name=xlabel)
else:
wave = pd.Series(wave+0,name=xlabel)
#--Create and Save Histogram--
# y units are counts per bin
if isinstance(bins,float): # assume binsize, convert
# to number bins
nbins = np.ceil((np.max(wave.values)-
np.min(wave.values))/bins)
y,yerrors,yedges = \
make_histogram(wave,bins=nbins,
logbins=logbins,
datarange=datarange,
density=False,iterations=None)
hists = hists + [(y,yerrors,yedges)]
else:
# print ("Warning: "+specfile+" not found. Skipping +"
# "to next spectrum.")
sm = sm+1
if foundspec is False:
#print sm
print "ERROR: no spectrum files found in range."
# print "WARNING: No spectrum files found in range. Plotting data spectrum only."
#----Loop over remaining spectra----
for s in xrange(sm+1,itmax+1):
specfile = runpath+'/'+specname+str(s)+'.fits'
if os.path.isfile(specfile):
table = fits.getdata(specfile,0)
wave = table.field('wave')
iters = np.ones_like(wave)
iters.fill(s) #assign 'iteration' number
wave_avg = np.hstack((wave_avg,wave))
iters_avg = np.hstack((iters_avg,iters))
#--convert pd.Series--
if np.max(wave) < 15.0:
xlabel = 'Energy (keV)'
else:
xlabel = 'Wavelength (Angstroms)'
wave = pd.Series(wave+0,name=xlabel)
#--Create and Save Histogram--
# y units are counts per bin
if isinstance(bins,float): # assume binsize, convert
# to number bins
nbins = np.ceil((np.max(wave.values)-
np.min(wave.values))/bins)
y,yerrors,yedges = \
make_histogram(wave,bins=nbins,
logbins=logbins,
datarange=datarange,
density=False,iterations=None)
hists = hists + [(y/oversim,yerrors,yedges)]
# else:
# print "Warning: "+specfile+" does not exist. Skipping."
#----Save average----
if average is True and foundspec is True:
# --convert average to pd.Series--
wave_avg = pd.Series(wave_avg+0,name=xlabel)
iters_avg = pd.Series(iters_avg+0,name='iteration')
#--Create and Save Histogram--
# y units are counts per bin
if isinstance(bins,float): # assume binsize, convert
# to number bins
nbins = np.ceil((np.max(wave_avg.values)-
np.min(wave_avg.values))/bins)
y,yerrors,yedges = make_histogram(wave_avg,bins=nbins,
logbins=logbins,
datarange=datarange,
density=False,
iterations=iters_avg)
#-scale by number of iterations-
nspec = len(np.unique(iters_avg))
print 'nspec = ',nspec
y = y/float(nspec)/oversim
yerrors = yerrors/float(nspec)/oversim
#-save-
hists = hists + [(y,yerrors,yedges)]
return hists